{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8251875887243967,
"eval_steps": 1500,
"global_step": 90000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001323276432446738,
"grad_norm": 48.5,
"learning_rate": 1.5560495395363608e-06,
"loss": 4.0059,
"step": 50
},
{
"epoch": 0.002646552864893476,
"grad_norm": 5.46875,
"learning_rate": 3.1438551921244835e-06,
"loss": 2.8343,
"step": 100
},
{
"epoch": 0.003969829297340214,
"grad_norm": 4.8125,
"learning_rate": 4.731660844712607e-06,
"loss": 2.5854,
"step": 150
},
{
"epoch": 0.005293105729786952,
"grad_norm": 6.1875,
"learning_rate": 6.31946649730073e-06,
"loss": 2.5068,
"step": 200
},
{
"epoch": 0.0066163821622336905,
"grad_norm": 8.25,
"learning_rate": 7.907272149888853e-06,
"loss": 2.4497,
"step": 250
},
{
"epoch": 0.007939658594680429,
"grad_norm": 6.90625,
"learning_rate": 9.495077802476977e-06,
"loss": 2.374,
"step": 300
},
{
"epoch": 0.009262935027127167,
"grad_norm": 7.15625,
"learning_rate": 1.1082883455065099e-05,
"loss": 2.3558,
"step": 350
},
{
"epoch": 0.010586211459573905,
"grad_norm": 14.25,
"learning_rate": 1.2670689107653221e-05,
"loss": 2.3181,
"step": 400
},
{
"epoch": 0.011909487892020643,
"grad_norm": 11.0625,
"learning_rate": 1.4258494760241345e-05,
"loss": 2.2872,
"step": 450
},
{
"epoch": 0.013232764324467381,
"grad_norm": 13.0,
"learning_rate": 1.584630041282947e-05,
"loss": 2.2729,
"step": 500
},
{
"epoch": 0.01455604075691412,
"grad_norm": 16.375,
"learning_rate": 1.743410606541759e-05,
"loss": 2.2402,
"step": 550
},
{
"epoch": 0.015879317189360857,
"grad_norm": 8.4375,
"learning_rate": 1.902191171800571e-05,
"loss": 2.196,
"step": 600
},
{
"epoch": 0.017202593621807597,
"grad_norm": 8.75,
"learning_rate": 2.0609717370593838e-05,
"loss": 2.1501,
"step": 650
},
{
"epoch": 0.018525870054254334,
"grad_norm": 7.5625,
"learning_rate": 2.2197523023181962e-05,
"loss": 2.1772,
"step": 700
},
{
"epoch": 0.019849146486701073,
"grad_norm": 7.96875,
"learning_rate": 2.3785328675770086e-05,
"loss": 2.1476,
"step": 750
},
{
"epoch": 0.02117242291914781,
"grad_norm": 7.78125,
"learning_rate": 2.5373134328358206e-05,
"loss": 2.1264,
"step": 800
},
{
"epoch": 0.02249569935159455,
"grad_norm": 10.4375,
"learning_rate": 2.696093998094633e-05,
"loss": 2.0611,
"step": 850
},
{
"epoch": 0.023818975784041286,
"grad_norm": 13.0625,
"learning_rate": 2.8548745633534454e-05,
"loss": 2.0682,
"step": 900
},
{
"epoch": 0.025142252216488026,
"grad_norm": 8.25,
"learning_rate": 3.0136551286122578e-05,
"loss": 2.0182,
"step": 950
},
{
"epoch": 0.026465528648934762,
"grad_norm": 8.75,
"learning_rate": 3.17243569387107e-05,
"loss": 1.9976,
"step": 1000
},
{
"epoch": 0.027788805081381502,
"grad_norm": 6.5625,
"learning_rate": 3.331216259129882e-05,
"loss": 1.9901,
"step": 1050
},
{
"epoch": 0.02911208151382824,
"grad_norm": 8.4375,
"learning_rate": 3.489996824388694e-05,
"loss": 1.9705,
"step": 1100
},
{
"epoch": 0.030435357946274978,
"grad_norm": 9.25,
"learning_rate": 3.6487773896475066e-05,
"loss": 1.9057,
"step": 1150
},
{
"epoch": 0.031758634378721715,
"grad_norm": 7.5,
"learning_rate": 3.807557954906319e-05,
"loss": 1.9104,
"step": 1200
},
{
"epoch": 0.03308191081116845,
"grad_norm": 12.6875,
"learning_rate": 3.9663385201651314e-05,
"loss": 1.8885,
"step": 1250
},
{
"epoch": 0.034405187243615194,
"grad_norm": 7.34375,
"learning_rate": 4.125119085423944e-05,
"loss": 1.785,
"step": 1300
},
{
"epoch": 0.03572846367606193,
"grad_norm": 7.21875,
"learning_rate": 4.283899650682756e-05,
"loss": 1.2491,
"step": 1350
},
{
"epoch": 0.03705174010850867,
"grad_norm": 4.03125,
"learning_rate": 4.442680215941568e-05,
"loss": 0.543,
"step": 1400
},
{
"epoch": 0.0383750165409554,
"grad_norm": 3.359375,
"learning_rate": 4.60146078120038e-05,
"loss": 0.3884,
"step": 1450
},
{
"epoch": 0.03969829297340215,
"grad_norm": 2.921875,
"learning_rate": 4.7602413464591926e-05,
"loss": 0.3543,
"step": 1500
},
{
"epoch": 0.03969829297340215,
"eval_cer": 0.08528059032633051,
"eval_loss": 0.4342532753944397,
"eval_runtime": 157.0758,
"eval_samples_per_second": 16.101,
"eval_steps_per_second": 0.127,
"eval_wer": 0.21550466744457408,
"step": 1500
},
{
"epoch": 0.04102156940584888,
"grad_norm": 2.6875,
"learning_rate": 4.919021911718005e-05,
"loss": 0.3145,
"step": 1550
},
{
"epoch": 0.04234484583829562,
"grad_norm": 2.734375,
"learning_rate": 5.0778024769768174e-05,
"loss": 0.3196,
"step": 1600
},
{
"epoch": 0.043668122270742356,
"grad_norm": 2.609375,
"learning_rate": 5.2365830422356304e-05,
"loss": 0.2973,
"step": 1650
},
{
"epoch": 0.0449913987031891,
"grad_norm": 2.0625,
"learning_rate": 5.395363607494443e-05,
"loss": 0.3009,
"step": 1700
},
{
"epoch": 0.046314675135635835,
"grad_norm": 2.0625,
"learning_rate": 5.554144172753255e-05,
"loss": 0.2681,
"step": 1750
},
{
"epoch": 0.04763795156808257,
"grad_norm": 1.9375,
"learning_rate": 5.712924738012067e-05,
"loss": 0.2769,
"step": 1800
},
{
"epoch": 0.04896122800052931,
"grad_norm": 12.625,
"learning_rate": 5.871705303270879e-05,
"loss": 0.2833,
"step": 1850
},
{
"epoch": 0.05028450443297605,
"grad_norm": 2.0,
"learning_rate": 6.0304858685296916e-05,
"loss": 0.2482,
"step": 1900
},
{
"epoch": 0.05160778086542279,
"grad_norm": 2.125,
"learning_rate": 6.189266433788503e-05,
"loss": 0.2641,
"step": 1950
},
{
"epoch": 0.052931057297869524,
"grad_norm": 1.609375,
"learning_rate": 6.348046999047316e-05,
"loss": 0.2513,
"step": 2000
},
{
"epoch": 0.05425433373031626,
"grad_norm": 1.859375,
"learning_rate": 6.506827564306128e-05,
"loss": 0.2599,
"step": 2050
},
{
"epoch": 0.055577610162763004,
"grad_norm": 1.6796875,
"learning_rate": 6.66560812956494e-05,
"loss": 0.2556,
"step": 2100
},
{
"epoch": 0.05690088659520974,
"grad_norm": 1.5390625,
"learning_rate": 6.824388694823753e-05,
"loss": 0.2412,
"step": 2150
},
{
"epoch": 0.05822416302765648,
"grad_norm": 1.6875,
"learning_rate": 6.983169260082565e-05,
"loss": 0.2564,
"step": 2200
},
{
"epoch": 0.05954743946010321,
"grad_norm": 1.6796875,
"learning_rate": 7.141949825341378e-05,
"loss": 0.2599,
"step": 2250
},
{
"epoch": 0.060870715892549956,
"grad_norm": 1.8515625,
"learning_rate": 7.30073039060019e-05,
"loss": 0.249,
"step": 2300
},
{
"epoch": 0.06219399232499669,
"grad_norm": 1.5,
"learning_rate": 7.459510955859002e-05,
"loss": 0.2474,
"step": 2350
},
{
"epoch": 0.06351726875744343,
"grad_norm": 1.5859375,
"learning_rate": 7.618291521117815e-05,
"loss": 0.2364,
"step": 2400
},
{
"epoch": 0.06484054518989017,
"grad_norm": 1.21875,
"learning_rate": 7.777072086376627e-05,
"loss": 0.2814,
"step": 2450
},
{
"epoch": 0.0661638216223369,
"grad_norm": 1.5859375,
"learning_rate": 7.93585265163544e-05,
"loss": 0.2427,
"step": 2500
},
{
"epoch": 0.06748709805478365,
"grad_norm": 1.6484375,
"learning_rate": 8.094633216894252e-05,
"loss": 0.2416,
"step": 2550
},
{
"epoch": 0.06881037448723039,
"grad_norm": 1.3984375,
"learning_rate": 8.253413782153064e-05,
"loss": 0.2402,
"step": 2600
},
{
"epoch": 0.07013365091967712,
"grad_norm": 1.5234375,
"learning_rate": 8.412194347411875e-05,
"loss": 0.2467,
"step": 2650
},
{
"epoch": 0.07145692735212386,
"grad_norm": 1.40625,
"learning_rate": 8.570974912670689e-05,
"loss": 0.232,
"step": 2700
},
{
"epoch": 0.07278020378457059,
"grad_norm": 1.359375,
"learning_rate": 8.7297554779295e-05,
"loss": 0.2449,
"step": 2750
},
{
"epoch": 0.07410348021701733,
"grad_norm": 1.28125,
"learning_rate": 8.888536043188314e-05,
"loss": 0.2173,
"step": 2800
},
{
"epoch": 0.07542675664946408,
"grad_norm": 1.3203125,
"learning_rate": 9.047316608447125e-05,
"loss": 0.2315,
"step": 2850
},
{
"epoch": 0.0767500330819108,
"grad_norm": 1.3984375,
"learning_rate": 9.206097173705939e-05,
"loss": 0.235,
"step": 2900
},
{
"epoch": 0.07807330951435755,
"grad_norm": 1.3359375,
"learning_rate": 9.36487773896475e-05,
"loss": 0.237,
"step": 2950
},
{
"epoch": 0.0793965859468043,
"grad_norm": 1.109375,
"learning_rate": 9.523658304223562e-05,
"loss": 0.232,
"step": 3000
},
{
"epoch": 0.0793965859468043,
"eval_cer": 0.05592713018964346,
"eval_loss": 0.3169604539871216,
"eval_runtime": 121.8922,
"eval_samples_per_second": 20.748,
"eval_steps_per_second": 0.164,
"eval_wer": 0.1594588681446908,
"step": 3000
},
{
"epoch": 0.08071986237925102,
"grad_norm": 1.2578125,
"learning_rate": 9.682438869482374e-05,
"loss": 0.2229,
"step": 3050
},
{
"epoch": 0.08204313881169777,
"grad_norm": 1.28125,
"learning_rate": 9.841219434741187e-05,
"loss": 0.2255,
"step": 3100
},
{
"epoch": 0.0833664152441445,
"grad_norm": 1.296875,
"learning_rate": 9.999999999999999e-05,
"loss": 0.2313,
"step": 3150
},
{
"epoch": 0.08468969167659124,
"grad_norm": 1.328125,
"learning_rate": 0.00010158780565258812,
"loss": 0.2704,
"step": 3200
},
{
"epoch": 0.08601296810903798,
"grad_norm": 1.3125,
"learning_rate": 0.00010317561130517623,
"loss": 0.228,
"step": 3250
},
{
"epoch": 0.08733624454148471,
"grad_norm": 1.421875,
"learning_rate": 0.00010476341695776436,
"loss": 0.2261,
"step": 3300
},
{
"epoch": 0.08865952097393145,
"grad_norm": 1.15625,
"learning_rate": 0.00010635122261035247,
"loss": 0.2303,
"step": 3350
},
{
"epoch": 0.0899827974063782,
"grad_norm": 1.296875,
"learning_rate": 0.00010793902826294061,
"loss": 0.231,
"step": 3400
},
{
"epoch": 0.09130607383882493,
"grad_norm": 1.25,
"learning_rate": 0.00010952683391552872,
"loss": 0.2282,
"step": 3450
},
{
"epoch": 0.09262935027127167,
"grad_norm": 1.109375,
"learning_rate": 0.00011111463956811686,
"loss": 0.2599,
"step": 3500
},
{
"epoch": 0.0939526267037184,
"grad_norm": 1.1015625,
"learning_rate": 0.00011270244522070497,
"loss": 0.2263,
"step": 3550
},
{
"epoch": 0.09527590313616514,
"grad_norm": 1.125,
"learning_rate": 0.0001142902508732931,
"loss": 0.2176,
"step": 3600
},
{
"epoch": 0.09659917956861189,
"grad_norm": 1.25,
"learning_rate": 0.00011587805652588123,
"loss": 0.2458,
"step": 3650
},
{
"epoch": 0.09792245600105862,
"grad_norm": 1.2265625,
"learning_rate": 0.00011746586217846934,
"loss": 0.229,
"step": 3700
},
{
"epoch": 0.09924573243350536,
"grad_norm": 1.0078125,
"learning_rate": 0.00011905366783105748,
"loss": 0.2172,
"step": 3750
},
{
"epoch": 0.1005690088659521,
"grad_norm": 1.2265625,
"learning_rate": 0.00012064147348364559,
"loss": 0.2444,
"step": 3800
},
{
"epoch": 0.10189228529839883,
"grad_norm": 1.046875,
"learning_rate": 0.00012222927913623372,
"loss": 0.2339,
"step": 3850
},
{
"epoch": 0.10321556173084558,
"grad_norm": 1.0390625,
"learning_rate": 0.00012381708478882185,
"loss": 0.2192,
"step": 3900
},
{
"epoch": 0.1045388381632923,
"grad_norm": 0.98046875,
"learning_rate": 0.00012540489044140997,
"loss": 0.222,
"step": 3950
},
{
"epoch": 0.10586211459573905,
"grad_norm": 1.046875,
"learning_rate": 0.00012699269609399807,
"loss": 0.2504,
"step": 4000
},
{
"epoch": 0.10718539102818579,
"grad_norm": 1.09375,
"learning_rate": 0.00012858050174658622,
"loss": 0.2471,
"step": 4050
},
{
"epoch": 0.10850866746063252,
"grad_norm": 1.4921875,
"learning_rate": 0.00013016830739917432,
"loss": 0.2208,
"step": 4100
},
{
"epoch": 0.10983194389307926,
"grad_norm": 1.1171875,
"learning_rate": 0.00013175611305176247,
"loss": 0.2317,
"step": 4150
},
{
"epoch": 0.11115522032552601,
"grad_norm": 1.15625,
"learning_rate": 0.00013334391870435056,
"loss": 0.2151,
"step": 4200
},
{
"epoch": 0.11247849675797274,
"grad_norm": 1.234375,
"learning_rate": 0.00013493172435693871,
"loss": 0.2224,
"step": 4250
},
{
"epoch": 0.11380177319041948,
"grad_norm": 1.453125,
"learning_rate": 0.0001365195300095268,
"loss": 0.2208,
"step": 4300
},
{
"epoch": 0.11512504962286621,
"grad_norm": 1.015625,
"learning_rate": 0.00013810733566211494,
"loss": 0.2482,
"step": 4350
},
{
"epoch": 0.11644832605531295,
"grad_norm": 0.94140625,
"learning_rate": 0.00013969514131470306,
"loss": 0.2133,
"step": 4400
},
{
"epoch": 0.1177716024877597,
"grad_norm": 1.0390625,
"learning_rate": 0.00014128294696729118,
"loss": 0.2217,
"step": 4450
},
{
"epoch": 0.11909487892020643,
"grad_norm": 1.046875,
"learning_rate": 0.0001428707526198793,
"loss": 0.2714,
"step": 4500
},
{
"epoch": 0.11909487892020643,
"eval_cer": 0.059526817954636074,
"eval_loss": 0.30962076783180237,
"eval_runtime": 110.9375,
"eval_samples_per_second": 22.797,
"eval_steps_per_second": 0.18,
"eval_wer": 0.15847432905484246,
"step": 4500
},
{
"epoch": 0.12041815535265317,
"grad_norm": 0.9453125,
"learning_rate": 0.00014445855827246743,
"loss": 0.2175,
"step": 4550
},
{
"epoch": 0.12174143178509991,
"grad_norm": 1.1171875,
"learning_rate": 0.00014604636392505555,
"loss": 0.2319,
"step": 4600
},
{
"epoch": 0.12306470821754664,
"grad_norm": 1.09375,
"learning_rate": 0.00014763416957764368,
"loss": 0.2312,
"step": 4650
},
{
"epoch": 0.12438798464999339,
"grad_norm": 1.234375,
"learning_rate": 0.0001492219752302318,
"loss": 0.2293,
"step": 4700
},
{
"epoch": 0.12571126108244013,
"grad_norm": 0.9453125,
"learning_rate": 0.00015080978088281993,
"loss": 0.2221,
"step": 4750
},
{
"epoch": 0.12703453751488686,
"grad_norm": 1.0078125,
"learning_rate": 0.00015239758653540805,
"loss": 0.2238,
"step": 4800
},
{
"epoch": 0.1283578139473336,
"grad_norm": 1.03125,
"learning_rate": 0.00015398539218799615,
"loss": 0.2354,
"step": 4850
},
{
"epoch": 0.12968109037978034,
"grad_norm": 0.90234375,
"learning_rate": 0.0001555731978405843,
"loss": 0.2111,
"step": 4900
},
{
"epoch": 0.13100436681222707,
"grad_norm": 0.95703125,
"learning_rate": 0.00015716100349317242,
"loss": 0.2164,
"step": 4950
},
{
"epoch": 0.1323276432446738,
"grad_norm": 1.046875,
"learning_rate": 0.00015874880914576054,
"loss": 0.2217,
"step": 5000
},
{
"epoch": 0.13365091967712056,
"grad_norm": 0.9375,
"learning_rate": 0.0001603366147983487,
"loss": 0.2148,
"step": 5050
},
{
"epoch": 0.1349741961095673,
"grad_norm": 0.96875,
"learning_rate": 0.0001619244204509368,
"loss": 0.2289,
"step": 5100
},
{
"epoch": 0.13629747254201402,
"grad_norm": 0.91796875,
"learning_rate": 0.00016351222610352492,
"loss": 0.2314,
"step": 5150
},
{
"epoch": 0.13762074897446078,
"grad_norm": 0.984375,
"learning_rate": 0.00016510003175611304,
"loss": 0.2169,
"step": 5200
},
{
"epoch": 0.1389440254069075,
"grad_norm": 0.90625,
"learning_rate": 0.00016668783740870116,
"loss": 0.2268,
"step": 5250
},
{
"epoch": 0.14026730183935424,
"grad_norm": 0.95703125,
"learning_rate": 0.0001682756430612893,
"loss": 0.2184,
"step": 5300
},
{
"epoch": 0.141590578271801,
"grad_norm": 0.828125,
"learning_rate": 0.0001698634487138774,
"loss": 0.2333,
"step": 5350
},
{
"epoch": 0.14291385470424772,
"grad_norm": 0.8203125,
"learning_rate": 0.0001714512543664655,
"loss": 0.2297,
"step": 5400
},
{
"epoch": 0.14423713113669445,
"grad_norm": 0.90625,
"learning_rate": 0.00017303906001905366,
"loss": 0.2156,
"step": 5450
},
{
"epoch": 0.14556040756914118,
"grad_norm": 0.80078125,
"learning_rate": 0.00017462686567164178,
"loss": 0.2253,
"step": 5500
},
{
"epoch": 0.14688368400158794,
"grad_norm": 0.921875,
"learning_rate": 0.0001762146713242299,
"loss": 0.2328,
"step": 5550
},
{
"epoch": 0.14820696043403467,
"grad_norm": 1.0078125,
"learning_rate": 0.000177802476976818,
"loss": 0.2157,
"step": 5600
},
{
"epoch": 0.1495302368664814,
"grad_norm": 1.0078125,
"learning_rate": 0.00017939028262940615,
"loss": 0.2118,
"step": 5650
},
{
"epoch": 0.15085351329892815,
"grad_norm": 0.8359375,
"learning_rate": 0.00018097808828199428,
"loss": 0.2223,
"step": 5700
},
{
"epoch": 0.15217678973137488,
"grad_norm": 0.83984375,
"learning_rate": 0.00018256589393458237,
"loss": 0.2301,
"step": 5750
},
{
"epoch": 0.1535000661638216,
"grad_norm": 0.70703125,
"learning_rate": 0.0001841536995871705,
"loss": 0.2152,
"step": 5800
},
{
"epoch": 0.15482334259626837,
"grad_norm": 1.03125,
"learning_rate": 0.00018574150523975865,
"loss": 0.2105,
"step": 5850
},
{
"epoch": 0.1561466190287151,
"grad_norm": 1.03125,
"learning_rate": 0.00018732931089234677,
"loss": 0.2686,
"step": 5900
},
{
"epoch": 0.15746989546116183,
"grad_norm": 0.859375,
"learning_rate": 0.00018891711654493487,
"loss": 0.2262,
"step": 5950
},
{
"epoch": 0.1587931718936086,
"grad_norm": 0.8046875,
"learning_rate": 0.000190504922197523,
"loss": 0.2176,
"step": 6000
},
{
"epoch": 0.1587931718936086,
"eval_cer": 0.05191187784647697,
"eval_loss": 0.3073718547821045,
"eval_runtime": 114.513,
"eval_samples_per_second": 22.085,
"eval_steps_per_second": 0.175,
"eval_wer": 0.1504521586931155,
"step": 6000
},
{
"epoch": 0.16011644832605532,
"grad_norm": 0.94140625,
"learning_rate": 0.00019209272785011114,
"loss": 0.2144,
"step": 6050
},
{
"epoch": 0.16143972475850205,
"grad_norm": 0.76171875,
"learning_rate": 0.00019368053350269924,
"loss": 0.2096,
"step": 6100
},
{
"epoch": 0.1627630011909488,
"grad_norm": 0.8359375,
"learning_rate": 0.00019526833915528737,
"loss": 0.2307,
"step": 6150
},
{
"epoch": 0.16408627762339553,
"grad_norm": 0.7734375,
"learning_rate": 0.0001968561448078755,
"loss": 0.2096,
"step": 6200
},
{
"epoch": 0.16540955405584226,
"grad_norm": 1.4140625,
"learning_rate": 0.00019844395046046364,
"loss": 0.2407,
"step": 6250
},
{
"epoch": 0.166732830488289,
"grad_norm": 0.98046875,
"learning_rate": 0.00020003175611305174,
"loss": 0.3033,
"step": 6300
},
{
"epoch": 0.16805610692073575,
"grad_norm": 0.91796875,
"learning_rate": 0.00020161956176563986,
"loss": 0.2261,
"step": 6350
},
{
"epoch": 0.16937938335318248,
"grad_norm": 0.796875,
"learning_rate": 0.000203207367418228,
"loss": 0.234,
"step": 6400
},
{
"epoch": 0.1707026597856292,
"grad_norm": 0.76171875,
"learning_rate": 0.0002047951730708161,
"loss": 0.2131,
"step": 6450
},
{
"epoch": 0.17202593621807596,
"grad_norm": 0.8984375,
"learning_rate": 0.00020638297872340423,
"loss": 0.2275,
"step": 6500
},
{
"epoch": 0.1733492126505227,
"grad_norm": 0.75,
"learning_rate": 0.00020797078437599236,
"loss": 0.2231,
"step": 6550
},
{
"epoch": 0.17467248908296942,
"grad_norm": 0.6796875,
"learning_rate": 0.0002095585900285805,
"loss": 0.2131,
"step": 6600
},
{
"epoch": 0.17599576551541618,
"grad_norm": 0.7421875,
"learning_rate": 0.0002111463956811686,
"loss": 0.2196,
"step": 6650
},
{
"epoch": 0.1773190419478629,
"grad_norm": 0.76171875,
"learning_rate": 0.00021273420133375673,
"loss": 0.2097,
"step": 6700
},
{
"epoch": 0.17864231838030964,
"grad_norm": 0.77734375,
"learning_rate": 0.00021432200698634485,
"loss": 0.2114,
"step": 6750
},
{
"epoch": 0.1799655948127564,
"grad_norm": 0.8125,
"learning_rate": 0.000215909812638933,
"loss": 0.2283,
"step": 6800
},
{
"epoch": 0.18128887124520313,
"grad_norm": 0.76953125,
"learning_rate": 0.0002174976182915211,
"loss": 0.2285,
"step": 6850
},
{
"epoch": 0.18261214767764986,
"grad_norm": 0.734375,
"learning_rate": 0.00021908542394410922,
"loss": 0.2159,
"step": 6900
},
{
"epoch": 0.1839354241100966,
"grad_norm": 0.80078125,
"learning_rate": 0.00022067322959669732,
"loss": 0.2125,
"step": 6950
},
{
"epoch": 0.18525870054254334,
"grad_norm": 0.75390625,
"learning_rate": 0.00022226103524928547,
"loss": 0.2028,
"step": 7000
},
{
"epoch": 0.18658197697499007,
"grad_norm": 0.9453125,
"learning_rate": 0.0002238488409018736,
"loss": 0.2058,
"step": 7050
},
{
"epoch": 0.1879052534074368,
"grad_norm": 0.67578125,
"learning_rate": 0.00022543664655446172,
"loss": 0.2004,
"step": 7100
},
{
"epoch": 0.18922852983988356,
"grad_norm": 0.671875,
"learning_rate": 0.00022702445220704981,
"loss": 0.2159,
"step": 7150
},
{
"epoch": 0.1905518062723303,
"grad_norm": 0.76953125,
"learning_rate": 0.00022861225785963797,
"loss": 0.2102,
"step": 7200
},
{
"epoch": 0.19187508270477702,
"grad_norm": 0.75,
"learning_rate": 0.0002302000635122261,
"loss": 0.2061,
"step": 7250
},
{
"epoch": 0.19319835913722377,
"grad_norm": 0.84765625,
"learning_rate": 0.0002317878691648142,
"loss": 0.2354,
"step": 7300
},
{
"epoch": 0.1945216355696705,
"grad_norm": 0.78125,
"learning_rate": 0.0002333756748174023,
"loss": 0.2471,
"step": 7350
},
{
"epoch": 0.19584491200211723,
"grad_norm": 0.80859375,
"learning_rate": 0.00023496348046999046,
"loss": 0.2141,
"step": 7400
},
{
"epoch": 0.197168188434564,
"grad_norm": 0.75390625,
"learning_rate": 0.00023655128612257858,
"loss": 0.2106,
"step": 7450
},
{
"epoch": 0.19849146486701072,
"grad_norm": 0.8125,
"learning_rate": 0.00023813909177516668,
"loss": 0.2073,
"step": 7500
},
{
"epoch": 0.19849146486701072,
"eval_cer": 0.05027208248395303,
"eval_loss": 0.2880328893661499,
"eval_runtime": 114.539,
"eval_samples_per_second": 22.08,
"eval_steps_per_second": 0.175,
"eval_wer": 0.1450554259043174,
"step": 7500
},
{
"epoch": 0.19981474129945745,
"grad_norm": 0.734375,
"learning_rate": 0.00023972689742775483,
"loss": 0.1975,
"step": 7550
},
{
"epoch": 0.2011380177319042,
"grad_norm": 0.8125,
"learning_rate": 0.00024131470308034296,
"loss": 0.2048,
"step": 7600
},
{
"epoch": 0.20246129416435094,
"grad_norm": 0.76953125,
"learning_rate": 0.00024290250873293108,
"loss": 0.2097,
"step": 7650
},
{
"epoch": 0.20378457059679767,
"grad_norm": 0.7890625,
"learning_rate": 0.0002444903143855192,
"loss": 0.2029,
"step": 7700
},
{
"epoch": 0.20510784702924442,
"grad_norm": 0.8046875,
"learning_rate": 0.00024607812003810733,
"loss": 0.2114,
"step": 7750
},
{
"epoch": 0.20643112346169115,
"grad_norm": 0.80859375,
"learning_rate": 0.0002476659256906954,
"loss": 0.2223,
"step": 7800
},
{
"epoch": 0.20775439989413788,
"grad_norm": 0.828125,
"learning_rate": 0.0002492537313432836,
"loss": 0.2074,
"step": 7850
},
{
"epoch": 0.2090776763265846,
"grad_norm": 0.671875,
"learning_rate": 0.00025084153699587167,
"loss": 0.2023,
"step": 7900
},
{
"epoch": 0.21040095275903137,
"grad_norm": 1.078125,
"learning_rate": 0.0002524293426484598,
"loss": 0.2184,
"step": 7950
},
{
"epoch": 0.2117242291914781,
"grad_norm": 0.70703125,
"learning_rate": 0.0002540171483010479,
"loss": 0.2055,
"step": 8000
},
{
"epoch": 0.21304750562392483,
"grad_norm": 0.921875,
"learning_rate": 0.00025560495395363607,
"loss": 0.2169,
"step": 8050
},
{
"epoch": 0.21437078205637158,
"grad_norm": 0.8046875,
"learning_rate": 0.00025719275960622417,
"loss": 0.2325,
"step": 8100
},
{
"epoch": 0.2156940584888183,
"grad_norm": 0.82421875,
"learning_rate": 0.0002587805652588123,
"loss": 0.1956,
"step": 8150
},
{
"epoch": 0.21701733492126504,
"grad_norm": 0.7578125,
"learning_rate": 0.0002603683709114004,
"loss": 0.2345,
"step": 8200
},
{
"epoch": 0.2183406113537118,
"grad_norm": 0.80859375,
"learning_rate": 0.00026195617656398857,
"loss": 0.203,
"step": 8250
},
{
"epoch": 0.21966388778615853,
"grad_norm": 0.67578125,
"learning_rate": 0.00026354398221657666,
"loss": 0.2008,
"step": 8300
},
{
"epoch": 0.22098716421860526,
"grad_norm": 0.77734375,
"learning_rate": 0.0002651317878691648,
"loss": 0.2265,
"step": 8350
},
{
"epoch": 0.22231044065105202,
"grad_norm": 0.72265625,
"learning_rate": 0.0002667195935217529,
"loss": 0.2012,
"step": 8400
},
{
"epoch": 0.22363371708349875,
"grad_norm": 0.76953125,
"learning_rate": 0.000268307399174341,
"loss": 0.2045,
"step": 8450
},
{
"epoch": 0.22495699351594547,
"grad_norm": 0.64453125,
"learning_rate": 0.00026989520482692916,
"loss": 0.2057,
"step": 8500
},
{
"epoch": 0.22628026994839223,
"grad_norm": 0.828125,
"learning_rate": 0.0002714830104795173,
"loss": 0.2172,
"step": 8550
},
{
"epoch": 0.22760354638083896,
"grad_norm": 0.88671875,
"learning_rate": 0.0002730708161321054,
"loss": 0.2137,
"step": 8600
},
{
"epoch": 0.2289268228132857,
"grad_norm": 0.6640625,
"learning_rate": 0.0002746586217846935,
"loss": 0.2021,
"step": 8650
},
{
"epoch": 0.23025009924573242,
"grad_norm": 1.0390625,
"learning_rate": 0.00027624642743728165,
"loss": 0.2064,
"step": 8700
},
{
"epoch": 0.23157337567817918,
"grad_norm": 0.74609375,
"learning_rate": 0.0002778342330898698,
"loss": 0.2082,
"step": 8750
},
{
"epoch": 0.2328966521106259,
"grad_norm": 1.3046875,
"learning_rate": 0.0002794220387424579,
"loss": 0.2139,
"step": 8800
},
{
"epoch": 0.23421992854307264,
"grad_norm": 0.7890625,
"learning_rate": 0.000281009844395046,
"loss": 0.2009,
"step": 8850
},
{
"epoch": 0.2355432049755194,
"grad_norm": 0.6875,
"learning_rate": 0.00028259765004763415,
"loss": 0.2015,
"step": 8900
},
{
"epoch": 0.23686648140796612,
"grad_norm": 0.69140625,
"learning_rate": 0.0002841854557002223,
"loss": 0.2006,
"step": 8950
},
{
"epoch": 0.23818975784041285,
"grad_norm": 0.6640625,
"learning_rate": 0.0002857732613528104,
"loss": 0.2276,
"step": 9000
},
{
"epoch": 0.23818975784041285,
"eval_cer": 0.05210842866047812,
"eval_loss": 0.2827185392379761,
"eval_runtime": 111.4286,
"eval_samples_per_second": 22.696,
"eval_steps_per_second": 0.179,
"eval_wer": 0.1450554259043174,
"step": 9000
},
{
"epoch": 0.2395130342728596,
"grad_norm": 0.6484375,
"learning_rate": 0.0002873610670053985,
"loss": 0.192,
"step": 9050
},
{
"epoch": 0.24083631070530634,
"grad_norm": 0.6328125,
"learning_rate": 0.00028894887265798664,
"loss": 0.2112,
"step": 9100
},
{
"epoch": 0.24215958713775307,
"grad_norm": 0.671875,
"learning_rate": 0.00029053667831057474,
"loss": 0.2005,
"step": 9150
},
{
"epoch": 0.24348286357019983,
"grad_norm": 0.71875,
"learning_rate": 0.0002921244839631629,
"loss": 0.2092,
"step": 9200
},
{
"epoch": 0.24480614000264656,
"grad_norm": 0.66015625,
"learning_rate": 0.000293712289615751,
"loss": 0.2021,
"step": 9250
},
{
"epoch": 0.24612941643509328,
"grad_norm": 0.6875,
"learning_rate": 0.00029530009526833914,
"loss": 0.2614,
"step": 9300
},
{
"epoch": 0.24745269286754004,
"grad_norm": 0.671875,
"learning_rate": 0.00029688790092092723,
"loss": 0.2146,
"step": 9350
},
{
"epoch": 0.24877596929998677,
"grad_norm": 0.62109375,
"learning_rate": 0.0002984757065735154,
"loss": 0.2077,
"step": 9400
},
{
"epoch": 0.25009924573243353,
"grad_norm": 0.96484375,
"learning_rate": 0.0002999999999080824,
"loss": 0.2342,
"step": 9450
},
{
"epoch": 0.25142252216488026,
"grad_norm": 0.65234375,
"learning_rate": 0.00029999993786372365,
"loss": 0.221,
"step": 9500
},
{
"epoch": 0.252745798597327,
"grad_norm": 0.62890625,
"learning_rate": 0.0002999997609224513,
"loss": 0.1937,
"step": 9550
},
{
"epoch": 0.2540690750297737,
"grad_norm": 1.1484375,
"learning_rate": 0.0002999994690844009,
"loss": 0.2232,
"step": 9600
},
{
"epoch": 0.25539235146222045,
"grad_norm": 0.6953125,
"learning_rate": 0.0002999990623497959,
"loss": 0.2037,
"step": 9650
},
{
"epoch": 0.2567156278946672,
"grad_norm": 0.67578125,
"learning_rate": 0.00029999854071894786,
"loss": 0.1957,
"step": 9700
},
{
"epoch": 0.25803890432711396,
"grad_norm": 0.8046875,
"learning_rate": 0.0002999979041922564,
"loss": 0.2061,
"step": 9750
},
{
"epoch": 0.2593621807595607,
"grad_norm": 0.7734375,
"learning_rate": 0.0002999971527702091,
"loss": 0.2019,
"step": 9800
},
{
"epoch": 0.2606854571920074,
"grad_norm": 0.7265625,
"learning_rate": 0.00029999628645338154,
"loss": 0.2129,
"step": 9850
},
{
"epoch": 0.26200873362445415,
"grad_norm": 0.625,
"learning_rate": 0.0002999953052424372,
"loss": 0.1945,
"step": 9900
},
{
"epoch": 0.2633320100569009,
"grad_norm": 0.6875,
"learning_rate": 0.00029999420913812776,
"loss": 0.2055,
"step": 9950
},
{
"epoch": 0.2646552864893476,
"grad_norm": 0.71484375,
"learning_rate": 0.0002999929981412927,
"loss": 0.1968,
"step": 10000
},
{
"epoch": 0.26597856292179434,
"grad_norm": 0.6015625,
"learning_rate": 0.0002999916722528598,
"loss": 0.2005,
"step": 10050
},
{
"epoch": 0.2673018393542411,
"grad_norm": 0.70703125,
"learning_rate": 0.0002999902314738445,
"loss": 0.2064,
"step": 10100
},
{
"epoch": 0.26862511578668785,
"grad_norm": 0.69921875,
"learning_rate": 0.0002999886758053506,
"loss": 0.2014,
"step": 10150
},
{
"epoch": 0.2699483922191346,
"grad_norm": 0.59375,
"learning_rate": 0.00029998700524856947,
"loss": 0.1872,
"step": 10200
},
{
"epoch": 0.2712716686515813,
"grad_norm": 0.65234375,
"learning_rate": 0.00029998521980478083,
"loss": 0.1846,
"step": 10250
},
{
"epoch": 0.27259494508402804,
"grad_norm": 0.67578125,
"learning_rate": 0.0002999833194753523,
"loss": 0.1961,
"step": 10300
},
{
"epoch": 0.27391822151647477,
"grad_norm": 0.65625,
"learning_rate": 0.00029998130426173956,
"loss": 0.1887,
"step": 10350
},
{
"epoch": 0.27524149794892155,
"grad_norm": 0.58203125,
"learning_rate": 0.0002999791741654861,
"loss": 0.1912,
"step": 10400
},
{
"epoch": 0.2765647743813683,
"grad_norm": 0.76171875,
"learning_rate": 0.0002999769291882236,
"loss": 0.2197,
"step": 10450
},
{
"epoch": 0.277888050813815,
"grad_norm": 0.68359375,
"learning_rate": 0.0002999745693316717,
"loss": 0.2021,
"step": 10500
},
{
"epoch": 0.277888050813815,
"eval_cer": 0.06062750251304255,
"eval_loss": 0.2784423530101776,
"eval_runtime": 172.9503,
"eval_samples_per_second": 14.623,
"eval_steps_per_second": 0.116,
"eval_wer": 0.1528588098016336,
"step": 10500
},
{
"epoch": 0.27921132724626174,
"grad_norm": 0.62109375,
"learning_rate": 0.0002999720945976379,
"loss": 0.2076,
"step": 10550
},
{
"epoch": 0.28053460367870847,
"grad_norm": 0.5546875,
"learning_rate": 0.00029996950498801785,
"loss": 0.2289,
"step": 10600
},
{
"epoch": 0.2818578801111552,
"grad_norm": 0.8125,
"learning_rate": 0.00029996680050479514,
"loss": 0.1823,
"step": 10650
},
{
"epoch": 0.283181156543602,
"grad_norm": 0.609375,
"learning_rate": 0.00029996398115004135,
"loss": 0.2035,
"step": 10700
},
{
"epoch": 0.2845044329760487,
"grad_norm": 0.55078125,
"learning_rate": 0.0002999610469259161,
"loss": 0.1845,
"step": 10750
},
{
"epoch": 0.28582770940849545,
"grad_norm": 0.62890625,
"learning_rate": 0.00029995799783466684,
"loss": 0.1911,
"step": 10800
},
{
"epoch": 0.2871509858409422,
"grad_norm": 0.5546875,
"learning_rate": 0.00029995483387862924,
"loss": 0.1996,
"step": 10850
},
{
"epoch": 0.2884742622733889,
"grad_norm": 0.765625,
"learning_rate": 0.0002999515550602267,
"loss": 0.1826,
"step": 10900
},
{
"epoch": 0.28979753870583563,
"grad_norm": 0.76171875,
"learning_rate": 0.00029994816138197074,
"loss": 0.1857,
"step": 10950
},
{
"epoch": 0.29112081513828236,
"grad_norm": 0.625,
"learning_rate": 0.000299944652846461,
"loss": 0.1872,
"step": 11000
},
{
"epoch": 0.29244409157072915,
"grad_norm": 0.66796875,
"learning_rate": 0.0002999410294563847,
"loss": 0.1938,
"step": 11050
},
{
"epoch": 0.2937673680031759,
"grad_norm": 0.73828125,
"learning_rate": 0.0002999372912145175,
"loss": 0.1905,
"step": 11100
},
{
"epoch": 0.2950906444356226,
"grad_norm": 0.8671875,
"learning_rate": 0.00029993343812372274,
"loss": 0.1967,
"step": 11150
},
{
"epoch": 0.29641392086806934,
"grad_norm": 0.59765625,
"learning_rate": 0.00029992947018695187,
"loss": 0.1769,
"step": 11200
},
{
"epoch": 0.29773719730051607,
"grad_norm": 0.61328125,
"learning_rate": 0.0002999253874072441,
"loss": 0.1993,
"step": 11250
},
{
"epoch": 0.2990604737329628,
"grad_norm": 0.578125,
"learning_rate": 0.0002999211897877269,
"loss": 0.1774,
"step": 11300
},
{
"epoch": 0.3003837501654096,
"grad_norm": 0.59765625,
"learning_rate": 0.00029991687733161546,
"loss": 0.1969,
"step": 11350
},
{
"epoch": 0.3017070265978563,
"grad_norm": 0.6171875,
"learning_rate": 0.0002999124500422131,
"loss": 0.1819,
"step": 11400
},
{
"epoch": 0.30303030303030304,
"grad_norm": 0.671875,
"learning_rate": 0.00029990790792291103,
"loss": 0.1726,
"step": 11450
},
{
"epoch": 0.30435357946274977,
"grad_norm": 0.6015625,
"learning_rate": 0.0002999032509771884,
"loss": 0.1836,
"step": 11500
},
{
"epoch": 0.3056768558951965,
"grad_norm": 0.55859375,
"learning_rate": 0.0002998984792086123,
"loss": 0.1972,
"step": 11550
},
{
"epoch": 0.3070001323276432,
"grad_norm": 0.71484375,
"learning_rate": 0.0002998935926208379,
"loss": 0.1854,
"step": 11600
},
{
"epoch": 0.30832340876008996,
"grad_norm": 0.7265625,
"learning_rate": 0.0002998885912176081,
"loss": 0.1767,
"step": 11650
},
{
"epoch": 0.30964668519253674,
"grad_norm": 0.578125,
"learning_rate": 0.000299883475002754,
"loss": 0.1745,
"step": 11700
},
{
"epoch": 0.31096996162498347,
"grad_norm": 0.62890625,
"learning_rate": 0.0002998782439801945,
"loss": 0.1822,
"step": 11750
},
{
"epoch": 0.3122932380574302,
"grad_norm": 0.63671875,
"learning_rate": 0.00029987289815393633,
"loss": 0.1706,
"step": 11800
},
{
"epoch": 0.31361651448987693,
"grad_norm": 0.546875,
"learning_rate": 0.0002998674375280745,
"loss": 0.179,
"step": 11850
},
{
"epoch": 0.31493979092232366,
"grad_norm": 0.486328125,
"learning_rate": 0.00029986186210679156,
"loss": 0.174,
"step": 11900
},
{
"epoch": 0.3162630673547704,
"grad_norm": 0.66796875,
"learning_rate": 0.00029985617189435827,
"loss": 0.1769,
"step": 11950
},
{
"epoch": 0.3175863437872172,
"grad_norm": 0.53515625,
"learning_rate": 0.0002998503668951331,
"loss": 0.1696,
"step": 12000
},
{
"epoch": 0.3175863437872172,
"eval_cer": 0.049974448394179846,
"eval_loss": 0.25553205609321594,
"eval_runtime": 121.3958,
"eval_samples_per_second": 20.833,
"eval_steps_per_second": 0.165,
"eval_wer": 0.13353267211201866,
"step": 12000
},
{
"epoch": 0.3189096202196639,
"grad_norm": 0.625,
"learning_rate": 0.00029984444711356266,
"loss": 0.1806,
"step": 12050
},
{
"epoch": 0.32023289665211063,
"grad_norm": 0.640625,
"learning_rate": 0.0002998384125541814,
"loss": 0.1838,
"step": 12100
},
{
"epoch": 0.32155617308455736,
"grad_norm": 0.47265625,
"learning_rate": 0.0002998322632216116,
"loss": 0.1843,
"step": 12150
},
{
"epoch": 0.3228794495170041,
"grad_norm": 0.6953125,
"learning_rate": 0.0002998259991205635,
"loss": 0.1808,
"step": 12200
},
{
"epoch": 0.3242027259494508,
"grad_norm": 0.51953125,
"learning_rate": 0.0002998196202558354,
"loss": 0.1796,
"step": 12250
},
{
"epoch": 0.3255260023818976,
"grad_norm": 0.53125,
"learning_rate": 0.0002998131266323133,
"loss": 0.1744,
"step": 12300
},
{
"epoch": 0.32684927881434434,
"grad_norm": 0.6015625,
"learning_rate": 0.0002998065182549712,
"loss": 0.1725,
"step": 12350
},
{
"epoch": 0.32817255524679106,
"grad_norm": 0.59765625,
"learning_rate": 0.00029979979512887096,
"loss": 0.179,
"step": 12400
},
{
"epoch": 0.3294958316792378,
"grad_norm": 0.578125,
"learning_rate": 0.0002997929572591624,
"loss": 0.1667,
"step": 12450
},
{
"epoch": 0.3308191081116845,
"grad_norm": 0.6015625,
"learning_rate": 0.00029978600465108314,
"loss": 0.1695,
"step": 12500
},
{
"epoch": 0.33214238454413125,
"grad_norm": 0.6953125,
"learning_rate": 0.00029977893730995876,
"loss": 0.1863,
"step": 12550
},
{
"epoch": 0.333465660976578,
"grad_norm": 0.5078125,
"learning_rate": 0.0002997717552412027,
"loss": 0.1763,
"step": 12600
},
{
"epoch": 0.33478893740902477,
"grad_norm": 0.6015625,
"learning_rate": 0.0002997644584503163,
"loss": 0.1765,
"step": 12650
},
{
"epoch": 0.3361122138414715,
"grad_norm": 0.62890625,
"learning_rate": 0.0002997570469428887,
"loss": 0.1882,
"step": 12700
},
{
"epoch": 0.3374354902739182,
"grad_norm": 0.52734375,
"learning_rate": 0.00029974952072459705,
"loss": 0.1783,
"step": 12750
},
{
"epoch": 0.33875876670636496,
"grad_norm": 0.5546875,
"learning_rate": 0.0002997418798012062,
"loss": 0.1825,
"step": 12800
},
{
"epoch": 0.3400820431388117,
"grad_norm": 0.65625,
"learning_rate": 0.00029973412417856897,
"loss": 0.1803,
"step": 12850
},
{
"epoch": 0.3414053195712584,
"grad_norm": 0.671875,
"learning_rate": 0.000299726253862626,
"loss": 0.1644,
"step": 12900
},
{
"epoch": 0.3427285960037052,
"grad_norm": 0.6015625,
"learning_rate": 0.00029971826885940585,
"loss": 0.1705,
"step": 12950
},
{
"epoch": 0.34405187243615193,
"grad_norm": 0.55078125,
"learning_rate": 0.0002997101691750248,
"loss": 0.1676,
"step": 13000
},
{
"epoch": 0.34537514886859866,
"grad_norm": 0.51171875,
"learning_rate": 0.00029970195481568706,
"loss": 0.1611,
"step": 13050
},
{
"epoch": 0.3466984253010454,
"grad_norm": 0.6328125,
"learning_rate": 0.0002996936257876847,
"loss": 0.1686,
"step": 13100
},
{
"epoch": 0.3480217017334921,
"grad_norm": 0.55078125,
"learning_rate": 0.00029968518209739755,
"loss": 0.1608,
"step": 13150
},
{
"epoch": 0.34934497816593885,
"grad_norm": 0.625,
"learning_rate": 0.0002996766237512933,
"loss": 0.1686,
"step": 13200
},
{
"epoch": 0.3506682545983856,
"grad_norm": 0.58203125,
"learning_rate": 0.00029966795075592753,
"loss": 0.1846,
"step": 13250
},
{
"epoch": 0.35199153103083236,
"grad_norm": 0.59765625,
"learning_rate": 0.0002996591631179435,
"loss": 0.17,
"step": 13300
},
{
"epoch": 0.3533148074632791,
"grad_norm": 0.52734375,
"learning_rate": 0.0002996502608440724,
"loss": 0.1615,
"step": 13350
},
{
"epoch": 0.3546380838957258,
"grad_norm": 0.52734375,
"learning_rate": 0.0002996412439411332,
"loss": 0.1666,
"step": 13400
},
{
"epoch": 0.35596136032817255,
"grad_norm": 1.0234375,
"learning_rate": 0.0002996321124160326,
"loss": 0.1908,
"step": 13450
},
{
"epoch": 0.3572846367606193,
"grad_norm": 0.49609375,
"learning_rate": 0.0002996228662757653,
"loss": 0.1604,
"step": 13500
},
{
"epoch": 0.3572846367606193,
"eval_cer": 0.052984483717169,
"eval_loss": 0.2499123215675354,
"eval_runtime": 129.0848,
"eval_samples_per_second": 19.592,
"eval_steps_per_second": 0.155,
"eval_wer": 0.13105309218203035,
"step": 13500
},
{
"epoch": 0.358607913193066,
"grad_norm": 0.55078125,
"learning_rate": 0.0002996135055274135,
"loss": 0.1627,
"step": 13550
},
{
"epoch": 0.3599311896255128,
"grad_norm": 0.51953125,
"learning_rate": 0.00029960403017814744,
"loss": 0.192,
"step": 13600
},
{
"epoch": 0.3612544660579595,
"grad_norm": 0.6484375,
"learning_rate": 0.00029959444023522504,
"loss": 0.1543,
"step": 13650
},
{
"epoch": 0.36257774249040625,
"grad_norm": 0.52734375,
"learning_rate": 0.000299584735705992,
"loss": 0.1754,
"step": 13700
},
{
"epoch": 0.363901018922853,
"grad_norm": 0.62890625,
"learning_rate": 0.00029957491659788173,
"loss": 0.175,
"step": 13750
},
{
"epoch": 0.3652242953552997,
"grad_norm": 0.490234375,
"learning_rate": 0.0002995649829184155,
"loss": 0.1673,
"step": 13800
},
{
"epoch": 0.36654757178774644,
"grad_norm": 0.515625,
"learning_rate": 0.00029955493467520236,
"loss": 0.162,
"step": 13850
},
{
"epoch": 0.3678708482201932,
"grad_norm": 0.5859375,
"learning_rate": 0.00029954477187593904,
"loss": 0.1672,
"step": 13900
},
{
"epoch": 0.36919412465263995,
"grad_norm": 0.53125,
"learning_rate": 0.00029953449452840993,
"loss": 0.1821,
"step": 13950
},
{
"epoch": 0.3705174010850867,
"grad_norm": 0.57421875,
"learning_rate": 0.0002995241026404875,
"loss": 0.1619,
"step": 14000
},
{
"epoch": 0.3718406775175334,
"grad_norm": 0.498046875,
"learning_rate": 0.0002995135962201315,
"loss": 0.1708,
"step": 14050
},
{
"epoch": 0.37316395394998014,
"grad_norm": 0.53125,
"learning_rate": 0.0002995029752753897,
"loss": 0.1717,
"step": 14100
},
{
"epoch": 0.37448723038242687,
"grad_norm": 0.58203125,
"learning_rate": 0.00029949223981439756,
"loss": 0.1787,
"step": 14150
},
{
"epoch": 0.3758105068148736,
"grad_norm": 0.52734375,
"learning_rate": 0.0002994813898453783,
"loss": 0.158,
"step": 14200
},
{
"epoch": 0.3771337832473204,
"grad_norm": 0.46484375,
"learning_rate": 0.00029947042537664263,
"loss": 0.1773,
"step": 14250
},
{
"epoch": 0.3784570596797671,
"grad_norm": 0.4921875,
"learning_rate": 0.0002994593464165891,
"loss": 0.1711,
"step": 14300
},
{
"epoch": 0.37978033611221385,
"grad_norm": 0.53515625,
"learning_rate": 0.00029944815297370414,
"loss": 0.1647,
"step": 14350
},
{
"epoch": 0.3811036125446606,
"grad_norm": 0.640625,
"learning_rate": 0.0002994368450565616,
"loss": 0.1713,
"step": 14400
},
{
"epoch": 0.3824268889771073,
"grad_norm": 0.515625,
"learning_rate": 0.00029942542267382305,
"loss": 0.1574,
"step": 14450
},
{
"epoch": 0.38375016540955403,
"grad_norm": 0.69140625,
"learning_rate": 0.00029941388583423794,
"loss": 0.1604,
"step": 14500
},
{
"epoch": 0.3850734418420008,
"grad_norm": 0.50390625,
"learning_rate": 0.0002994022345466432,
"loss": 0.166,
"step": 14550
},
{
"epoch": 0.38639671827444755,
"grad_norm": 0.455078125,
"learning_rate": 0.00029939046881996345,
"loss": 0.1752,
"step": 14600
},
{
"epoch": 0.3877199947068943,
"grad_norm": 0.515625,
"learning_rate": 0.000299378588663211,
"loss": 0.1578,
"step": 14650
},
{
"epoch": 0.389043271139341,
"grad_norm": 0.48828125,
"learning_rate": 0.00029936659408548585,
"loss": 0.1617,
"step": 14700
},
{
"epoch": 0.39036654757178774,
"grad_norm": 0.412109375,
"learning_rate": 0.00029935448509597554,
"loss": 0.1744,
"step": 14750
},
{
"epoch": 0.39168982400423447,
"grad_norm": 0.50390625,
"learning_rate": 0.00029934226170395535,
"loss": 0.1676,
"step": 14800
},
{
"epoch": 0.3930131004366812,
"grad_norm": 0.52734375,
"learning_rate": 0.00029932992391878814,
"loss": 0.1588,
"step": 14850
},
{
"epoch": 0.394336376869128,
"grad_norm": 0.57421875,
"learning_rate": 0.0002993174717499244,
"loss": 0.1616,
"step": 14900
},
{
"epoch": 0.3956596533015747,
"grad_norm": 0.4921875,
"learning_rate": 0.0002993049052069023,
"loss": 0.169,
"step": 14950
},
{
"epoch": 0.39698292973402144,
"grad_norm": 0.79296875,
"learning_rate": 0.00029929222429934753,
"loss": 0.1694,
"step": 15000
},
{
"epoch": 0.39698292973402144,
"eval_cer": 0.055393635123068885,
"eval_loss": 0.2413298487663269,
"eval_runtime": 128.1516,
"eval_samples_per_second": 19.734,
"eval_steps_per_second": 0.156,
"eval_wer": 0.13995040840140024,
"step": 15000
},
{
"epoch": 0.39830620616646817,
"grad_norm": 0.5078125,
"learning_rate": 0.0002992794290369733,
"loss": 0.1575,
"step": 15050
},
{
"epoch": 0.3996294825989149,
"grad_norm": 0.498046875,
"learning_rate": 0.0002992665194295807,
"loss": 0.1689,
"step": 15100
},
{
"epoch": 0.4009527590313616,
"grad_norm": 0.56640625,
"learning_rate": 0.00029925349548705815,
"loss": 0.1648,
"step": 15150
},
{
"epoch": 0.4022760354638084,
"grad_norm": 0.61328125,
"learning_rate": 0.0002992403572193816,
"loss": 0.1529,
"step": 15200
},
{
"epoch": 0.40359931189625514,
"grad_norm": 1.03125,
"learning_rate": 0.0002992271046366149,
"loss": 0.1612,
"step": 15250
},
{
"epoch": 0.40492258832870187,
"grad_norm": 0.498046875,
"learning_rate": 0.00029921373774890916,
"loss": 0.1633,
"step": 15300
},
{
"epoch": 0.4062458647611486,
"grad_norm": 0.5546875,
"learning_rate": 0.00029920025656650313,
"loss": 0.1653,
"step": 15350
},
{
"epoch": 0.40756914119359533,
"grad_norm": 0.609375,
"learning_rate": 0.00029918666109972315,
"loss": 0.1697,
"step": 15400
},
{
"epoch": 0.40889241762604206,
"grad_norm": 0.5390625,
"learning_rate": 0.00029917295135898313,
"loss": 0.1736,
"step": 15450
},
{
"epoch": 0.41021569405848884,
"grad_norm": 0.54296875,
"learning_rate": 0.00029915912735478434,
"loss": 0.1663,
"step": 15500
},
{
"epoch": 0.4115389704909356,
"grad_norm": 0.5,
"learning_rate": 0.00029914518909771574,
"loss": 0.1557,
"step": 15550
},
{
"epoch": 0.4128622469233823,
"grad_norm": 0.51171875,
"learning_rate": 0.00029913113659845375,
"loss": 0.147,
"step": 15600
},
{
"epoch": 0.41418552335582903,
"grad_norm": 0.546875,
"learning_rate": 0.00029911696986776237,
"loss": 0.1564,
"step": 15650
},
{
"epoch": 0.41550879978827576,
"grad_norm": 0.56640625,
"learning_rate": 0.00029910268891649285,
"loss": 0.1541,
"step": 15700
},
{
"epoch": 0.4168320762207225,
"grad_norm": 0.55078125,
"learning_rate": 0.0002990882937555843,
"loss": 0.17,
"step": 15750
},
{
"epoch": 0.4181553526531692,
"grad_norm": 0.5625,
"learning_rate": 0.000299073784396063,
"loss": 0.161,
"step": 15800
},
{
"epoch": 0.419478629085616,
"grad_norm": 0.56640625,
"learning_rate": 0.00029905916084904286,
"loss": 0.162,
"step": 15850
},
{
"epoch": 0.42080190551806274,
"grad_norm": 0.5234375,
"learning_rate": 0.0002990444231257253,
"loss": 0.15,
"step": 15900
},
{
"epoch": 0.42212518195050946,
"grad_norm": 0.55859375,
"learning_rate": 0.000299029571237399,
"loss": 0.1605,
"step": 15950
},
{
"epoch": 0.4234484583829562,
"grad_norm": 0.52734375,
"learning_rate": 0.00029901460519544026,
"loss": 0.156,
"step": 16000
},
{
"epoch": 0.4247717348154029,
"grad_norm": 0.5859375,
"learning_rate": 0.0002989995250113128,
"loss": 0.1642,
"step": 16050
},
{
"epoch": 0.42609501124784965,
"grad_norm": 0.52734375,
"learning_rate": 0.00029898433069656765,
"loss": 0.1515,
"step": 16100
},
{
"epoch": 0.42741828768029644,
"grad_norm": 0.5234375,
"learning_rate": 0.0002989690222628434,
"loss": 0.1491,
"step": 16150
},
{
"epoch": 0.42874156411274317,
"grad_norm": 0.87109375,
"learning_rate": 0.00029895359972186594,
"loss": 0.1596,
"step": 16200
},
{
"epoch": 0.4300648405451899,
"grad_norm": 0.57421875,
"learning_rate": 0.00029893806308544876,
"loss": 0.1605,
"step": 16250
},
{
"epoch": 0.4313881169776366,
"grad_norm": 0.44921875,
"learning_rate": 0.00029892241236549247,
"loss": 0.1636,
"step": 16300
},
{
"epoch": 0.43271139341008336,
"grad_norm": 0.494140625,
"learning_rate": 0.00029890664757398525,
"loss": 0.156,
"step": 16350
},
{
"epoch": 0.4340346698425301,
"grad_norm": 0.6171875,
"learning_rate": 0.00029889076872300263,
"loss": 0.1624,
"step": 16400
},
{
"epoch": 0.43535794627497687,
"grad_norm": 0.53125,
"learning_rate": 0.00029887477582470745,
"loss": 0.1548,
"step": 16450
},
{
"epoch": 0.4366812227074236,
"grad_norm": 0.56640625,
"learning_rate": 0.00029885866889135007,
"loss": 0.1643,
"step": 16500
},
{
"epoch": 0.4366812227074236,
"eval_cer": 0.043830831522257974,
"eval_loss": 0.23458540439605713,
"eval_runtime": 111.6706,
"eval_samples_per_second": 22.647,
"eval_steps_per_second": 0.179,
"eval_wer": 0.12106184364060676,
"step": 16500
},
{
"epoch": 0.43800449913987033,
"grad_norm": 0.5,
"learning_rate": 0.0002988424479352679,
"loss": 0.1565,
"step": 16550
},
{
"epoch": 0.43932777557231706,
"grad_norm": 0.46484375,
"learning_rate": 0.0002988261129688859,
"loss": 0.1727,
"step": 16600
},
{
"epoch": 0.4406510520047638,
"grad_norm": 0.55078125,
"learning_rate": 0.0002988096640047164,
"loss": 0.1578,
"step": 16650
},
{
"epoch": 0.4419743284372105,
"grad_norm": 0.46484375,
"learning_rate": 0.000298793101055359,
"loss": 0.1491,
"step": 16700
},
{
"epoch": 0.44329760486965725,
"grad_norm": 0.515625,
"learning_rate": 0.0002987764241335005,
"loss": 0.1616,
"step": 16750
},
{
"epoch": 0.44462088130210403,
"grad_norm": 0.51953125,
"learning_rate": 0.0002987596332519151,
"loss": 0.1539,
"step": 16800
},
{
"epoch": 0.44594415773455076,
"grad_norm": 0.51953125,
"learning_rate": 0.0002987427284234643,
"loss": 0.1558,
"step": 16850
},
{
"epoch": 0.4472674341669975,
"grad_norm": 0.68359375,
"learning_rate": 0.00029872570966109677,
"loss": 0.1489,
"step": 16900
},
{
"epoch": 0.4485907105994442,
"grad_norm": 0.51953125,
"learning_rate": 0.0002987085769778486,
"loss": 0.1629,
"step": 16950
},
{
"epoch": 0.44991398703189095,
"grad_norm": 0.6640625,
"learning_rate": 0.0002986913303868431,
"loss": 0.3016,
"step": 17000
},
{
"epoch": 0.4512372634643377,
"grad_norm": 0.55078125,
"learning_rate": 0.00029867396990129085,
"loss": 0.1673,
"step": 17050
},
{
"epoch": 0.45256053989678446,
"grad_norm": 0.58984375,
"learning_rate": 0.00029865649553448954,
"loss": 0.1649,
"step": 17100
},
{
"epoch": 0.4538838163292312,
"grad_norm": 0.65234375,
"learning_rate": 0.0002986389072998242,
"loss": 0.1577,
"step": 17150
},
{
"epoch": 0.4552070927616779,
"grad_norm": 0.5703125,
"learning_rate": 0.00029862120521076705,
"loss": 0.1677,
"step": 17200
},
{
"epoch": 0.45653036919412465,
"grad_norm": 0.5234375,
"learning_rate": 0.0002986033892808776,
"loss": 0.151,
"step": 17250
},
{
"epoch": 0.4578536456265714,
"grad_norm": 0.92578125,
"learning_rate": 0.00029858545952380245,
"loss": 0.1642,
"step": 17300
},
{
"epoch": 0.4591769220590181,
"grad_norm": 0.49609375,
"learning_rate": 0.0002985674159532754,
"loss": 0.1544,
"step": 17350
},
{
"epoch": 0.46050019849146484,
"grad_norm": 0.5,
"learning_rate": 0.0002985492585831175,
"loss": 0.1686,
"step": 17400
},
{
"epoch": 0.4618234749239116,
"grad_norm": 0.58203125,
"learning_rate": 0.0002985309874272369,
"loss": 0.1577,
"step": 17450
},
{
"epoch": 0.46314675135635835,
"grad_norm": 0.58984375,
"learning_rate": 0.000298512602499629,
"loss": 0.1566,
"step": 17500
},
{
"epoch": 0.4644700277888051,
"grad_norm": 0.57421875,
"learning_rate": 0.0002984941038143762,
"loss": 0.1552,
"step": 17550
},
{
"epoch": 0.4657933042212518,
"grad_norm": 0.51171875,
"learning_rate": 0.00029847549138564813,
"loss": 0.1583,
"step": 17600
},
{
"epoch": 0.46711658065369854,
"grad_norm": 0.50390625,
"learning_rate": 0.0002984567652277016,
"loss": 0.1421,
"step": 17650
},
{
"epoch": 0.4684398570861453,
"grad_norm": 0.67578125,
"learning_rate": 0.00029843792535488037,
"loss": 0.1549,
"step": 17700
},
{
"epoch": 0.46976313351859206,
"grad_norm": 0.56640625,
"learning_rate": 0.0002984189717816155,
"loss": 0.159,
"step": 17750
},
{
"epoch": 0.4710864099510388,
"grad_norm": 0.609375,
"learning_rate": 0.000298399904522425,
"loss": 0.1471,
"step": 17800
},
{
"epoch": 0.4724096863834855,
"grad_norm": 0.498046875,
"learning_rate": 0.0002983807235919139,
"loss": 0.153,
"step": 17850
},
{
"epoch": 0.47373296281593225,
"grad_norm": 0.53125,
"learning_rate": 0.00029836142900477466,
"loss": 0.1473,
"step": 17900
},
{
"epoch": 0.475056239248379,
"grad_norm": 0.546875,
"learning_rate": 0.00029834202077578625,
"loss": 0.155,
"step": 17950
},
{
"epoch": 0.4763795156808257,
"grad_norm": 0.5078125,
"learning_rate": 0.0002983224989198152,
"loss": 0.146,
"step": 18000
},
{
"epoch": 0.4763795156808257,
"eval_cer": 0.043162558754654044,
"eval_loss": 0.22735626995563507,
"eval_runtime": 134.6423,
"eval_samples_per_second": 18.783,
"eval_steps_per_second": 0.149,
"eval_wer": 0.11829054842473746,
"step": 18000
},
{
"epoch": 0.4777027921132725,
"grad_norm": 0.447265625,
"learning_rate": 0.00029830286345181473,
"loss": 0.1641,
"step": 18050
},
{
"epoch": 0.4790260685457192,
"grad_norm": 0.5078125,
"learning_rate": 0.0002982831143868253,
"loss": 0.1573,
"step": 18100
},
{
"epoch": 0.48034934497816595,
"grad_norm": 0.53125,
"learning_rate": 0.00029826325173997424,
"loss": 0.149,
"step": 18150
},
{
"epoch": 0.4816726214106127,
"grad_norm": 0.47265625,
"learning_rate": 0.0002982432755264759,
"loss": 0.1431,
"step": 18200
},
{
"epoch": 0.4829958978430594,
"grad_norm": 0.515625,
"learning_rate": 0.00029822318576163174,
"loss": 0.1417,
"step": 18250
},
{
"epoch": 0.48431917427550614,
"grad_norm": 0.546875,
"learning_rate": 0.00029820298246083,
"loss": 0.1555,
"step": 18300
},
{
"epoch": 0.48564245070795287,
"grad_norm": 0.52734375,
"learning_rate": 0.0002981826656395461,
"loss": 0.1489,
"step": 18350
},
{
"epoch": 0.48696572714039965,
"grad_norm": 0.5390625,
"learning_rate": 0.00029816223531334236,
"loss": 0.1438,
"step": 18400
},
{
"epoch": 0.4882890035728464,
"grad_norm": 0.5703125,
"learning_rate": 0.0002981416914978679,
"loss": 0.1751,
"step": 18450
},
{
"epoch": 0.4896122800052931,
"grad_norm": 0.5234375,
"learning_rate": 0.0002981210342088588,
"loss": 0.1499,
"step": 18500
},
{
"epoch": 0.49093555643773984,
"grad_norm": 0.55859375,
"learning_rate": 0.00029810026346213826,
"loss": 0.1436,
"step": 18550
},
{
"epoch": 0.49225883287018657,
"grad_norm": 0.5703125,
"learning_rate": 0.00029807937927361623,
"loss": 0.1462,
"step": 18600
},
{
"epoch": 0.4935821093026333,
"grad_norm": 0.546875,
"learning_rate": 0.00029805838165928954,
"loss": 0.1518,
"step": 18650
},
{
"epoch": 0.4949053857350801,
"grad_norm": 0.4140625,
"learning_rate": 0.00029803727063524194,
"loss": 0.1433,
"step": 18700
},
{
"epoch": 0.4962286621675268,
"grad_norm": 0.466796875,
"learning_rate": 0.000298016046217644,
"loss": 0.1457,
"step": 18750
},
{
"epoch": 0.49755193859997354,
"grad_norm": 0.55078125,
"learning_rate": 0.0002979947084227533,
"loss": 0.1422,
"step": 18800
},
{
"epoch": 0.49887521503242027,
"grad_norm": 0.7734375,
"learning_rate": 0.00029797325726691403,
"loss": 0.1656,
"step": 18850
},
{
"epoch": 0.5001984914648671,
"grad_norm": 0.46875,
"learning_rate": 0.0002979516927665575,
"loss": 0.1438,
"step": 18900
},
{
"epoch": 0.5015217678973137,
"grad_norm": 0.44140625,
"learning_rate": 0.00029793001493820157,
"loss": 0.1491,
"step": 18950
},
{
"epoch": 0.5028450443297605,
"grad_norm": 0.515625,
"learning_rate": 0.000297908223798451,
"loss": 0.163,
"step": 19000
},
{
"epoch": 0.5041683207622072,
"grad_norm": 0.416015625,
"learning_rate": 0.0002978863193639975,
"loss": 0.152,
"step": 19050
},
{
"epoch": 0.505491597194654,
"grad_norm": 0.46875,
"learning_rate": 0.00029786430165161927,
"loss": 0.1483,
"step": 19100
},
{
"epoch": 0.5068148736271006,
"grad_norm": 0.50390625,
"learning_rate": 0.0002978421706781815,
"loss": 0.156,
"step": 19150
},
{
"epoch": 0.5081381500595474,
"grad_norm": 0.46875,
"learning_rate": 0.000297819926460636,
"loss": 0.1438,
"step": 19200
},
{
"epoch": 0.5094614264919942,
"grad_norm": 0.58203125,
"learning_rate": 0.0002977975690160215,
"loss": 0.1462,
"step": 19250
},
{
"epoch": 0.5107847029244409,
"grad_norm": 0.50390625,
"learning_rate": 0.0002977750983614633,
"loss": 0.16,
"step": 19300
},
{
"epoch": 0.5121079793568877,
"grad_norm": 0.484375,
"learning_rate": 0.00029775251451417343,
"loss": 0.1568,
"step": 19350
},
{
"epoch": 0.5134312557893344,
"grad_norm": 0.51171875,
"learning_rate": 0.0002977298174914507,
"loss": 0.161,
"step": 19400
},
{
"epoch": 0.5147545322217811,
"grad_norm": 1.2421875,
"learning_rate": 0.0002977070073106806,
"loss": 0.1666,
"step": 19450
},
{
"epoch": 0.5160778086542279,
"grad_norm": 0.4375,
"learning_rate": 0.0002976840839893352,
"loss": 0.1483,
"step": 19500
},
{
"epoch": 0.5160778086542279,
"eval_cer": 0.04413408134957405,
"eval_loss": 0.22323232889175415,
"eval_runtime": 111.5308,
"eval_samples_per_second": 22.675,
"eval_steps_per_second": 0.179,
"eval_wer": 0.11942094515752626,
"step": 19500
},
{
"epoch": 0.5174010850866746,
"grad_norm": 0.5546875,
"learning_rate": 0.00029766104754497334,
"loss": 0.1511,
"step": 19550
},
{
"epoch": 0.5187243615191214,
"grad_norm": 0.51171875,
"learning_rate": 0.0002976378979952404,
"loss": 0.1402,
"step": 19600
},
{
"epoch": 0.520047637951568,
"grad_norm": 0.546875,
"learning_rate": 0.00029761463535786856,
"loss": 0.1436,
"step": 19650
},
{
"epoch": 0.5213709143840148,
"grad_norm": 0.48828125,
"learning_rate": 0.00029759125965067655,
"loss": 0.152,
"step": 19700
},
{
"epoch": 0.5226941908164615,
"grad_norm": 0.453125,
"learning_rate": 0.00029756777089156954,
"loss": 0.1526,
"step": 19750
},
{
"epoch": 0.5240174672489083,
"grad_norm": 0.470703125,
"learning_rate": 0.0002975441690985396,
"loss": 0.1408,
"step": 19800
},
{
"epoch": 0.5253407436813551,
"grad_norm": 0.59375,
"learning_rate": 0.0002975204542896651,
"loss": 0.1391,
"step": 19850
},
{
"epoch": 0.5266640201138018,
"grad_norm": 0.53125,
"learning_rate": 0.0002974966264831112,
"loss": 0.1611,
"step": 19900
},
{
"epoch": 0.5279872965462485,
"grad_norm": 0.5546875,
"learning_rate": 0.00029747268569712946,
"loss": 0.1466,
"step": 19950
},
{
"epoch": 0.5293105729786952,
"grad_norm": 0.47265625,
"learning_rate": 0.00029744863195005805,
"loss": 0.1471,
"step": 20000
},
{
"epoch": 0.530633849411142,
"grad_norm": 0.5390625,
"learning_rate": 0.00029742446526032166,
"loss": 0.1517,
"step": 20050
},
{
"epoch": 0.5319571258435887,
"grad_norm": 0.5078125,
"learning_rate": 0.00029740018564643157,
"loss": 0.1489,
"step": 20100
},
{
"epoch": 0.5332804022760355,
"grad_norm": 0.48046875,
"learning_rate": 0.00029737579312698526,
"loss": 0.1572,
"step": 20150
},
{
"epoch": 0.5346036787084822,
"grad_norm": 0.474609375,
"learning_rate": 0.0002973512877206671,
"loss": 0.143,
"step": 20200
},
{
"epoch": 0.5359269551409289,
"grad_norm": 0.498046875,
"learning_rate": 0.00029732666944624767,
"loss": 0.1455,
"step": 20250
},
{
"epoch": 0.5372502315733757,
"grad_norm": 0.462890625,
"learning_rate": 0.00029730193832258405,
"loss": 0.1581,
"step": 20300
},
{
"epoch": 0.5385735080058224,
"grad_norm": 0.546875,
"learning_rate": 0.0002972770943686198,
"loss": 0.1361,
"step": 20350
},
{
"epoch": 0.5398967844382692,
"grad_norm": 0.53125,
"learning_rate": 0.00029725213760338487,
"loss": 0.1553,
"step": 20400
},
{
"epoch": 0.541220060870716,
"grad_norm": 0.4765625,
"learning_rate": 0.00029722706804599566,
"loss": 0.1556,
"step": 20450
},
{
"epoch": 0.5425433373031626,
"grad_norm": 0.498046875,
"learning_rate": 0.00029720188571565493,
"loss": 0.1612,
"step": 20500
},
{
"epoch": 0.5438666137356094,
"grad_norm": 0.5,
"learning_rate": 0.00029717659063165184,
"loss": 0.1431,
"step": 20550
},
{
"epoch": 0.5451898901680561,
"grad_norm": 0.515625,
"learning_rate": 0.0002971511828133619,
"loss": 0.1526,
"step": 20600
},
{
"epoch": 0.5465131666005029,
"grad_norm": 0.4375,
"learning_rate": 0.00029712566228024695,
"loss": 0.1467,
"step": 20650
},
{
"epoch": 0.5478364430329495,
"grad_norm": 0.474609375,
"learning_rate": 0.00029710002905185533,
"loss": 0.1445,
"step": 20700
},
{
"epoch": 0.5491597194653963,
"grad_norm": 0.5703125,
"learning_rate": 0.00029707428314782147,
"loss": 0.1477,
"step": 20750
},
{
"epoch": 0.5504829958978431,
"grad_norm": 0.49609375,
"learning_rate": 0.0002970484245878662,
"loss": 0.155,
"step": 20800
},
{
"epoch": 0.5518062723302898,
"grad_norm": 0.515625,
"learning_rate": 0.00029702245339179663,
"loss": 0.146,
"step": 20850
},
{
"epoch": 0.5531295487627366,
"grad_norm": 0.5625,
"learning_rate": 0.00029699636957950627,
"loss": 0.1406,
"step": 20900
},
{
"epoch": 0.5544528251951832,
"grad_norm": 0.72265625,
"learning_rate": 0.00029697017317097476,
"loss": 0.1482,
"step": 20950
},
{
"epoch": 0.55577610162763,
"grad_norm": 0.416015625,
"learning_rate": 0.000296943864186268,
"loss": 0.15,
"step": 21000
},
{
"epoch": 0.55577610162763,
"eval_cer": 0.07418389294157948,
"eval_loss": 0.21479840576648712,
"eval_runtime": 175.2228,
"eval_samples_per_second": 14.433,
"eval_steps_per_second": 0.114,
"eval_wer": 0.1202960910151692,
"step": 21000
},
{
"epoch": 0.5570993780600767,
"grad_norm": 0.453125,
"learning_rate": 0.0002969174426455381,
"loss": 0.1374,
"step": 21050
},
{
"epoch": 0.5584226544925235,
"grad_norm": 0.50390625,
"learning_rate": 0.0002968909085690235,
"loss": 0.1431,
"step": 21100
},
{
"epoch": 0.5597459309249703,
"grad_norm": 0.484375,
"learning_rate": 0.00029686426197704876,
"loss": 0.1466,
"step": 21150
},
{
"epoch": 0.5610692073574169,
"grad_norm": 0.466796875,
"learning_rate": 0.00029683750289002454,
"loss": 0.1447,
"step": 21200
},
{
"epoch": 0.5623924837898637,
"grad_norm": 0.478515625,
"learning_rate": 0.00029681063132844785,
"loss": 0.1361,
"step": 21250
},
{
"epoch": 0.5637157602223104,
"grad_norm": 0.484375,
"learning_rate": 0.00029678364731290175,
"loss": 0.143,
"step": 21300
},
{
"epoch": 0.5650390366547572,
"grad_norm": 0.515625,
"learning_rate": 0.0002967565508640554,
"loss": 0.1401,
"step": 21350
},
{
"epoch": 0.566362313087204,
"grad_norm": 0.44140625,
"learning_rate": 0.00029672934200266417,
"loss": 0.1404,
"step": 21400
},
{
"epoch": 0.5676855895196506,
"grad_norm": 0.53125,
"learning_rate": 0.00029670202074956947,
"loss": 0.1378,
"step": 21450
},
{
"epoch": 0.5690088659520974,
"grad_norm": 0.390625,
"learning_rate": 0.00029667458712569883,
"loss": 0.1569,
"step": 21500
},
{
"epoch": 0.5703321423845441,
"grad_norm": 0.54296875,
"learning_rate": 0.0002966470411520659,
"loss": 0.1547,
"step": 21550
},
{
"epoch": 0.5716554188169909,
"grad_norm": 0.5234375,
"learning_rate": 0.0002966193828497703,
"loss": 0.1372,
"step": 21600
},
{
"epoch": 0.5729786952494376,
"grad_norm": 0.92578125,
"learning_rate": 0.0002965916122399977,
"loss": 0.1566,
"step": 21650
},
{
"epoch": 0.5743019716818843,
"grad_norm": 0.5,
"learning_rate": 0.00029656372934401987,
"loss": 0.1493,
"step": 21700
},
{
"epoch": 0.5756252481143311,
"grad_norm": 0.57421875,
"learning_rate": 0.00029653573418319455,
"loss": 0.139,
"step": 21750
},
{
"epoch": 0.5769485245467778,
"grad_norm": 0.546875,
"learning_rate": 0.0002965076267789654,
"loss": 0.1449,
"step": 21800
},
{
"epoch": 0.5782718009792246,
"grad_norm": 0.54296875,
"learning_rate": 0.00029647940715286225,
"loss": 0.145,
"step": 21850
},
{
"epoch": 0.5795950774116713,
"grad_norm": 0.431640625,
"learning_rate": 0.0002964510753265006,
"loss": 0.1548,
"step": 21900
},
{
"epoch": 0.580918353844118,
"grad_norm": 0.490234375,
"learning_rate": 0.00029642263132158216,
"loss": 0.1496,
"step": 21950
},
{
"epoch": 0.5822416302765647,
"grad_norm": 0.5078125,
"learning_rate": 0.00029639407515989443,
"loss": 0.138,
"step": 22000
},
{
"epoch": 0.5835649067090115,
"grad_norm": 0.41796875,
"learning_rate": 0.0002963654068633109,
"loss": 0.139,
"step": 22050
},
{
"epoch": 0.5848881831414583,
"grad_norm": 0.4453125,
"learning_rate": 0.00029633662645379077,
"loss": 0.143,
"step": 22100
},
{
"epoch": 0.586211459573905,
"grad_norm": 0.546875,
"learning_rate": 0.00029630773395337946,
"loss": 0.154,
"step": 22150
},
{
"epoch": 0.5875347360063518,
"grad_norm": 0.4375,
"learning_rate": 0.0002962787293842078,
"loss": 0.1394,
"step": 22200
},
{
"epoch": 0.5888580124387984,
"grad_norm": 0.56640625,
"learning_rate": 0.0002962496127684929,
"loss": 0.1426,
"step": 22250
},
{
"epoch": 0.5901812888712452,
"grad_norm": 0.7109375,
"learning_rate": 0.0002962203841285374,
"loss": 0.1391,
"step": 22300
},
{
"epoch": 0.5915045653036919,
"grad_norm": 0.498046875,
"learning_rate": 0.00029619104348672985,
"loss": 0.1356,
"step": 22350
},
{
"epoch": 0.5928278417361387,
"grad_norm": 0.4765625,
"learning_rate": 0.0002961615908655446,
"loss": 0.1336,
"step": 22400
},
{
"epoch": 0.5941511181685855,
"grad_norm": 0.50390625,
"learning_rate": 0.0002961320262875418,
"loss": 0.1366,
"step": 22450
},
{
"epoch": 0.5954743946010321,
"grad_norm": 0.48828125,
"learning_rate": 0.00029610234977536727,
"loss": 0.1455,
"step": 22500
},
{
"epoch": 0.5954743946010321,
"eval_cer": 0.036670766155072976,
"eval_loss": 0.22347043454647064,
"eval_runtime": 125.5065,
"eval_samples_per_second": 20.15,
"eval_steps_per_second": 0.159,
"eval_wer": 0.11056009334889148,
"step": 22500
},
{
"epoch": 0.5967976710334789,
"grad_norm": 0.466796875,
"learning_rate": 0.00029607256135175266,
"loss": 0.1498,
"step": 22550
},
{
"epoch": 0.5981209474659256,
"grad_norm": 0.498046875,
"learning_rate": 0.0002960426610395153,
"loss": 0.1375,
"step": 22600
},
{
"epoch": 0.5994442238983724,
"grad_norm": 0.55078125,
"learning_rate": 0.00029601264886155814,
"loss": 0.1413,
"step": 22650
},
{
"epoch": 0.6007675003308192,
"grad_norm": 0.51953125,
"learning_rate": 0.0002959825248408699,
"loss": 0.145,
"step": 22700
},
{
"epoch": 0.6020907767632658,
"grad_norm": 0.466796875,
"learning_rate": 0.0002959522890005251,
"loss": 0.1416,
"step": 22750
},
{
"epoch": 0.6034140531957126,
"grad_norm": 0.46875,
"learning_rate": 0.00029592194136368366,
"loss": 0.134,
"step": 22800
},
{
"epoch": 0.6047373296281593,
"grad_norm": 0.453125,
"learning_rate": 0.00029589148195359135,
"loss": 0.1442,
"step": 22850
},
{
"epoch": 0.6060606060606061,
"grad_norm": 0.46484375,
"learning_rate": 0.0002958609107935794,
"loss": 0.152,
"step": 22900
},
{
"epoch": 0.6073838824930528,
"grad_norm": 0.47265625,
"learning_rate": 0.00029583022790706466,
"loss": 0.1338,
"step": 22950
},
{
"epoch": 0.6087071589254995,
"grad_norm": 0.546875,
"learning_rate": 0.00029579943331754967,
"loss": 0.1446,
"step": 23000
},
{
"epoch": 0.6100304353579463,
"grad_norm": 0.44140625,
"learning_rate": 0.0002957685270486225,
"loss": 0.1435,
"step": 23050
},
{
"epoch": 0.611353711790393,
"grad_norm": 0.439453125,
"learning_rate": 0.0002957375091239566,
"loss": 0.1292,
"step": 23100
},
{
"epoch": 0.6126769882228398,
"grad_norm": 0.53515625,
"learning_rate": 0.00029570637956731114,
"loss": 0.1556,
"step": 23150
},
{
"epoch": 0.6140002646552865,
"grad_norm": 0.5625,
"learning_rate": 0.0002956751384025308,
"loss": 0.1523,
"step": 23200
},
{
"epoch": 0.6153235410877332,
"grad_norm": 0.45703125,
"learning_rate": 0.00029564378565354553,
"loss": 0.1533,
"step": 23250
},
{
"epoch": 0.6166468175201799,
"grad_norm": 0.53125,
"learning_rate": 0.000295612321344371,
"loss": 0.1399,
"step": 23300
},
{
"epoch": 0.6179700939526267,
"grad_norm": 0.48046875,
"learning_rate": 0.00029558074549910823,
"loss": 0.1396,
"step": 23350
},
{
"epoch": 0.6192933703850735,
"grad_norm": 0.46484375,
"learning_rate": 0.00029554905814194366,
"loss": 0.1468,
"step": 23400
},
{
"epoch": 0.6206166468175202,
"grad_norm": 0.6796875,
"learning_rate": 0.00029551725929714915,
"loss": 0.1447,
"step": 23450
},
{
"epoch": 0.6219399232499669,
"grad_norm": 0.515625,
"learning_rate": 0.000295485348989082,
"loss": 0.1527,
"step": 23500
},
{
"epoch": 0.6232631996824136,
"grad_norm": 0.44921875,
"learning_rate": 0.00029545332724218484,
"loss": 0.1515,
"step": 23550
},
{
"epoch": 0.6245864761148604,
"grad_norm": 0.5625,
"learning_rate": 0.00029542119408098563,
"loss": 0.1417,
"step": 23600
},
{
"epoch": 0.6259097525473072,
"grad_norm": 0.46875,
"learning_rate": 0.00029538894953009783,
"loss": 0.1379,
"step": 23650
},
{
"epoch": 0.6272330289797539,
"grad_norm": 0.43359375,
"learning_rate": 0.00029535659361422004,
"loss": 0.1314,
"step": 23700
},
{
"epoch": 0.6285563054122006,
"grad_norm": 0.5,
"learning_rate": 0.00029532412635813627,
"loss": 0.1349,
"step": 23750
},
{
"epoch": 0.6298795818446473,
"grad_norm": 0.396484375,
"learning_rate": 0.00029529154778671577,
"loss": 0.1515,
"step": 23800
},
{
"epoch": 0.6312028582770941,
"grad_norm": 0.443359375,
"learning_rate": 0.000295258857924913,
"loss": 0.1452,
"step": 23850
},
{
"epoch": 0.6325261347095408,
"grad_norm": 0.546875,
"learning_rate": 0.0002952260567977678,
"loss": 0.1431,
"step": 23900
},
{
"epoch": 0.6338494111419876,
"grad_norm": 0.41015625,
"learning_rate": 0.0002951931444304052,
"loss": 0.1405,
"step": 23950
},
{
"epoch": 0.6351726875744343,
"grad_norm": 0.4765625,
"learning_rate": 0.0002951601208480354,
"loss": 0.1367,
"step": 24000
},
{
"epoch": 0.6351726875744343,
"eval_cer": 0.04273576270139439,
"eval_loss": 0.2082057148218155,
"eval_runtime": 120.662,
"eval_samples_per_second": 20.959,
"eval_steps_per_second": 0.166,
"eval_wer": 0.11376896149358226,
"step": 24000
},
{
"epoch": 0.636495964006881,
"grad_norm": 0.58203125,
"learning_rate": 0.0002951269860759537,
"loss": 0.1605,
"step": 24050
},
{
"epoch": 0.6378192404393278,
"grad_norm": 0.470703125,
"learning_rate": 0.0002950937401395408,
"loss": 0.1345,
"step": 24100
},
{
"epoch": 0.6391425168717745,
"grad_norm": 0.435546875,
"learning_rate": 0.0002950603830642622,
"loss": 0.1339,
"step": 24150
},
{
"epoch": 0.6404657933042213,
"grad_norm": 0.404296875,
"learning_rate": 0.00029502691487566903,
"loss": 0.1347,
"step": 24200
},
{
"epoch": 0.6417890697366679,
"grad_norm": 0.5,
"learning_rate": 0.000294993335599397,
"loss": 0.1482,
"step": 24250
},
{
"epoch": 0.6431123461691147,
"grad_norm": 0.453125,
"learning_rate": 0.0002949596452611673,
"loss": 0.1414,
"step": 24300
},
{
"epoch": 0.6444356226015615,
"grad_norm": 0.431640625,
"learning_rate": 0.000294925843886786,
"loss": 0.1357,
"step": 24350
},
{
"epoch": 0.6457588990340082,
"grad_norm": 0.671875,
"learning_rate": 0.0002948919315021443,
"loss": 0.1431,
"step": 24400
},
{
"epoch": 0.647082175466455,
"grad_norm": 0.447265625,
"learning_rate": 0.0002948579081332183,
"loss": 0.131,
"step": 24450
},
{
"epoch": 0.6484054518989016,
"grad_norm": 0.46875,
"learning_rate": 0.0002948237738060693,
"loss": 0.1476,
"step": 24500
},
{
"epoch": 0.6497287283313484,
"grad_norm": 0.4296875,
"learning_rate": 0.00029478952854684344,
"loss": 0.1305,
"step": 24550
},
{
"epoch": 0.6510520047637952,
"grad_norm": 0.46875,
"learning_rate": 0.000294755172381772,
"loss": 0.1459,
"step": 24600
},
{
"epoch": 0.6523752811962419,
"grad_norm": 0.470703125,
"learning_rate": 0.00029472070533717105,
"loss": 0.1484,
"step": 24650
},
{
"epoch": 0.6536985576286887,
"grad_norm": 0.51171875,
"learning_rate": 0.0002946861274394416,
"loss": 0.1533,
"step": 24700
},
{
"epoch": 0.6550218340611353,
"grad_norm": 0.490234375,
"learning_rate": 0.0002946514387150697,
"loss": 0.1465,
"step": 24750
},
{
"epoch": 0.6563451104935821,
"grad_norm": 0.470703125,
"learning_rate": 0.0002946166391906261,
"loss": 0.1529,
"step": 24800
},
{
"epoch": 0.6576683869260288,
"grad_norm": 0.462890625,
"learning_rate": 0.00029458172889276665,
"loss": 0.136,
"step": 24850
},
{
"epoch": 0.6589916633584756,
"grad_norm": 0.498046875,
"learning_rate": 0.00029454670784823193,
"loss": 0.1409,
"step": 24900
},
{
"epoch": 0.6603149397909224,
"grad_norm": 0.51953125,
"learning_rate": 0.0002945115760838472,
"loss": 0.1424,
"step": 24950
},
{
"epoch": 0.661638216223369,
"grad_norm": 0.427734375,
"learning_rate": 0.00029447633362652284,
"loss": 0.1382,
"step": 25000
},
{
"epoch": 0.6629614926558158,
"grad_norm": 0.54296875,
"learning_rate": 0.0002944409805032538,
"loss": 0.1477,
"step": 25050
},
{
"epoch": 0.6642847690882625,
"grad_norm": 0.47265625,
"learning_rate": 0.00029440551674111986,
"loss": 0.1273,
"step": 25100
},
{
"epoch": 0.6656080455207093,
"grad_norm": 0.5,
"learning_rate": 0.00029436994236728555,
"loss": 0.134,
"step": 25150
},
{
"epoch": 0.666931321953156,
"grad_norm": 0.57421875,
"learning_rate": 0.0002943342574090001,
"loss": 0.1328,
"step": 25200
},
{
"epoch": 0.6682545983856027,
"grad_norm": 0.578125,
"learning_rate": 0.00029429846189359745,
"loss": 0.1326,
"step": 25250
},
{
"epoch": 0.6695778748180495,
"grad_norm": 0.54296875,
"learning_rate": 0.00029426255584849624,
"loss": 0.141,
"step": 25300
},
{
"epoch": 0.6709011512504962,
"grad_norm": 0.4609375,
"learning_rate": 0.00029422653930119986,
"loss": 0.1269,
"step": 25350
},
{
"epoch": 0.672224427682943,
"grad_norm": 0.4609375,
"learning_rate": 0.0002941904122792962,
"loss": 0.1463,
"step": 25400
},
{
"epoch": 0.6735477041153897,
"grad_norm": 0.58203125,
"learning_rate": 0.00029415417481045775,
"loss": 0.1468,
"step": 25450
},
{
"epoch": 0.6748709805478365,
"grad_norm": 0.470703125,
"learning_rate": 0.0002941178269224418,
"loss": 0.1389,
"step": 25500
},
{
"epoch": 0.6748709805478365,
"eval_cer": 0.038456570693712055,
"eval_loss": 0.20660458505153656,
"eval_runtime": 118.1521,
"eval_samples_per_second": 21.405,
"eval_steps_per_second": 0.169,
"eval_wer": 0.10811697782963828,
"step": 25500
},
{
"epoch": 0.6761942569802831,
"grad_norm": 0.5234375,
"learning_rate": 0.00029408136864309003,
"loss": 0.1431,
"step": 25550
},
{
"epoch": 0.6775175334127299,
"grad_norm": 0.443359375,
"learning_rate": 0.00029404480000032876,
"loss": 0.1378,
"step": 25600
},
{
"epoch": 0.6788408098451767,
"grad_norm": 0.4609375,
"learning_rate": 0.00029400812102216886,
"loss": 0.1453,
"step": 25650
},
{
"epoch": 0.6801640862776234,
"grad_norm": 0.423828125,
"learning_rate": 0.00029397133173670556,
"loss": 0.1377,
"step": 25700
},
{
"epoch": 0.6814873627100702,
"grad_norm": 0.4453125,
"learning_rate": 0.0002939344321721189,
"loss": 0.1581,
"step": 25750
},
{
"epoch": 0.6828106391425168,
"grad_norm": 0.51171875,
"learning_rate": 0.000293897422356673,
"loss": 0.1432,
"step": 25800
},
{
"epoch": 0.6841339155749636,
"grad_norm": 0.455078125,
"learning_rate": 0.0002938603023187168,
"loss": 0.134,
"step": 25850
},
{
"epoch": 0.6854571920074104,
"grad_norm": 0.515625,
"learning_rate": 0.0002938230720866834,
"loss": 0.134,
"step": 25900
},
{
"epoch": 0.6867804684398571,
"grad_norm": 0.375,
"learning_rate": 0.0002937857316890904,
"loss": 0.1342,
"step": 25950
},
{
"epoch": 0.6881037448723039,
"grad_norm": 0.431640625,
"learning_rate": 0.00029374828115453984,
"loss": 0.1308,
"step": 26000
},
{
"epoch": 0.6894270213047505,
"grad_norm": 0.46875,
"learning_rate": 0.00029371072051171803,
"loss": 0.1352,
"step": 26050
},
{
"epoch": 0.6907502977371973,
"grad_norm": 0.41015625,
"learning_rate": 0.0002936730497893957,
"loss": 0.1275,
"step": 26100
},
{
"epoch": 0.692073574169644,
"grad_norm": 0.5078125,
"learning_rate": 0.00029363526901642783,
"loss": 0.1436,
"step": 26150
},
{
"epoch": 0.6933968506020908,
"grad_norm": 0.474609375,
"learning_rate": 0.00029359737822175376,
"loss": 0.1345,
"step": 26200
},
{
"epoch": 0.6947201270345376,
"grad_norm": 0.447265625,
"learning_rate": 0.00029355937743439704,
"loss": 0.1342,
"step": 26250
},
{
"epoch": 0.6960434034669842,
"grad_norm": 0.412109375,
"learning_rate": 0.00029352126668346544,
"loss": 0.1356,
"step": 26300
},
{
"epoch": 0.697366679899431,
"grad_norm": 0.46484375,
"learning_rate": 0.0002934830459981511,
"loss": 0.1397,
"step": 26350
},
{
"epoch": 0.6986899563318777,
"grad_norm": 0.46484375,
"learning_rate": 0.00029344471540773036,
"loss": 0.1475,
"step": 26400
},
{
"epoch": 0.7000132327643245,
"grad_norm": 0.45703125,
"learning_rate": 0.0002934062749415635,
"loss": 0.1326,
"step": 26450
},
{
"epoch": 0.7013365091967712,
"grad_norm": 0.41015625,
"learning_rate": 0.00029336772462909533,
"loss": 0.1494,
"step": 26500
},
{
"epoch": 0.7026597856292179,
"grad_norm": 0.51171875,
"learning_rate": 0.0002933290644998544,
"loss": 0.1326,
"step": 26550
},
{
"epoch": 0.7039830620616647,
"grad_norm": 0.45703125,
"learning_rate": 0.0002932902945834538,
"loss": 0.1393,
"step": 26600
},
{
"epoch": 0.7053063384941114,
"grad_norm": 0.609375,
"learning_rate": 0.0002932514149095904,
"loss": 0.1376,
"step": 26650
},
{
"epoch": 0.7066296149265582,
"grad_norm": 0.470703125,
"learning_rate": 0.0002932124255080452,
"loss": 0.1345,
"step": 26700
},
{
"epoch": 0.7079528913590049,
"grad_norm": 0.51953125,
"learning_rate": 0.0002931733264086834,
"loss": 0.1355,
"step": 26750
},
{
"epoch": 0.7092761677914516,
"grad_norm": 0.490234375,
"learning_rate": 0.00029313411764145394,
"loss": 0.1358,
"step": 26800
},
{
"epoch": 0.7105994442238984,
"grad_norm": 0.486328125,
"learning_rate": 0.0002930947992363901,
"loss": 0.133,
"step": 26850
},
{
"epoch": 0.7119227206563451,
"grad_norm": 0.486328125,
"learning_rate": 0.00029305537122360895,
"loss": 0.1293,
"step": 26900
},
{
"epoch": 0.7132459970887919,
"grad_norm": 0.51953125,
"learning_rate": 0.00029301583363331156,
"loss": 0.1333,
"step": 26950
},
{
"epoch": 0.7145692735212386,
"grad_norm": 0.67578125,
"learning_rate": 0.00029297618649578287,
"loss": 0.1349,
"step": 27000
},
{
"epoch": 0.7145692735212386,
"eval_cer": 0.038091547753424194,
"eval_loss": 0.20265592634677887,
"eval_runtime": 122.8325,
"eval_samples_per_second": 20.589,
"eval_steps_per_second": 0.163,
"eval_wer": 0.10917444574095683,
"step": 27000
},
{
"epoch": 0.7158925499536853,
"grad_norm": 0.4921875,
"learning_rate": 0.0002929364298413918,
"loss": 0.1419,
"step": 27050
},
{
"epoch": 0.717215826386132,
"grad_norm": 0.5078125,
"learning_rate": 0.00029289656370059115,
"loss": 0.1369,
"step": 27100
},
{
"epoch": 0.7185391028185788,
"grad_norm": 0.4609375,
"learning_rate": 0.00029285658810391765,
"loss": 0.1345,
"step": 27150
},
{
"epoch": 0.7198623792510256,
"grad_norm": 0.466796875,
"learning_rate": 0.00029281650308199164,
"loss": 0.1357,
"step": 27200
},
{
"epoch": 0.7211856556834723,
"grad_norm": 0.390625,
"learning_rate": 0.00029277630866551753,
"loss": 0.1405,
"step": 27250
},
{
"epoch": 0.722508932115919,
"grad_norm": 0.46484375,
"learning_rate": 0.00029273600488528346,
"loss": 0.1423,
"step": 27300
},
{
"epoch": 0.7238322085483657,
"grad_norm": 0.482421875,
"learning_rate": 0.00029269559177216126,
"loss": 0.1449,
"step": 27350
},
{
"epoch": 0.7251554849808125,
"grad_norm": 0.466796875,
"learning_rate": 0.0002926550693571066,
"loss": 0.143,
"step": 27400
},
{
"epoch": 0.7264787614132592,
"grad_norm": 0.46875,
"learning_rate": 0.0002926144376711587,
"loss": 0.1349,
"step": 27450
},
{
"epoch": 0.727802037845706,
"grad_norm": 0.478515625,
"learning_rate": 0.00029257369674544077,
"loss": 0.1281,
"step": 27500
},
{
"epoch": 0.7291253142781527,
"grad_norm": 0.50390625,
"learning_rate": 0.00029253284661115946,
"loss": 0.1504,
"step": 27550
},
{
"epoch": 0.7304485907105994,
"grad_norm": 0.5234375,
"learning_rate": 0.0002924918872996052,
"loss": 0.1454,
"step": 27600
},
{
"epoch": 0.7317718671430462,
"grad_norm": 0.5625,
"learning_rate": 0.00029245081884215183,
"loss": 0.1291,
"step": 27650
},
{
"epoch": 0.7330951435754929,
"grad_norm": 0.478515625,
"learning_rate": 0.00029240964127025715,
"loss": 0.1334,
"step": 27700
},
{
"epoch": 0.7344184200079397,
"grad_norm": 0.427734375,
"learning_rate": 0.00029236835461546226,
"loss": 0.1351,
"step": 27750
},
{
"epoch": 0.7357416964403865,
"grad_norm": 0.462890625,
"learning_rate": 0.0002923269589093919,
"loss": 0.1372,
"step": 27800
},
{
"epoch": 0.7370649728728331,
"grad_norm": 0.4921875,
"learning_rate": 0.00029228545418375434,
"loss": 0.1306,
"step": 27850
},
{
"epoch": 0.7383882493052799,
"grad_norm": 0.53125,
"learning_rate": 0.0002922438404703414,
"loss": 0.1327,
"step": 27900
},
{
"epoch": 0.7397115257377266,
"grad_norm": 0.486328125,
"learning_rate": 0.0002922021178010283,
"loss": 0.1334,
"step": 27950
},
{
"epoch": 0.7410348021701734,
"grad_norm": 0.46484375,
"learning_rate": 0.0002921602862077738,
"loss": 0.1239,
"step": 28000
},
{
"epoch": 0.74235807860262,
"grad_norm": 0.4765625,
"learning_rate": 0.00029211834572262,
"loss": 0.1456,
"step": 28050
},
{
"epoch": 0.7436813550350668,
"grad_norm": 0.50390625,
"learning_rate": 0.0002920762963776924,
"loss": 0.1368,
"step": 28100
},
{
"epoch": 0.7450046314675136,
"grad_norm": 0.447265625,
"learning_rate": 0.00029203413820520015,
"loss": 0.1421,
"step": 28150
},
{
"epoch": 0.7463279078999603,
"grad_norm": 0.474609375,
"learning_rate": 0.0002919918712374354,
"loss": 0.1339,
"step": 28200
},
{
"epoch": 0.7476511843324071,
"grad_norm": 0.4375,
"learning_rate": 0.0002919494955067738,
"loss": 0.1368,
"step": 28250
},
{
"epoch": 0.7489744607648537,
"grad_norm": 0.7734375,
"learning_rate": 0.00029190701104567435,
"loss": 0.137,
"step": 28300
},
{
"epoch": 0.7502977371973005,
"grad_norm": 0.49609375,
"learning_rate": 0.0002918644178866793,
"loss": 0.1391,
"step": 28350
},
{
"epoch": 0.7516210136297472,
"grad_norm": 0.5,
"learning_rate": 0.00029182171606241406,
"loss": 0.131,
"step": 28400
},
{
"epoch": 0.752944290062194,
"grad_norm": 0.44921875,
"learning_rate": 0.00029177890560558745,
"loss": 0.1301,
"step": 28450
},
{
"epoch": 0.7542675664946408,
"grad_norm": 0.39453125,
"learning_rate": 0.00029173598654899137,
"loss": 0.131,
"step": 28500
},
{
"epoch": 0.7542675664946408,
"eval_cer": 0.04311201711676803,
"eval_loss": 0.20271959900856018,
"eval_runtime": 112.6571,
"eval_samples_per_second": 22.449,
"eval_steps_per_second": 0.178,
"eval_wer": 0.11314906651108518,
"step": 28500
},
{
"epoch": 0.7555908429270874,
"grad_norm": 0.412109375,
"learning_rate": 0.000291692958925501,
"loss": 0.1268,
"step": 28550
},
{
"epoch": 0.7569141193595342,
"grad_norm": 0.431640625,
"learning_rate": 0.0002916498227680745,
"loss": 0.1309,
"step": 28600
},
{
"epoch": 0.7582373957919809,
"grad_norm": 0.4921875,
"learning_rate": 0.0002916065781097534,
"loss": 0.1373,
"step": 28650
},
{
"epoch": 0.7595606722244277,
"grad_norm": 0.42578125,
"learning_rate": 0.00029156322498366223,
"loss": 0.1306,
"step": 28700
},
{
"epoch": 0.7608839486568745,
"grad_norm": 0.49609375,
"learning_rate": 0.0002915197634230086,
"loss": 0.1433,
"step": 28750
},
{
"epoch": 0.7622072250893211,
"grad_norm": 0.39453125,
"learning_rate": 0.00029147619346108313,
"loss": 0.1342,
"step": 28800
},
{
"epoch": 0.7635305015217679,
"grad_norm": 0.54296875,
"learning_rate": 0.00029143251513125964,
"loss": 0.1373,
"step": 28850
},
{
"epoch": 0.7648537779542146,
"grad_norm": 0.51953125,
"learning_rate": 0.0002913887284669947,
"loss": 0.1289,
"step": 28900
},
{
"epoch": 0.7661770543866614,
"grad_norm": 0.5078125,
"learning_rate": 0.0002913448335018281,
"loss": 0.136,
"step": 28950
},
{
"epoch": 0.7675003308191081,
"grad_norm": 0.48046875,
"learning_rate": 0.0002913008302693825,
"loss": 0.136,
"step": 29000
},
{
"epoch": 0.7688236072515549,
"grad_norm": 0.451171875,
"learning_rate": 0.00029125671880336345,
"loss": 0.1337,
"step": 29050
},
{
"epoch": 0.7701468836840016,
"grad_norm": 0.408203125,
"learning_rate": 0.00029121249913755947,
"loss": 0.1303,
"step": 29100
},
{
"epoch": 0.7714701601164483,
"grad_norm": 0.453125,
"learning_rate": 0.00029116817130584187,
"loss": 0.1353,
"step": 29150
},
{
"epoch": 0.7727934365488951,
"grad_norm": 0.494140625,
"learning_rate": 0.00029112373534216494,
"loss": 0.1254,
"step": 29200
},
{
"epoch": 0.7741167129813418,
"grad_norm": 0.54296875,
"learning_rate": 0.0002910791912805657,
"loss": 0.1278,
"step": 29250
},
{
"epoch": 0.7754399894137886,
"grad_norm": 0.439453125,
"learning_rate": 0.00029103453915516395,
"loss": 0.1466,
"step": 29300
},
{
"epoch": 0.7767632658462352,
"grad_norm": 1.109375,
"learning_rate": 0.00029098977900016235,
"loss": 0.1372,
"step": 29350
},
{
"epoch": 0.778086542278682,
"grad_norm": 0.478515625,
"learning_rate": 0.00029094491084984635,
"loss": 0.1476,
"step": 29400
},
{
"epoch": 0.7794098187111288,
"grad_norm": 0.50390625,
"learning_rate": 0.00029089993473858396,
"loss": 0.1412,
"step": 29450
},
{
"epoch": 0.7807330951435755,
"grad_norm": 0.39453125,
"learning_rate": 0.000290854850700826,
"loss": 0.1296,
"step": 29500
},
{
"epoch": 0.7820563715760223,
"grad_norm": 0.447265625,
"learning_rate": 0.00029080965877110586,
"loss": 0.1395,
"step": 29550
},
{
"epoch": 0.7833796480084689,
"grad_norm": 0.5234375,
"learning_rate": 0.0002907643589840397,
"loss": 0.1372,
"step": 29600
},
{
"epoch": 0.7847029244409157,
"grad_norm": 0.51171875,
"learning_rate": 0.0002907189513743263,
"loss": 0.1438,
"step": 29650
},
{
"epoch": 0.7860262008733624,
"grad_norm": 0.80078125,
"learning_rate": 0.00029067343597674686,
"loss": 0.1471,
"step": 29700
},
{
"epoch": 0.7873494773058092,
"grad_norm": 0.79296875,
"learning_rate": 0.00029062781282616527,
"loss": 0.1402,
"step": 29750
},
{
"epoch": 0.788672753738256,
"grad_norm": 0.490234375,
"learning_rate": 0.00029058208195752806,
"loss": 0.1309,
"step": 29800
},
{
"epoch": 0.7899960301707026,
"grad_norm": 0.37890625,
"learning_rate": 0.00029053624340586397,
"loss": 0.1306,
"step": 29850
},
{
"epoch": 0.7913193066031494,
"grad_norm": 0.5390625,
"learning_rate": 0.00029049029720628456,
"loss": 0.1362,
"step": 29900
},
{
"epoch": 0.7926425830355961,
"grad_norm": 0.419921875,
"learning_rate": 0.00029044424339398357,
"loss": 0.1369,
"step": 29950
},
{
"epoch": 0.7939658594680429,
"grad_norm": 0.41015625,
"learning_rate": 0.00029039808200423736,
"loss": 0.1336,
"step": 30000
},
{
"epoch": 0.7939658594680429,
"eval_cer": 0.037906228414508816,
"eval_loss": 0.2005736529827118,
"eval_runtime": 140.39,
"eval_samples_per_second": 18.014,
"eval_steps_per_second": 0.142,
"eval_wer": 0.11282088681446908,
"step": 30000
},
{
"epoch": 0.6094098560129791,
"grad_norm": 0.7734375,
"learning_rate": 0.00029578268099337873,
"loss": 0.3185,
"step": 30050
},
{
"epoch": 0.610423849117826,
"grad_norm": 0.57421875,
"learning_rate": 0.0002957589619350814,
"loss": 0.2563,
"step": 30100
},
{
"epoch": 0.6114378422226728,
"grad_norm": 0.76953125,
"learning_rate": 0.00029573517731969906,
"loss": 0.223,
"step": 30150
},
{
"epoch": 0.6124518353275198,
"grad_norm": 0.59375,
"learning_rate": 0.0002957113271579292,
"loss": 0.2166,
"step": 30200
},
{
"epoch": 0.6134658284323666,
"grad_norm": 0.65234375,
"learning_rate": 0.00029568741146049875,
"loss": 0.2127,
"step": 30250
},
{
"epoch": 0.6144798215372136,
"grad_norm": 0.6484375,
"learning_rate": 0.0002956634302381641,
"loss": 0.2099,
"step": 30300
},
{
"epoch": 0.6154938146420604,
"grad_norm": 0.6015625,
"learning_rate": 0.0002956393835017111,
"loss": 0.197,
"step": 30350
},
{
"epoch": 0.6165078077469073,
"grad_norm": 0.71875,
"learning_rate": 0.0002956152712619552,
"loss": 0.2084,
"step": 30400
},
{
"epoch": 0.6175218008517542,
"grad_norm": 0.53125,
"learning_rate": 0.00029559109352974113,
"loss": 0.1977,
"step": 30450
},
{
"epoch": 0.6185357939566011,
"grad_norm": 0.55078125,
"learning_rate": 0.0002955668503159432,
"loss": 0.2103,
"step": 30500
},
{
"epoch": 0.619549787061448,
"grad_norm": 0.60546875,
"learning_rate": 0.00029554254163146503,
"loss": 0.2018,
"step": 30550
},
{
"epoch": 0.6205637801662949,
"grad_norm": 0.6953125,
"learning_rate": 0.00029551816748723994,
"loss": 0.2006,
"step": 30600
},
{
"epoch": 0.6215777732711417,
"grad_norm": 0.61328125,
"learning_rate": 0.00029549372789423043,
"loss": 0.1851,
"step": 30650
},
{
"epoch": 0.6225917663759887,
"grad_norm": 0.578125,
"learning_rate": 0.00029546922286342853,
"loss": 0.2051,
"step": 30700
},
{
"epoch": 0.6236057594808355,
"grad_norm": 0.671875,
"learning_rate": 0.00029544465240585575,
"loss": 0.2019,
"step": 30750
},
{
"epoch": 0.6246197525856824,
"grad_norm": 0.60546875,
"learning_rate": 0.000295420016532563,
"loss": 0.1788,
"step": 30800
},
{
"epoch": 0.6256337456905293,
"grad_norm": 0.65234375,
"learning_rate": 0.00029539531525463056,
"loss": 0.1929,
"step": 30850
},
{
"epoch": 0.6266477387953762,
"grad_norm": 0.625,
"learning_rate": 0.0002953705485831682,
"loss": 0.1908,
"step": 30900
},
{
"epoch": 0.6276617319002231,
"grad_norm": 0.62109375,
"learning_rate": 0.0002953457165293151,
"loss": 0.1935,
"step": 30950
},
{
"epoch": 0.62867572500507,
"grad_norm": 0.61328125,
"learning_rate": 0.0002953208191042397,
"loss": 0.1895,
"step": 31000
},
{
"epoch": 0.6296897181099168,
"grad_norm": 0.5546875,
"learning_rate": 0.00029529585631914005,
"loss": 0.1893,
"step": 31050
},
{
"epoch": 0.6307037112147638,
"grad_norm": 0.63671875,
"learning_rate": 0.0002952708281852435,
"loss": 0.1987,
"step": 31100
},
{
"epoch": 0.6317177043196106,
"grad_norm": 0.5546875,
"learning_rate": 0.0002952457347138068,
"loss": 0.1742,
"step": 31150
},
{
"epoch": 0.6327316974244576,
"grad_norm": 0.6796875,
"learning_rate": 0.000295220575916116,
"loss": 0.1945,
"step": 31200
},
{
"epoch": 0.6337456905293044,
"grad_norm": 0.640625,
"learning_rate": 0.00029519535180348673,
"loss": 0.1767,
"step": 31250
},
{
"epoch": 0.6347596836341513,
"grad_norm": 0.59765625,
"learning_rate": 0.00029517006238726385,
"loss": 0.1791,
"step": 31300
},
{
"epoch": 0.6357736767389982,
"grad_norm": 0.64453125,
"learning_rate": 0.00029514470767882153,
"loss": 0.1845,
"step": 31350
},
{
"epoch": 0.636787669843845,
"grad_norm": 0.57421875,
"learning_rate": 0.0002951192876895635,
"loss": 0.1796,
"step": 31400
},
{
"epoch": 0.6378016629486919,
"grad_norm": 0.5234375,
"learning_rate": 0.0002950938024309227,
"loss": 0.1785,
"step": 31450
},
{
"epoch": 0.6388156560535388,
"grad_norm": 0.5859375,
"learning_rate": 0.0002950682519143615,
"loss": 0.1928,
"step": 31500
},
{
"epoch": 0.6388156560535388,
"eval_cer": 0.03631643837754322,
"eval_loss": 0.20944587886333466,
"eval_runtime": 136.9355,
"eval_samples_per_second": 20.506,
"eval_steps_per_second": 0.161,
"eval_wer": 0.11327781163247343,
"step": 31500
},
{
"epoch": 0.6398296491583857,
"grad_norm": 0.51171875,
"learning_rate": 0.00029504263615137157,
"loss": 0.1756,
"step": 31550
},
{
"epoch": 0.6408436422632326,
"grad_norm": 0.5234375,
"learning_rate": 0.00029501695515347394,
"loss": 0.1728,
"step": 31600
},
{
"epoch": 0.6418576353680795,
"grad_norm": 0.58984375,
"learning_rate": 0.000294991208932219,
"loss": 0.1881,
"step": 31650
},
{
"epoch": 0.6428716284729263,
"grad_norm": 0.5859375,
"learning_rate": 0.0002949653974991865,
"loss": 0.1809,
"step": 31700
},
{
"epoch": 0.6438856215777733,
"grad_norm": 0.58203125,
"learning_rate": 0.00029493952086598546,
"loss": 0.1769,
"step": 31750
},
{
"epoch": 0.6448996146826201,
"grad_norm": 0.68359375,
"learning_rate": 0.0002949135790442542,
"loss": 0.1814,
"step": 31800
},
{
"epoch": 0.6459136077874671,
"grad_norm": 0.63671875,
"learning_rate": 0.0002948875720456605,
"loss": 0.178,
"step": 31850
},
{
"epoch": 0.6469276008923139,
"grad_norm": 0.5390625,
"learning_rate": 0.0002948614998819013,
"loss": 0.1779,
"step": 31900
},
{
"epoch": 0.6479415939971608,
"grad_norm": 0.51171875,
"learning_rate": 0.0002948353625647029,
"loss": 0.1719,
"step": 31950
},
{
"epoch": 0.6489555871020077,
"grad_norm": 0.67578125,
"learning_rate": 0.000294809160105821,
"loss": 0.168,
"step": 32000
},
{
"epoch": 0.6499695802068546,
"grad_norm": 0.53515625,
"learning_rate": 0.0002947828925170404,
"loss": 0.1754,
"step": 32050
},
{
"epoch": 0.6509835733117015,
"grad_norm": 0.5859375,
"learning_rate": 0.0002947565598101754,
"loss": 0.1804,
"step": 32100
},
{
"epoch": 0.6519975664165484,
"grad_norm": 0.5859375,
"learning_rate": 0.0002947301619970695,
"loss": 0.1624,
"step": 32150
},
{
"epoch": 0.6530115595213952,
"grad_norm": 0.56640625,
"learning_rate": 0.0002947036990895954,
"loss": 0.1862,
"step": 32200
},
{
"epoch": 0.6540255526262422,
"grad_norm": 0.51171875,
"learning_rate": 0.00029467717109965514,
"loss": 0.1796,
"step": 32250
},
{
"epoch": 0.655039545731089,
"grad_norm": 0.65234375,
"learning_rate": 0.00029465057803918014,
"loss": 0.1796,
"step": 32300
},
{
"epoch": 0.6560535388359359,
"grad_norm": 0.5703125,
"learning_rate": 0.000294623919920131,
"loss": 0.1871,
"step": 32350
},
{
"epoch": 0.6570675319407828,
"grad_norm": 0.6015625,
"learning_rate": 0.0002945971967544974,
"loss": 0.1733,
"step": 32400
},
{
"epoch": 0.6580815250456297,
"grad_norm": 0.62109375,
"learning_rate": 0.0002945704085542987,
"loss": 0.1643,
"step": 32450
},
{
"epoch": 0.6590955181504766,
"grad_norm": 0.60546875,
"learning_rate": 0.0002945435553315831,
"loss": 0.175,
"step": 32500
},
{
"epoch": 0.6601095112553235,
"grad_norm": 0.5703125,
"learning_rate": 0.0002945166370984282,
"loss": 0.1808,
"step": 32550
},
{
"epoch": 0.6611235043601703,
"grad_norm": 0.625,
"learning_rate": 0.0002944896538669409,
"loss": 0.1634,
"step": 32600
},
{
"epoch": 0.6621374974650173,
"grad_norm": 0.63671875,
"learning_rate": 0.0002944626056492572,
"loss": 0.1746,
"step": 32650
},
{
"epoch": 0.6631514905698641,
"grad_norm": 0.5234375,
"learning_rate": 0.0002944354924575426,
"loss": 0.1633,
"step": 32700
},
{
"epoch": 0.6641654836747111,
"grad_norm": 0.5234375,
"learning_rate": 0.0002944083143039913,
"loss": 0.1617,
"step": 32750
},
{
"epoch": 0.6651794767795579,
"grad_norm": 0.53515625,
"learning_rate": 0.0002943810712008274,
"loss": 0.1649,
"step": 32800
},
{
"epoch": 0.6661934698844048,
"grad_norm": 0.703125,
"learning_rate": 0.0002943537631603036,
"loss": 0.1709,
"step": 32850
},
{
"epoch": 0.6672074629892517,
"grad_norm": 0.58984375,
"learning_rate": 0.0002943263901947022,
"loss": 0.1606,
"step": 32900
},
{
"epoch": 0.6682214560940986,
"grad_norm": 0.58984375,
"learning_rate": 0.00029429895231633453,
"loss": 0.1711,
"step": 32950
},
{
"epoch": 0.6692354491989454,
"grad_norm": 0.65234375,
"learning_rate": 0.0002942714495375411,
"loss": 0.1683,
"step": 33000
},
{
"epoch": 0.6692354491989454,
"eval_cer": 0.03620719253798894,
"eval_loss": 0.19835558533668518,
"eval_runtime": 121.7996,
"eval_samples_per_second": 23.054,
"eval_steps_per_second": 0.181,
"eval_wer": 0.10728553050308078,
"step": 33000
},
{
"epoch": 0.6702494423037924,
"grad_norm": 0.60546875,
"learning_rate": 0.0002942438818706917,
"loss": 0.1757,
"step": 33050
},
{
"epoch": 0.6712634354086392,
"grad_norm": 0.578125,
"learning_rate": 0.0002942162493281853,
"loss": 0.1617,
"step": 33100
},
{
"epoch": 0.6722774285134862,
"grad_norm": 0.70703125,
"learning_rate": 0.00029418855192245,
"loss": 0.171,
"step": 33150
},
{
"epoch": 0.673291421618333,
"grad_norm": 0.59375,
"learning_rate": 0.00029416078966594306,
"loss": 0.1655,
"step": 33200
},
{
"epoch": 0.6743054147231798,
"grad_norm": 0.5625,
"learning_rate": 0.00029413296257115094,
"loss": 0.1581,
"step": 33250
},
{
"epoch": 0.6753194078280268,
"grad_norm": 0.5703125,
"learning_rate": 0.00029410507065058924,
"loss": 0.1796,
"step": 33300
},
{
"epoch": 0.6763334009328736,
"grad_norm": 0.6015625,
"learning_rate": 0.00029407711391680276,
"loss": 0.1639,
"step": 33350
},
{
"epoch": 0.6773473940377206,
"grad_norm": 0.5859375,
"learning_rate": 0.00029404909238236543,
"loss": 0.1702,
"step": 33400
},
{
"epoch": 0.6783613871425674,
"grad_norm": 0.58203125,
"learning_rate": 0.0002940210060598803,
"loss": 0.1678,
"step": 33450
},
{
"epoch": 0.6793753802474143,
"grad_norm": 0.55859375,
"learning_rate": 0.0002939928549619796,
"loss": 0.1799,
"step": 33500
},
{
"epoch": 0.6803893733522612,
"grad_norm": 0.5625,
"learning_rate": 0.0002939646391013246,
"loss": 0.1712,
"step": 33550
},
{
"epoch": 0.6814033664571081,
"grad_norm": 0.59765625,
"learning_rate": 0.00029393635849060587,
"loss": 0.1714,
"step": 33600
},
{
"epoch": 0.6824173595619549,
"grad_norm": 0.5625,
"learning_rate": 0.00029390801314254294,
"loss": 0.1745,
"step": 33650
},
{
"epoch": 0.6834313526668019,
"grad_norm": 0.48828125,
"learning_rate": 0.0002938796030698845,
"loss": 0.1775,
"step": 33700
},
{
"epoch": 0.6844453457716487,
"grad_norm": 0.62890625,
"learning_rate": 0.0002938511282854084,
"loss": 0.1827,
"step": 33750
},
{
"epoch": 0.6854593388764957,
"grad_norm": 0.54296875,
"learning_rate": 0.0002938225888019216,
"loss": 0.1627,
"step": 33800
},
{
"epoch": 0.6864733319813425,
"grad_norm": 0.60546875,
"learning_rate": 0.0002937939846322601,
"loss": 0.1854,
"step": 33850
},
{
"epoch": 0.6874873250861894,
"grad_norm": 0.5,
"learning_rate": 0.00029376531578928895,
"loss": 0.1605,
"step": 33900
},
{
"epoch": 0.6885013181910363,
"grad_norm": 0.55859375,
"learning_rate": 0.0002937365822859024,
"loss": 0.1625,
"step": 33950
},
{
"epoch": 0.6895153112958832,
"grad_norm": 0.59375,
"learning_rate": 0.00029370778413502377,
"loss": 0.1674,
"step": 34000
},
{
"epoch": 0.6905293044007301,
"grad_norm": 0.64453125,
"learning_rate": 0.0002936789213496054,
"loss": 0.1667,
"step": 34050
},
{
"epoch": 0.691543297505577,
"grad_norm": 0.54296875,
"learning_rate": 0.0002936499939426287,
"loss": 0.1615,
"step": 34100
},
{
"epoch": 0.6925572906104238,
"grad_norm": 0.5234375,
"learning_rate": 0.0002936210019271042,
"loss": 0.1538,
"step": 34150
},
{
"epoch": 0.6935712837152708,
"grad_norm": 0.625,
"learning_rate": 0.00029359194531607135,
"loss": 0.1781,
"step": 34200
},
{
"epoch": 0.6945852768201176,
"grad_norm": 0.515625,
"learning_rate": 0.0002935628241225989,
"loss": 0.164,
"step": 34250
},
{
"epoch": 0.6955992699249645,
"grad_norm": 0.5859375,
"learning_rate": 0.00029353363835978453,
"loss": 0.1556,
"step": 34300
},
{
"epoch": 0.6966132630298114,
"grad_norm": 0.5546875,
"learning_rate": 0.0002935043880407548,
"loss": 0.1611,
"step": 34350
},
{
"epoch": 0.6976272561346583,
"grad_norm": 0.5234375,
"learning_rate": 0.00029347507317866546,
"loss": 0.1599,
"step": 34400
},
{
"epoch": 0.6986412492395052,
"grad_norm": 0.51953125,
"learning_rate": 0.0002934456937867013,
"loss": 0.1635,
"step": 34450
},
{
"epoch": 0.6996552423443521,
"grad_norm": 0.6015625,
"learning_rate": 0.00029341624987807614,
"loss": 0.153,
"step": 34500
},
{
"epoch": 0.6996552423443521,
"eval_cer": 0.03746091860144517,
"eval_loss": 0.1974794715642929,
"eval_runtime": 130.0224,
"eval_samples_per_second": 21.596,
"eval_steps_per_second": 0.169,
"eval_wer": 0.11148351276322026,
"step": 34500
},
{
"epoch": 0.7006692354491989,
"grad_norm": 0.57421875,
"learning_rate": 0.0002933867414660327,
"loss": 0.16,
"step": 34550
},
{
"epoch": 0.7016832285540459,
"grad_norm": 0.5859375,
"learning_rate": 0.0002933571685638429,
"loss": 0.1684,
"step": 34600
},
{
"epoch": 0.7026972216588927,
"grad_norm": 0.59375,
"learning_rate": 0.0002933275311848075,
"loss": 0.1581,
"step": 34650
},
{
"epoch": 0.7037112147637397,
"grad_norm": 0.59765625,
"learning_rate": 0.00029329782934225625,
"loss": 0.1547,
"step": 34700
},
{
"epoch": 0.7047252078685865,
"grad_norm": 0.64453125,
"learning_rate": 0.00029326806304954807,
"loss": 0.1759,
"step": 34750
},
{
"epoch": 0.7057392009734333,
"grad_norm": 0.546875,
"learning_rate": 0.0002932382323200707,
"loss": 0.1586,
"step": 34800
},
{
"epoch": 0.7067531940782803,
"grad_norm": 0.45703125,
"learning_rate": 0.00029320833716724095,
"loss": 0.1551,
"step": 34850
},
{
"epoch": 0.7077671871831271,
"grad_norm": 0.5390625,
"learning_rate": 0.0002931783776045045,
"loss": 0.1769,
"step": 34900
},
{
"epoch": 0.708781180287974,
"grad_norm": 0.53125,
"learning_rate": 0.0002931483536453362,
"loss": 0.1638,
"step": 34950
},
{
"epoch": 0.7097951733928209,
"grad_norm": 0.50390625,
"learning_rate": 0.0002931182653032396,
"loss": 0.1739,
"step": 35000
},
{
"epoch": 0.7108091664976678,
"grad_norm": 0.58984375,
"learning_rate": 0.0002930881125917474,
"loss": 0.1675,
"step": 35050
},
{
"epoch": 0.7118231596025147,
"grad_norm": 0.54296875,
"learning_rate": 0.0002930578955244212,
"loss": 0.1586,
"step": 35100
},
{
"epoch": 0.7128371527073616,
"grad_norm": 0.640625,
"learning_rate": 0.00029302761411485154,
"loss": 0.1505,
"step": 35150
},
{
"epoch": 0.7138511458122084,
"grad_norm": 0.53125,
"learning_rate": 0.0002929972683766579,
"loss": 0.1574,
"step": 35200
},
{
"epoch": 0.7148651389170554,
"grad_norm": 0.56640625,
"learning_rate": 0.0002929668583234887,
"loss": 0.156,
"step": 35250
},
{
"epoch": 0.7158791320219022,
"grad_norm": 0.52734375,
"learning_rate": 0.0002929363839690212,
"loss": 0.1539,
"step": 35300
},
{
"epoch": 0.7168931251267492,
"grad_norm": 0.5625,
"learning_rate": 0.0002929058453269618,
"loss": 0.1543,
"step": 35350
},
{
"epoch": 0.717907118231596,
"grad_norm": 0.515625,
"learning_rate": 0.00029287524241104556,
"loss": 0.159,
"step": 35400
},
{
"epoch": 0.7189211113364429,
"grad_norm": 0.55078125,
"learning_rate": 0.0002928445752350366,
"loss": 0.152,
"step": 35450
},
{
"epoch": 0.7199351044412898,
"grad_norm": 0.53125,
"learning_rate": 0.00029281384381272786,
"loss": 0.1588,
"step": 35500
},
{
"epoch": 0.7209490975461367,
"grad_norm": 0.5390625,
"learning_rate": 0.00029278304815794125,
"loss": 0.1541,
"step": 35550
},
{
"epoch": 0.7219630906509835,
"grad_norm": 0.48046875,
"learning_rate": 0.0002927521882845276,
"loss": 0.149,
"step": 35600
},
{
"epoch": 0.7229770837558305,
"grad_norm": 0.5078125,
"learning_rate": 0.0002927212642063664,
"loss": 0.1593,
"step": 35650
},
{
"epoch": 0.7239910768606773,
"grad_norm": 0.5234375,
"learning_rate": 0.0002926902759373663,
"loss": 0.1538,
"step": 35700
},
{
"epoch": 0.7250050699655243,
"grad_norm": 0.6328125,
"learning_rate": 0.0002926592234914647,
"loss": 0.1649,
"step": 35750
},
{
"epoch": 0.7260190630703711,
"grad_norm": 0.5078125,
"learning_rate": 0.00029262810688262777,
"loss": 0.1607,
"step": 35800
},
{
"epoch": 0.727033056175218,
"grad_norm": 0.5546875,
"learning_rate": 0.00029259692612485075,
"loss": 0.1637,
"step": 35850
},
{
"epoch": 0.7280470492800649,
"grad_norm": 0.53125,
"learning_rate": 0.00029256568123215755,
"loss": 0.1569,
"step": 35900
},
{
"epoch": 0.7290610423849118,
"grad_norm": 0.58203125,
"learning_rate": 0.00029253437221860094,
"loss": 0.1652,
"step": 35950
},
{
"epoch": 0.7300750354897587,
"grad_norm": 0.7890625,
"learning_rate": 0.0002925029990982627,
"loss": 0.1693,
"step": 36000
},
{
"epoch": 0.7300750354897587,
"eval_cer": 0.0336737294968969,
"eval_loss": 0.1916583925485611,
"eval_runtime": 128.7475,
"eval_samples_per_second": 21.81,
"eval_steps_per_second": 0.171,
"eval_wer": 0.10501726589477961,
"step": 36000
},
{
"epoch": 0.7310890285946056,
"grad_norm": 0.6328125,
"learning_rate": 0.0002924715618852532,
"loss": 0.1507,
"step": 36050
},
{
"epoch": 0.7321030216994524,
"grad_norm": 0.4921875,
"learning_rate": 0.00029244006059371184,
"loss": 0.1471,
"step": 36100
},
{
"epoch": 0.7331170148042994,
"grad_norm": 0.546875,
"learning_rate": 0.0002924084952378067,
"loss": 0.1509,
"step": 36150
},
{
"epoch": 0.7341310079091462,
"grad_norm": 0.50390625,
"learning_rate": 0.0002923768658317347,
"loss": 0.1499,
"step": 36200
},
{
"epoch": 0.7351450010139932,
"grad_norm": 0.6015625,
"learning_rate": 0.0002923451723897217,
"loss": 0.1729,
"step": 36250
},
{
"epoch": 0.73615899411884,
"grad_norm": 0.54296875,
"learning_rate": 0.0002923134149260222,
"loss": 0.1592,
"step": 36300
},
{
"epoch": 0.7371729872236868,
"grad_norm": 0.478515625,
"learning_rate": 0.0002922815934549195,
"loss": 0.1567,
"step": 36350
},
{
"epoch": 0.7381869803285338,
"grad_norm": 0.53125,
"learning_rate": 0.00029224970799072585,
"loss": 0.1517,
"step": 36400
},
{
"epoch": 0.7392009734333806,
"grad_norm": 0.640625,
"learning_rate": 0.00029221775854778215,
"loss": 0.154,
"step": 36450
},
{
"epoch": 0.7402149665382275,
"grad_norm": 0.5390625,
"learning_rate": 0.00029218574514045805,
"loss": 0.1487,
"step": 36500
},
{
"epoch": 0.7412289596430744,
"grad_norm": 0.8359375,
"learning_rate": 0.00029215366778315204,
"loss": 0.1556,
"step": 36550
},
{
"epoch": 0.7422429527479213,
"grad_norm": 0.5234375,
"learning_rate": 0.00029212152649029136,
"loss": 0.1538,
"step": 36600
},
{
"epoch": 0.7432569458527682,
"grad_norm": 0.51953125,
"learning_rate": 0.00029208932127633197,
"loss": 0.1568,
"step": 36650
},
{
"epoch": 0.7442709389576151,
"grad_norm": 0.5,
"learning_rate": 0.00029205705215575863,
"loss": 0.1575,
"step": 36700
},
{
"epoch": 0.7452849320624619,
"grad_norm": 0.5234375,
"learning_rate": 0.00029202471914308476,
"loss": 0.1544,
"step": 36750
},
{
"epoch": 0.7462989251673089,
"grad_norm": 0.55859375,
"learning_rate": 0.0002919923222528527,
"loss": 0.1535,
"step": 36800
},
{
"epoch": 0.7473129182721557,
"grad_norm": 0.50390625,
"learning_rate": 0.00029195986149963324,
"loss": 0.1521,
"step": 36850
},
{
"epoch": 0.7483269113770027,
"grad_norm": 0.6796875,
"learning_rate": 0.00029192733689802617,
"loss": 0.1489,
"step": 36900
},
{
"epoch": 0.7493409044818495,
"grad_norm": 0.51171875,
"learning_rate": 0.0002918947484626598,
"loss": 0.1578,
"step": 36950
},
{
"epoch": 0.7503548975866964,
"grad_norm": 0.5,
"learning_rate": 0.00029186209620819125,
"loss": 0.1551,
"step": 37000
},
{
"epoch": 0.7513688906915433,
"grad_norm": 0.6640625,
"learning_rate": 0.00029182938014930636,
"loss": 0.1525,
"step": 37050
},
{
"epoch": 0.7523828837963902,
"grad_norm": 0.5546875,
"learning_rate": 0.00029179660030071954,
"loss": 0.1497,
"step": 37100
},
{
"epoch": 0.753396876901237,
"grad_norm": 0.515625,
"learning_rate": 0.000291763756677174,
"loss": 0.1476,
"step": 37150
},
{
"epoch": 0.754410870006084,
"grad_norm": 0.462890625,
"learning_rate": 0.0002917308492934417,
"loss": 0.1577,
"step": 37200
},
{
"epoch": 0.7554248631109308,
"grad_norm": 0.52734375,
"learning_rate": 0.0002916978781643231,
"loss": 0.1644,
"step": 37250
},
{
"epoch": 0.7564388562157778,
"grad_norm": 0.5625,
"learning_rate": 0.0002916648433046474,
"loss": 0.1537,
"step": 37300
},
{
"epoch": 0.7574528493206246,
"grad_norm": 0.5234375,
"learning_rate": 0.00029163174472927254,
"loss": 0.1487,
"step": 37350
},
{
"epoch": 0.7584668424254715,
"grad_norm": 0.51171875,
"learning_rate": 0.00029159858245308496,
"loss": 0.159,
"step": 37400
},
{
"epoch": 0.7594808355303184,
"grad_norm": 0.546875,
"learning_rate": 0.000291565356491,
"loss": 0.1509,
"step": 37450
},
{
"epoch": 0.7604948286351653,
"grad_norm": 0.5625,
"learning_rate": 0.00029153206685796133,
"loss": 0.1477,
"step": 37500
},
{
"epoch": 0.7604948286351653,
"eval_cer": 0.036925093769345614,
"eval_loss": 0.19335635006427765,
"eval_runtime": 124.8684,
"eval_samples_per_second": 22.488,
"eval_steps_per_second": 0.176,
"eval_wer": 0.10775949624212879,
"step": 37500
},
{
"epoch": 0.7615088217400122,
"grad_norm": 0.515625,
"learning_rate": 0.0002914987135689415,
"loss": 0.1557,
"step": 37550
},
{
"epoch": 0.7625228148448591,
"grad_norm": 0.58203125,
"learning_rate": 0.00029146529663894163,
"loss": 0.1481,
"step": 37600
},
{
"epoch": 0.7635368079497059,
"grad_norm": 0.5859375,
"learning_rate": 0.00029143181608299137,
"loss": 0.1521,
"step": 37650
},
{
"epoch": 0.7645508010545529,
"grad_norm": 0.55859375,
"learning_rate": 0.0002913982719161491,
"loss": 0.1447,
"step": 37700
},
{
"epoch": 0.7655647941593997,
"grad_norm": 0.5859375,
"learning_rate": 0.0002913646641535018,
"loss": 0.1512,
"step": 37750
},
{
"epoch": 0.7665787872642466,
"grad_norm": 0.4921875,
"learning_rate": 0.00029133099281016493,
"loss": 0.1573,
"step": 37800
},
{
"epoch": 0.7675927803690935,
"grad_norm": 0.490234375,
"learning_rate": 0.0002912972579012828,
"loss": 0.1612,
"step": 37850
},
{
"epoch": 0.7686067734739404,
"grad_norm": 0.5703125,
"learning_rate": 0.0002912634594420279,
"loss": 0.153,
"step": 37900
},
{
"epoch": 0.7696207665787873,
"grad_norm": 0.51953125,
"learning_rate": 0.00029122959744760175,
"loss": 0.1449,
"step": 37950
},
{
"epoch": 0.7706347596836342,
"grad_norm": 0.58984375,
"learning_rate": 0.00029119567193323416,
"loss": 0.1518,
"step": 38000
},
{
"epoch": 0.771648752788481,
"grad_norm": 0.4921875,
"learning_rate": 0.00029116168291418366,
"loss": 0.1431,
"step": 38050
},
{
"epoch": 0.772662745893328,
"grad_norm": 0.58203125,
"learning_rate": 0.00029112763040573723,
"loss": 0.1513,
"step": 38100
},
{
"epoch": 0.7736767389981748,
"grad_norm": 0.484375,
"learning_rate": 0.0002910935144232104,
"loss": 0.1471,
"step": 38150
},
{
"epoch": 0.7746907321030218,
"grad_norm": 0.470703125,
"learning_rate": 0.0002910593349819473,
"loss": 0.1581,
"step": 38200
},
{
"epoch": 0.7757047252078686,
"grad_norm": 0.546875,
"learning_rate": 0.00029102509209732075,
"loss": 0.1545,
"step": 38250
},
{
"epoch": 0.7767187183127154,
"grad_norm": 0.54296875,
"learning_rate": 0.00029099078578473186,
"loss": 0.156,
"step": 38300
},
{
"epoch": 0.7777327114175624,
"grad_norm": 0.51953125,
"learning_rate": 0.0002909564160596103,
"loss": 0.1599,
"step": 38350
},
{
"epoch": 0.7787467045224092,
"grad_norm": 0.63671875,
"learning_rate": 0.00029092198293741443,
"loss": 0.147,
"step": 38400
},
{
"epoch": 0.7797606976272561,
"grad_norm": 0.52734375,
"learning_rate": 0.00029088748643363093,
"loss": 0.1582,
"step": 38450
},
{
"epoch": 0.780774690732103,
"grad_norm": 0.51953125,
"learning_rate": 0.00029085292656377517,
"loss": 0.1536,
"step": 38500
},
{
"epoch": 0.7817886838369499,
"grad_norm": 0.498046875,
"learning_rate": 0.0002908183033433908,
"loss": 0.1633,
"step": 38550
},
{
"epoch": 0.7828026769417968,
"grad_norm": 0.5390625,
"learning_rate": 0.00029078361678805024,
"loss": 0.1404,
"step": 38600
},
{
"epoch": 0.7838166700466437,
"grad_norm": 0.55078125,
"learning_rate": 0.0002907488669133541,
"loss": 0.1606,
"step": 38650
},
{
"epoch": 0.7848306631514905,
"grad_norm": 0.53125,
"learning_rate": 0.0002907140537349317,
"loss": 0.1494,
"step": 38700
},
{
"epoch": 0.7858446562563375,
"grad_norm": 0.4609375,
"learning_rate": 0.0002906791772684406,
"loss": 0.1413,
"step": 38750
},
{
"epoch": 0.7868586493611843,
"grad_norm": 0.53125,
"learning_rate": 0.0002906442375295672,
"loss": 0.152,
"step": 38800
},
{
"epoch": 0.7878726424660313,
"grad_norm": 0.546875,
"learning_rate": 0.00029060923453402593,
"loss": 0.1441,
"step": 38850
},
{
"epoch": 0.7888866355708781,
"grad_norm": 0.53125,
"learning_rate": 0.00029057416829756,
"loss": 0.1444,
"step": 38900
},
{
"epoch": 0.789900628675725,
"grad_norm": 0.5,
"learning_rate": 0.0002905390388359408,
"loss": 0.1516,
"step": 38950
},
{
"epoch": 0.7909146217805719,
"grad_norm": 0.46484375,
"learning_rate": 0.0002905038461649684,
"loss": 0.1629,
"step": 39000
},
{
"epoch": 0.7909146217805719,
"eval_cer": 0.033736155690927916,
"eval_loss": 0.1899910569190979,
"eval_runtime": 125.9975,
"eval_samples_per_second": 22.286,
"eval_steps_per_second": 0.175,
"eval_wer": 0.10474642832960931,
"step": 39000
},
{
"epoch": 0.7919286148854188,
"grad_norm": 0.45703125,
"learning_rate": 0.0002904685903004711,
"loss": 0.1465,
"step": 39050
},
{
"epoch": 0.7929426079902656,
"grad_norm": 0.54296875,
"learning_rate": 0.0002904332712583058,
"loss": 0.1409,
"step": 39100
},
{
"epoch": 0.7939566010951126,
"grad_norm": 0.546875,
"learning_rate": 0.0002903978890543576,
"loss": 0.1453,
"step": 39150
},
{
"epoch": 0.7949705941999594,
"grad_norm": 0.462890625,
"learning_rate": 0.00029036244370454024,
"loss": 0.1445,
"step": 39200
},
{
"epoch": 0.7959845873048064,
"grad_norm": 0.61328125,
"learning_rate": 0.0002903269352247957,
"loss": 0.1488,
"step": 39250
},
{
"epoch": 0.7969985804096532,
"grad_norm": 0.58984375,
"learning_rate": 0.0002902913636310944,
"loss": 0.1614,
"step": 39300
},
{
"epoch": 0.7980125735145001,
"grad_norm": 0.490234375,
"learning_rate": 0.00029025572893943524,
"loss": 0.1469,
"step": 39350
},
{
"epoch": 0.799026566619347,
"grad_norm": 0.486328125,
"learning_rate": 0.00029022003116584524,
"loss": 0.1457,
"step": 39400
},
{
"epoch": 0.8000405597241939,
"grad_norm": 0.55859375,
"learning_rate": 0.00029018427032638007,
"loss": 0.1575,
"step": 39450
},
{
"epoch": 0.8010545528290408,
"grad_norm": 0.54296875,
"learning_rate": 0.00029014844643712365,
"loss": 0.1588,
"step": 39500
},
{
"epoch": 0.8020685459338877,
"grad_norm": 0.447265625,
"learning_rate": 0.0002901125595141882,
"loss": 0.1606,
"step": 39550
},
{
"epoch": 0.8030825390387345,
"grad_norm": 0.5703125,
"learning_rate": 0.0002900766095737145,
"loss": 0.1488,
"step": 39600
},
{
"epoch": 0.8040965321435815,
"grad_norm": 0.54296875,
"learning_rate": 0.00029004059663187133,
"loss": 0.1525,
"step": 39650
},
{
"epoch": 0.8051105252484283,
"grad_norm": 0.82421875,
"learning_rate": 0.00029000452070485614,
"loss": 0.1462,
"step": 39700
},
{
"epoch": 0.8061245183532751,
"grad_norm": 0.4609375,
"learning_rate": 0.00028996838180889454,
"loss": 0.14,
"step": 39750
},
{
"epoch": 0.8071385114581221,
"grad_norm": 0.61328125,
"learning_rate": 0.0002899321799602405,
"loss": 0.1386,
"step": 39800
},
{
"epoch": 0.8081525045629689,
"grad_norm": 0.494140625,
"learning_rate": 0.00028989591517517634,
"loss": 0.1514,
"step": 39850
},
{
"epoch": 0.8091664976678159,
"grad_norm": 0.6640625,
"learning_rate": 0.00028985958747001253,
"loss": 0.1427,
"step": 39900
},
{
"epoch": 0.8101804907726627,
"grad_norm": 0.4921875,
"learning_rate": 0.000289823196861088,
"loss": 0.1426,
"step": 39950
},
{
"epoch": 0.8111944838775096,
"grad_norm": 0.5,
"learning_rate": 0.00028978674336476996,
"loss": 0.1632,
"step": 40000
},
{
"epoch": 0.8122084769823565,
"grad_norm": 0.5078125,
"learning_rate": 0.0002897502269974538,
"loss": 0.1505,
"step": 40050
},
{
"epoch": 0.8132224700872034,
"grad_norm": 0.515625,
"learning_rate": 0.00028971364777556337,
"loss": 0.1429,
"step": 40100
},
{
"epoch": 0.8142364631920503,
"grad_norm": 0.53515625,
"learning_rate": 0.00028967700571555055,
"loss": 0.1628,
"step": 40150
},
{
"epoch": 0.8152504562968972,
"grad_norm": 0.54296875,
"learning_rate": 0.00028964030083389563,
"loss": 0.1501,
"step": 40200
},
{
"epoch": 0.816264449401744,
"grad_norm": 0.57421875,
"learning_rate": 0.0002896035331471072,
"loss": 0.1397,
"step": 40250
},
{
"epoch": 0.817278442506591,
"grad_norm": 0.5078125,
"learning_rate": 0.000289566702671722,
"loss": 0.1568,
"step": 40300
},
{
"epoch": 0.8182924356114378,
"grad_norm": 0.515625,
"learning_rate": 0.00028952980942430505,
"loss": 0.1536,
"step": 40350
},
{
"epoch": 0.8193064287162848,
"grad_norm": 0.55859375,
"learning_rate": 0.00028949285342144953,
"loss": 0.142,
"step": 40400
},
{
"epoch": 0.8203204218211316,
"grad_norm": 0.462890625,
"learning_rate": 0.00028945583467977694,
"loss": 0.1432,
"step": 40450
},
{
"epoch": 0.8213344149259785,
"grad_norm": 0.5546875,
"learning_rate": 0.000289418753215937,
"loss": 0.1536,
"step": 40500
},
{
"epoch": 0.8213344149259785,
"eval_cer": 0.03131714067222607,
"eval_loss": 0.1901644915342331,
"eval_runtime": 128.9949,
"eval_samples_per_second": 21.768,
"eval_steps_per_second": 0.171,
"eval_wer": 0.10068386485205498,
"step": 40500
},
{
"epoch": 0.8223484080308254,
"grad_norm": 0.51953125,
"learning_rate": 0.00028938160904660754,
"loss": 0.1552,
"step": 40550
},
{
"epoch": 0.8233624011356723,
"grad_norm": 0.5625,
"learning_rate": 0.0002893444021884948,
"loss": 0.1527,
"step": 40600
},
{
"epoch": 0.8243763942405191,
"grad_norm": 0.53515625,
"learning_rate": 0.00028930713265833287,
"loss": 0.1427,
"step": 40650
},
{
"epoch": 0.8253903873453661,
"grad_norm": 0.53125,
"learning_rate": 0.0002892698004728843,
"loss": 0.1537,
"step": 40700
},
{
"epoch": 0.8264043804502129,
"grad_norm": 0.5078125,
"learning_rate": 0.0002892324056489399,
"loss": 0.1366,
"step": 40750
},
{
"epoch": 0.8274183735550599,
"grad_norm": 0.59765625,
"learning_rate": 0.0002891949482033183,
"loss": 0.155,
"step": 40800
},
{
"epoch": 0.8284323666599067,
"grad_norm": 0.57421875,
"learning_rate": 0.00028915742815286663,
"loss": 0.1468,
"step": 40850
},
{
"epoch": 0.8294463597647536,
"grad_norm": 0.56640625,
"learning_rate": 0.00028911984551445994,
"loss": 0.1511,
"step": 40900
},
{
"epoch": 0.8304603528696005,
"grad_norm": 0.53515625,
"learning_rate": 0.00028908220030500167,
"loss": 0.1611,
"step": 40950
},
{
"epoch": 0.8314743459744474,
"grad_norm": 0.5703125,
"learning_rate": 0.00028904449254142314,
"loss": 0.1406,
"step": 41000
},
{
"epoch": 0.8324883390792943,
"grad_norm": 0.49609375,
"learning_rate": 0.000289006722240684,
"loss": 0.1424,
"step": 41050
},
{
"epoch": 0.8335023321841412,
"grad_norm": 0.50390625,
"learning_rate": 0.00028896888941977195,
"loss": 0.1418,
"step": 41100
},
{
"epoch": 0.834516325288988,
"grad_norm": 0.404296875,
"learning_rate": 0.00028893099409570274,
"loss": 0.1542,
"step": 41150
},
{
"epoch": 0.835530318393835,
"grad_norm": 0.494140625,
"learning_rate": 0.00028889303628552046,
"loss": 0.1377,
"step": 41200
},
{
"epoch": 0.8365443114986818,
"grad_norm": 0.5390625,
"learning_rate": 0.00028885501600629706,
"loss": 0.1413,
"step": 41250
},
{
"epoch": 0.8375583046035286,
"grad_norm": 0.50390625,
"learning_rate": 0.00028881693327513264,
"loss": 0.1439,
"step": 41300
},
{
"epoch": 0.8385722977083756,
"grad_norm": 0.50390625,
"learning_rate": 0.0002887787881091555,
"loss": 0.1466,
"step": 41350
},
{
"epoch": 0.8395862908132224,
"grad_norm": 0.58203125,
"learning_rate": 0.000288740580525522,
"loss": 0.1419,
"step": 41400
},
{
"epoch": 0.8406002839180694,
"grad_norm": 0.46484375,
"learning_rate": 0.0002887023105414164,
"loss": 0.1413,
"step": 41450
},
{
"epoch": 0.8416142770229162,
"grad_norm": 0.578125,
"learning_rate": 0.0002886639781740512,
"loss": 0.1376,
"step": 41500
},
{
"epoch": 0.8426282701277631,
"grad_norm": 0.51953125,
"learning_rate": 0.0002886255834406669,
"loss": 0.1384,
"step": 41550
},
{
"epoch": 0.84364226323261,
"grad_norm": 0.5546875,
"learning_rate": 0.0002885871263585321,
"loss": 0.1469,
"step": 41600
},
{
"epoch": 0.8446562563374569,
"grad_norm": 0.48828125,
"learning_rate": 0.0002885486069449434,
"loss": 0.148,
"step": 41650
},
{
"epoch": 0.8456702494423038,
"grad_norm": 0.5859375,
"learning_rate": 0.0002885100252172253,
"loss": 0.1373,
"step": 41700
},
{
"epoch": 0.8466842425471507,
"grad_norm": 0.484375,
"learning_rate": 0.0002884713811927306,
"loss": 0.1521,
"step": 41750
},
{
"epoch": 0.8476982356519975,
"grad_norm": 0.56640625,
"learning_rate": 0.00028843267488883994,
"loss": 0.1516,
"step": 41800
},
{
"epoch": 0.8487122287568445,
"grad_norm": 0.609375,
"learning_rate": 0.000288393906322962,
"loss": 0.1398,
"step": 41850
},
{
"epoch": 0.8497262218616913,
"grad_norm": 0.5390625,
"learning_rate": 0.00028835507551253347,
"loss": 0.1478,
"step": 41900
},
{
"epoch": 0.8507402149665382,
"grad_norm": 0.447265625,
"learning_rate": 0.00028831618247501903,
"loss": 0.151,
"step": 41950
},
{
"epoch": 0.8517542080713851,
"grad_norm": 0.65625,
"learning_rate": 0.0002882772272279114,
"loss": 0.1449,
"step": 42000
},
{
"epoch": 0.8517542080713851,
"eval_cer": 0.037216416008157026,
"eval_loss": 0.18569107353687286,
"eval_runtime": 126.3528,
"eval_samples_per_second": 22.223,
"eval_steps_per_second": 0.174,
"eval_wer": 0.10640530841627734,
"step": 42000
},
{
"epoch": 0.852768201176232,
"grad_norm": 0.5234375,
"learning_rate": 0.00028823820978873116,
"loss": 0.1369,
"step": 42050
},
{
"epoch": 0.8537821942810789,
"grad_norm": 0.453125,
"learning_rate": 0.000288199130175027,
"loss": 0.1542,
"step": 42100
},
{
"epoch": 0.8547961873859258,
"grad_norm": 0.50390625,
"learning_rate": 0.0002881599884043755,
"loss": 0.1407,
"step": 42150
},
{
"epoch": 0.8558101804907726,
"grad_norm": 0.52734375,
"learning_rate": 0.0002881207844943812,
"loss": 0.1549,
"step": 42200
},
{
"epoch": 0.8568241735956196,
"grad_norm": 0.7890625,
"learning_rate": 0.0002880815184626765,
"loss": 0.1517,
"step": 42250
},
{
"epoch": 0.8578381667004664,
"grad_norm": 0.482421875,
"learning_rate": 0.00028804219032692205,
"loss": 0.1339,
"step": 42300
},
{
"epoch": 0.8588521598053134,
"grad_norm": 0.453125,
"learning_rate": 0.00028800280010480607,
"loss": 0.1433,
"step": 42350
},
{
"epoch": 0.8598661529101602,
"grad_norm": 0.56640625,
"learning_rate": 0.00028796334781404484,
"loss": 0.1443,
"step": 42400
},
{
"epoch": 0.8608801460150071,
"grad_norm": 0.50390625,
"learning_rate": 0.00028792383347238257,
"loss": 0.1509,
"step": 42450
},
{
"epoch": 0.861894139119854,
"grad_norm": 0.46875,
"learning_rate": 0.0002878842570975914,
"loss": 0.1417,
"step": 42500
},
{
"epoch": 0.8629081322247009,
"grad_norm": 0.55859375,
"learning_rate": 0.0002878446187074714,
"loss": 0.1432,
"step": 42550
},
{
"epoch": 0.8639221253295477,
"grad_norm": 0.49609375,
"learning_rate": 0.00028780491831985035,
"loss": 0.1363,
"step": 42600
},
{
"epoch": 0.8649361184343947,
"grad_norm": 0.5625,
"learning_rate": 0.00028776515595258423,
"loss": 0.1327,
"step": 42650
},
{
"epoch": 0.8659501115392415,
"grad_norm": 0.53125,
"learning_rate": 0.0002877253316235565,
"loss": 0.1462,
"step": 42700
},
{
"epoch": 0.8669641046440885,
"grad_norm": 0.4609375,
"learning_rate": 0.00028768544535067884,
"loss": 0.1597,
"step": 42750
},
{
"epoch": 0.8679780977489353,
"grad_norm": 0.490234375,
"learning_rate": 0.00028764549715189057,
"loss": 0.1422,
"step": 42800
},
{
"epoch": 0.8689920908537822,
"grad_norm": 0.46484375,
"learning_rate": 0.000287605487045159,
"loss": 0.1407,
"step": 42850
},
{
"epoch": 0.8700060839586291,
"grad_norm": 0.490234375,
"learning_rate": 0.0002875654150484792,
"loss": 0.1473,
"step": 42900
},
{
"epoch": 0.871020077063476,
"grad_norm": 0.55078125,
"learning_rate": 0.00028752528117987404,
"loss": 0.1529,
"step": 42950
},
{
"epoch": 0.8720340701683229,
"grad_norm": 0.47265625,
"learning_rate": 0.00028748508545739434,
"loss": 0.1508,
"step": 43000
},
{
"epoch": 0.8730480632731697,
"grad_norm": 0.4765625,
"learning_rate": 0.00028744482789911865,
"loss": 0.1389,
"step": 43050
},
{
"epoch": 0.8740620563780166,
"grad_norm": 0.53125,
"learning_rate": 0.0002874045085231534,
"loss": 0.1504,
"step": 43100
},
{
"epoch": 0.8750760494828635,
"grad_norm": 0.5234375,
"learning_rate": 0.0002873641273476327,
"loss": 0.1356,
"step": 43150
},
{
"epoch": 0.8760900425877104,
"grad_norm": 0.5234375,
"learning_rate": 0.0002873236843907186,
"loss": 0.1497,
"step": 43200
},
{
"epoch": 0.8771040356925572,
"grad_norm": 0.46875,
"learning_rate": 0.0002872831796706009,
"loss": 0.1367,
"step": 43250
},
{
"epoch": 0.8781180287974042,
"grad_norm": 0.51171875,
"learning_rate": 0.00028724261320549707,
"loss": 0.154,
"step": 43300
},
{
"epoch": 0.879132021902251,
"grad_norm": 0.5,
"learning_rate": 0.00028720198501365246,
"loss": 0.1381,
"step": 43350
},
{
"epoch": 0.880146015007098,
"grad_norm": 0.5234375,
"learning_rate": 0.00028716129511334015,
"loss": 0.1431,
"step": 43400
},
{
"epoch": 0.8811600081119448,
"grad_norm": 0.44140625,
"learning_rate": 0.00028712054352286097,
"loss": 0.1467,
"step": 43450
},
{
"epoch": 0.8821740012167917,
"grad_norm": 0.490234375,
"learning_rate": 0.0002870797302605436,
"loss": 0.1482,
"step": 43500
},
{
"epoch": 0.8821740012167917,
"eval_cer": 0.030463982687135523,
"eval_loss": 0.18255223333835602,
"eval_runtime": 128.3696,
"eval_samples_per_second": 21.874,
"eval_steps_per_second": 0.171,
"eval_wer": 0.09966822398266639,
"step": 43500
},
{
"epoch": 0.8831879943216386,
"grad_norm": 0.9609375,
"learning_rate": 0.00028703885534474424,
"loss": 0.1583,
"step": 43550
},
{
"epoch": 0.8842019874264855,
"grad_norm": 0.5859375,
"learning_rate": 0.000286997918793847,
"loss": 0.1471,
"step": 43600
},
{
"epoch": 0.8852159805313324,
"grad_norm": 0.59375,
"learning_rate": 0.0002869569206262636,
"loss": 0.1374,
"step": 43650
},
{
"epoch": 0.8862299736361793,
"grad_norm": 0.51171875,
"learning_rate": 0.0002869158608604336,
"loss": 0.1507,
"step": 43700
},
{
"epoch": 0.8872439667410261,
"grad_norm": 0.56640625,
"learning_rate": 0.00028687473951482417,
"loss": 0.1357,
"step": 43750
},
{
"epoch": 0.8882579598458731,
"grad_norm": 0.416015625,
"learning_rate": 0.00028683355660793017,
"loss": 0.1379,
"step": 43800
},
{
"epoch": 0.8892719529507199,
"grad_norm": 0.5234375,
"learning_rate": 0.00028679231215827423,
"loss": 0.1396,
"step": 43850
},
{
"epoch": 0.8902859460555668,
"grad_norm": 0.515625,
"learning_rate": 0.0002867510061844066,
"loss": 0.1507,
"step": 43900
},
{
"epoch": 0.8912999391604137,
"grad_norm": 0.5703125,
"learning_rate": 0.00028670963870490505,
"loss": 0.1629,
"step": 43950
},
{
"epoch": 0.8923139322652606,
"grad_norm": 0.453125,
"learning_rate": 0.0002866682097383754,
"loss": 0.1376,
"step": 44000
},
{
"epoch": 0.8933279253701075,
"grad_norm": 0.494140625,
"learning_rate": 0.0002866267193034507,
"loss": 0.1442,
"step": 44050
},
{
"epoch": 0.8943419184749544,
"grad_norm": 0.51171875,
"learning_rate": 0.000286585167418792,
"loss": 0.1448,
"step": 44100
},
{
"epoch": 0.8953559115798012,
"grad_norm": 0.455078125,
"learning_rate": 0.0002865435541030877,
"loss": 0.1503,
"step": 44150
},
{
"epoch": 0.8963699046846482,
"grad_norm": 0.490234375,
"learning_rate": 0.00028650187937505404,
"loss": 0.1459,
"step": 44200
},
{
"epoch": 0.897383897789495,
"grad_norm": 0.490234375,
"learning_rate": 0.00028646014325343477,
"loss": 0.1412,
"step": 44250
},
{
"epoch": 0.898397890894342,
"grad_norm": 0.47265625,
"learning_rate": 0.0002864183457570013,
"loss": 0.141,
"step": 44300
},
{
"epoch": 0.8994118839991888,
"grad_norm": 0.53515625,
"learning_rate": 0.0002863764869045525,
"loss": 0.1498,
"step": 44350
},
{
"epoch": 0.9004258771040357,
"grad_norm": 0.462890625,
"learning_rate": 0.0002863345667149152,
"loss": 0.1488,
"step": 44400
},
{
"epoch": 0.9014398702088826,
"grad_norm": 0.478515625,
"learning_rate": 0.0002862925852069434,
"loss": 0.1432,
"step": 44450
},
{
"epoch": 0.9024538633137295,
"grad_norm": 0.443359375,
"learning_rate": 0.0002862505423995189,
"loss": 0.1402,
"step": 44500
},
{
"epoch": 0.9034678564185763,
"grad_norm": 0.478515625,
"learning_rate": 0.000286208438311551,
"loss": 0.1542,
"step": 44550
},
{
"epoch": 0.9044818495234233,
"grad_norm": 0.44921875,
"learning_rate": 0.00028616627296197667,
"loss": 0.1386,
"step": 44600
},
{
"epoch": 0.9054958426282701,
"grad_norm": 0.55078125,
"learning_rate": 0.00028612404636976026,
"loss": 0.1499,
"step": 44650
},
{
"epoch": 0.906509835733117,
"grad_norm": 0.470703125,
"learning_rate": 0.00028608175855389384,
"loss": 0.1374,
"step": 44700
},
{
"epoch": 0.9075238288379639,
"grad_norm": 0.470703125,
"learning_rate": 0.0002860394095333968,
"loss": 0.1417,
"step": 44750
},
{
"epoch": 0.9085378219428107,
"grad_norm": 0.498046875,
"learning_rate": 0.0002859969993273164,
"loss": 0.136,
"step": 44800
},
{
"epoch": 0.9095518150476577,
"grad_norm": 0.546875,
"learning_rate": 0.000285954527954727,
"loss": 0.1446,
"step": 44850
},
{
"epoch": 0.9105658081525045,
"grad_norm": 0.53125,
"learning_rate": 0.0002859119954347308,
"loss": 0.1469,
"step": 44900
},
{
"epoch": 0.9115798012573515,
"grad_norm": 0.6015625,
"learning_rate": 0.00028586940178645736,
"loss": 0.1435,
"step": 44950
},
{
"epoch": 0.9125937943621983,
"grad_norm": 0.498046875,
"learning_rate": 0.00028582674702906375,
"loss": 0.1398,
"step": 45000
},
{
"epoch": 0.9125937943621983,
"eval_cer": 0.03009462770578535,
"eval_loss": 0.18028628826141357,
"eval_runtime": 128.6346,
"eval_samples_per_second": 21.829,
"eval_steps_per_second": 0.171,
"eval_wer": 0.09557180580946577,
"step": 45000
},
{
"epoch": 0.9136077874670452,
"grad_norm": 0.478515625,
"learning_rate": 0.0002857840311817345,
"loss": 0.1439,
"step": 45050
},
{
"epoch": 0.9146217805718921,
"grad_norm": 0.5078125,
"learning_rate": 0.0002857412542636817,
"loss": 0.132,
"step": 45100
},
{
"epoch": 0.915635773676739,
"grad_norm": 0.51171875,
"learning_rate": 0.00028569841629414485,
"loss": 0.1418,
"step": 45150
},
{
"epoch": 0.9166497667815859,
"grad_norm": 0.453125,
"learning_rate": 0.0002856555172923909,
"loss": 0.135,
"step": 45200
},
{
"epoch": 0.9176637598864328,
"grad_norm": 0.404296875,
"learning_rate": 0.00028561255727771424,
"loss": 0.1546,
"step": 45250
},
{
"epoch": 0.9186777529912796,
"grad_norm": 0.443359375,
"learning_rate": 0.0002855695362694367,
"loss": 0.1422,
"step": 45300
},
{
"epoch": 0.9196917460961266,
"grad_norm": 0.5625,
"learning_rate": 0.00028552645428690755,
"loss": 0.152,
"step": 45350
},
{
"epoch": 0.9207057392009734,
"grad_norm": 0.515625,
"learning_rate": 0.00028548331134950364,
"loss": 0.1614,
"step": 45400
},
{
"epoch": 0.9217197323058203,
"grad_norm": 0.59375,
"learning_rate": 0.00028544010747662903,
"loss": 0.1406,
"step": 45450
},
{
"epoch": 0.9227337254106672,
"grad_norm": 0.4921875,
"learning_rate": 0.00028539684268771517,
"loss": 0.1474,
"step": 45500
},
{
"epoch": 0.9237477185155141,
"grad_norm": 0.466796875,
"learning_rate": 0.00028535351700222105,
"loss": 0.1485,
"step": 45550
},
{
"epoch": 0.924761711620361,
"grad_norm": 0.458984375,
"learning_rate": 0.00028531013043963304,
"loss": 0.1368,
"step": 45600
},
{
"epoch": 0.9257757047252079,
"grad_norm": 0.474609375,
"learning_rate": 0.0002852666830194648,
"loss": 0.1446,
"step": 45650
},
{
"epoch": 0.9267896978300547,
"grad_norm": 0.427734375,
"learning_rate": 0.0002852231747612573,
"loss": 0.1343,
"step": 45700
},
{
"epoch": 0.9278036909349017,
"grad_norm": 0.58203125,
"learning_rate": 0.00028517960568457914,
"loss": 0.1394,
"step": 45750
},
{
"epoch": 0.9288176840397485,
"grad_norm": 0.54296875,
"learning_rate": 0.000285135975809026,
"loss": 0.1463,
"step": 45800
},
{
"epoch": 0.9298316771445955,
"grad_norm": 0.484375,
"learning_rate": 0.00028509228515422106,
"loss": 0.1455,
"step": 45850
},
{
"epoch": 0.9308456702494423,
"grad_norm": 0.48046875,
"learning_rate": 0.00028504853373981475,
"loss": 0.1413,
"step": 45900
},
{
"epoch": 0.9318596633542892,
"grad_norm": 0.57421875,
"learning_rate": 0.0002850047215854849,
"loss": 0.1314,
"step": 45950
},
{
"epoch": 0.9328736564591361,
"grad_norm": 0.4765625,
"learning_rate": 0.0002849608487109366,
"loss": 0.1345,
"step": 46000
},
{
"epoch": 0.933887649563983,
"grad_norm": 0.46484375,
"learning_rate": 0.00028491691513590235,
"loss": 0.1321,
"step": 46050
},
{
"epoch": 0.9349016426688298,
"grad_norm": 0.546875,
"learning_rate": 0.00028487292088014176,
"loss": 0.1289,
"step": 46100
},
{
"epoch": 0.9359156357736768,
"grad_norm": 0.490234375,
"learning_rate": 0.0002848288659634419,
"loss": 0.1321,
"step": 46150
},
{
"epoch": 0.9369296288785236,
"grad_norm": 0.51171875,
"learning_rate": 0.00028478475040561715,
"loss": 0.1545,
"step": 46200
},
{
"epoch": 0.9379436219833706,
"grad_norm": 0.466796875,
"learning_rate": 0.0002847405742265089,
"loss": 0.1353,
"step": 46250
},
{
"epoch": 0.9389576150882174,
"grad_norm": 0.470703125,
"learning_rate": 0.00028469633744598617,
"loss": 0.1407,
"step": 46300
},
{
"epoch": 0.9399716081930642,
"grad_norm": 0.5078125,
"learning_rate": 0.000284652040083945,
"loss": 0.1375,
"step": 46350
},
{
"epoch": 0.9409856012979112,
"grad_norm": 0.486328125,
"learning_rate": 0.0002846076821603087,
"loss": 0.1499,
"step": 46400
},
{
"epoch": 0.941999594402758,
"grad_norm": 0.6171875,
"learning_rate": 0.00028456326369502783,
"loss": 0.1354,
"step": 46450
},
{
"epoch": 0.943013587507605,
"grad_norm": 0.48828125,
"learning_rate": 0.0002845187847080803,
"loss": 0.1431,
"step": 46500
},
{
"epoch": 0.943013587507605,
"eval_cer": 0.03134315158640565,
"eval_loss": 0.18719163537025452,
"eval_runtime": 129.4245,
"eval_samples_per_second": 21.696,
"eval_steps_per_second": 0.17,
"eval_wer": 0.09970207867831268,
"step": 46500
},
{
"epoch": 0.9440275806124518,
"grad_norm": 0.5234375,
"learning_rate": 0.00028447424521947103,
"loss": 0.1401,
"step": 46550
},
{
"epoch": 0.9450415737172987,
"grad_norm": 0.4609375,
"learning_rate": 0.00028442964524923235,
"loss": 0.1431,
"step": 46600
},
{
"epoch": 0.9460555668221456,
"grad_norm": 0.546875,
"learning_rate": 0.0002843849848174237,
"loss": 0.1404,
"step": 46650
},
{
"epoch": 0.9470695599269925,
"grad_norm": 0.47265625,
"learning_rate": 0.00028434026394413164,
"loss": 0.1339,
"step": 46700
},
{
"epoch": 0.9480835530318393,
"grad_norm": 0.451171875,
"learning_rate": 0.00028429548264947014,
"loss": 0.1372,
"step": 46750
},
{
"epoch": 0.9490975461366863,
"grad_norm": 0.474609375,
"learning_rate": 0.00028425064095358003,
"loss": 0.14,
"step": 46800
},
{
"epoch": 0.9501115392415331,
"grad_norm": 0.46484375,
"learning_rate": 0.0002842057388766296,
"loss": 0.1387,
"step": 46850
},
{
"epoch": 0.9511255323463801,
"grad_norm": 0.478515625,
"learning_rate": 0.00028416077643881406,
"loss": 0.1414,
"step": 46900
},
{
"epoch": 0.9521395254512269,
"grad_norm": 0.55859375,
"learning_rate": 0.000284115753660356,
"loss": 0.132,
"step": 46950
},
{
"epoch": 0.9531535185560738,
"grad_norm": 0.47265625,
"learning_rate": 0.000284070670561505,
"loss": 0.1377,
"step": 47000
},
{
"epoch": 0.9541675116609207,
"grad_norm": 0.5390625,
"learning_rate": 0.0002840255271625377,
"loss": 0.1322,
"step": 47050
},
{
"epoch": 0.9551815047657676,
"grad_norm": 0.5,
"learning_rate": 0.00028398032348375815,
"loss": 0.1324,
"step": 47100
},
{
"epoch": 0.9561954978706145,
"grad_norm": 0.474609375,
"learning_rate": 0.00028393505954549714,
"loss": 0.1445,
"step": 47150
},
{
"epoch": 0.9572094909754614,
"grad_norm": 0.4609375,
"learning_rate": 0.00028388973536811276,
"loss": 0.1346,
"step": 47200
},
{
"epoch": 0.9582234840803082,
"grad_norm": 0.5234375,
"learning_rate": 0.0002838443509719903,
"loss": 0.1389,
"step": 47250
},
{
"epoch": 0.9592374771851552,
"grad_norm": 0.46484375,
"learning_rate": 0.00028379890637754196,
"loss": 0.1317,
"step": 47300
},
{
"epoch": 0.960251470290002,
"grad_norm": 0.48046875,
"learning_rate": 0.000283753401605207,
"loss": 0.1411,
"step": 47350
},
{
"epoch": 0.9612654633948489,
"grad_norm": 0.466796875,
"learning_rate": 0.0002837078366754519,
"loss": 0.1434,
"step": 47400
},
{
"epoch": 0.9622794564996958,
"grad_norm": 0.46484375,
"learning_rate": 0.0002836622116087701,
"loss": 0.1378,
"step": 47450
},
{
"epoch": 0.9632934496045427,
"grad_norm": 0.53515625,
"learning_rate": 0.000283616526425682,
"loss": 0.1385,
"step": 47500
},
{
"epoch": 0.9643074427093896,
"grad_norm": 0.478515625,
"learning_rate": 0.00028357078114673527,
"loss": 0.1385,
"step": 47550
},
{
"epoch": 0.9653214358142365,
"grad_norm": 0.5234375,
"learning_rate": 0.0002835249757925044,
"loss": 0.1381,
"step": 47600
},
{
"epoch": 0.9663354289190833,
"grad_norm": 0.6171875,
"learning_rate": 0.000283479110383591,
"loss": 0.142,
"step": 47650
},
{
"epoch": 0.9673494220239303,
"grad_norm": 0.478515625,
"learning_rate": 0.0002834331849406236,
"loss": 0.1537,
"step": 47700
},
{
"epoch": 0.9683634151287771,
"grad_norm": 0.490234375,
"learning_rate": 0.0002833871994842579,
"loss": 0.1318,
"step": 47750
},
{
"epoch": 0.9693774082336241,
"grad_norm": 0.494140625,
"learning_rate": 0.00028334115403517643,
"loss": 0.1472,
"step": 47800
},
{
"epoch": 0.9703914013384709,
"grad_norm": 0.50390625,
"learning_rate": 0.00028329504861408875,
"loss": 0.1366,
"step": 47850
},
{
"epoch": 0.9714053944433177,
"grad_norm": 0.546875,
"learning_rate": 0.00028324888324173143,
"loss": 0.1294,
"step": 47900
},
{
"epoch": 0.9724193875481647,
"grad_norm": 0.478515625,
"learning_rate": 0.00028320265793886797,
"loss": 0.1353,
"step": 47950
},
{
"epoch": 0.9734333806530115,
"grad_norm": 0.6484375,
"learning_rate": 0.00028315637272628875,
"loss": 0.1313,
"step": 48000
},
{
"epoch": 0.9734333806530115,
"eval_cer": 0.030838539851321616,
"eval_loss": 0.18022538721561432,
"eval_runtime": 129.1875,
"eval_samples_per_second": 21.736,
"eval_steps_per_second": 0.17,
"eval_wer": 0.0979416345047058,
"step": 48000
},
{
"epoch": 0.9744473737578584,
"grad_norm": 0.51171875,
"learning_rate": 0.00028311002762481134,
"loss": 0.1304,
"step": 48050
},
{
"epoch": 0.9754613668627053,
"grad_norm": 0.498046875,
"learning_rate": 0.00028306362265528,
"loss": 0.127,
"step": 48100
},
{
"epoch": 0.9764753599675522,
"grad_norm": 0.5546875,
"learning_rate": 0.00028301715783856594,
"loss": 0.1405,
"step": 48150
},
{
"epoch": 0.9774893530723991,
"grad_norm": 0.51171875,
"learning_rate": 0.00028297063319556744,
"loss": 0.1317,
"step": 48200
},
{
"epoch": 0.978503346177246,
"grad_norm": 0.498046875,
"learning_rate": 0.0002829240487472095,
"loss": 0.134,
"step": 48250
},
{
"epoch": 0.9795173392820928,
"grad_norm": 0.408203125,
"learning_rate": 0.00028287740451444425,
"loss": 0.131,
"step": 48300
},
{
"epoch": 0.9805313323869398,
"grad_norm": 0.578125,
"learning_rate": 0.0002828307005182504,
"loss": 0.1492,
"step": 48350
},
{
"epoch": 0.9815453254917866,
"grad_norm": 0.55859375,
"learning_rate": 0.0002827839367796339,
"loss": 0.1392,
"step": 48400
},
{
"epoch": 0.9825593185966336,
"grad_norm": 0.5078125,
"learning_rate": 0.0002827371133196272,
"loss": 0.1289,
"step": 48450
},
{
"epoch": 0.9835733117014804,
"grad_norm": 0.5234375,
"learning_rate": 0.0002826902301592899,
"loss": 0.1473,
"step": 48500
},
{
"epoch": 0.9845873048063273,
"grad_norm": 0.416015625,
"learning_rate": 0.0002826432873197083,
"loss": 0.1496,
"step": 48550
},
{
"epoch": 0.9856012979111742,
"grad_norm": 0.609375,
"learning_rate": 0.0002825962848219956,
"loss": 0.1412,
"step": 48600
},
{
"epoch": 0.9866152910160211,
"grad_norm": 0.4375,
"learning_rate": 0.00028254922268729183,
"loss": 0.1349,
"step": 48650
},
{
"epoch": 0.9876292841208679,
"grad_norm": 0.466796875,
"learning_rate": 0.0002825021009367638,
"loss": 0.1315,
"step": 48700
},
{
"epoch": 0.9886432772257149,
"grad_norm": 0.58203125,
"learning_rate": 0.0002824549195916052,
"loss": 0.1331,
"step": 48750
},
{
"epoch": 0.9896572703305617,
"grad_norm": 0.53125,
"learning_rate": 0.00028240767867303637,
"loss": 0.1371,
"step": 48800
},
{
"epoch": 0.9906712634354087,
"grad_norm": 0.462890625,
"learning_rate": 0.0002823603782023047,
"loss": 0.1465,
"step": 48850
},
{
"epoch": 0.9916852565402555,
"grad_norm": 0.42578125,
"learning_rate": 0.0002823130182006842,
"loss": 0.1319,
"step": 48900
},
{
"epoch": 0.9926992496451024,
"grad_norm": 0.490234375,
"learning_rate": 0.00028226559868947557,
"loss": 0.1367,
"step": 48950
},
{
"epoch": 0.9937132427499493,
"grad_norm": 0.53125,
"learning_rate": 0.00028221811969000654,
"loss": 0.1442,
"step": 49000
},
{
"epoch": 0.9947272358547962,
"grad_norm": 0.486328125,
"learning_rate": 0.0002821705812236313,
"loss": 0.127,
"step": 49050
},
{
"epoch": 0.9957412289596431,
"grad_norm": 0.53515625,
"learning_rate": 0.000282122983311731,
"loss": 0.1355,
"step": 49100
},
{
"epoch": 0.99675522206449,
"grad_norm": 0.466796875,
"learning_rate": 0.00028207532597571345,
"loss": 0.1403,
"step": 49150
},
{
"epoch": 0.9977692151693368,
"grad_norm": 0.515625,
"learning_rate": 0.0002820276092370132,
"loss": 0.1313,
"step": 49200
},
{
"epoch": 0.9987832082741838,
"grad_norm": 0.453125,
"learning_rate": 0.0002819798331170914,
"loss": 0.1301,
"step": 49250
},
{
"epoch": 0.9997972013790306,
"grad_norm": 0.49609375,
"learning_rate": 0.0002819319976374361,
"loss": 0.1437,
"step": 49300
},
{
"epoch": 1.0008111944838776,
"grad_norm": 0.51953125,
"learning_rate": 0.000281884102819562,
"loss": 0.1182,
"step": 49350
},
{
"epoch": 1.0018251875887243,
"grad_norm": 0.439453125,
"learning_rate": 0.00028183614868501037,
"loss": 0.1136,
"step": 49400
},
{
"epoch": 1.0028391806935713,
"grad_norm": 0.4140625,
"learning_rate": 0.0002817881352553493,
"loss": 0.1285,
"step": 49450
},
{
"epoch": 1.0038531737984182,
"grad_norm": 0.5390625,
"learning_rate": 0.00028174006255217345,
"loss": 0.1181,
"step": 49500
},
{
"epoch": 1.0038531737984182,
"eval_cer": 0.03714358544845417,
"eval_loss": 0.17951156198978424,
"eval_runtime": 123.1021,
"eval_samples_per_second": 22.81,
"eval_steps_per_second": 0.179,
"eval_wer": 0.10264743719953957,
"step": 49500
},
{
"epoch": 1.0048671669032652,
"grad_norm": 0.56640625,
"learning_rate": 0.00028169193059710413,
"loss": 0.1281,
"step": 49550
},
{
"epoch": 1.005881160008112,
"grad_norm": 0.443359375,
"learning_rate": 0.00028164373941178943,
"loss": 0.1142,
"step": 49600
},
{
"epoch": 1.0068951531129589,
"grad_norm": 0.53515625,
"learning_rate": 0.00028159548901790396,
"loss": 0.12,
"step": 49650
},
{
"epoch": 1.0079091462178058,
"grad_norm": 0.54296875,
"learning_rate": 0.00028154717943714894,
"loss": 0.1161,
"step": 49700
},
{
"epoch": 1.0089231393226525,
"grad_norm": 0.4765625,
"learning_rate": 0.00028149881069125234,
"loss": 0.125,
"step": 49750
},
{
"epoch": 1.0099371324274995,
"grad_norm": 0.52734375,
"learning_rate": 0.00028145038280196866,
"loss": 0.1093,
"step": 49800
},
{
"epoch": 1.0109511255323465,
"grad_norm": 0.423828125,
"learning_rate": 0.000281401895791079,
"loss": 0.1106,
"step": 49850
},
{
"epoch": 1.0119651186371932,
"grad_norm": 0.439453125,
"learning_rate": 0.000281353349680391,
"loss": 0.1161,
"step": 49900
},
{
"epoch": 1.0129791117420401,
"grad_norm": 0.486328125,
"learning_rate": 0.000281304744491739,
"loss": 0.1113,
"step": 49950
},
{
"epoch": 1.013993104846887,
"grad_norm": 0.53515625,
"learning_rate": 0.00028125608024698383,
"loss": 0.1167,
"step": 50000
},
{
"epoch": 1.0150070979517338,
"grad_norm": 0.57421875,
"learning_rate": 0.0002812073569680128,
"loss": 0.1201,
"step": 50050
},
{
"epoch": 1.0160210910565808,
"grad_norm": 0.421875,
"learning_rate": 0.0002811585746767401,
"loss": 0.1118,
"step": 50100
},
{
"epoch": 1.0170350841614277,
"grad_norm": 0.498046875,
"learning_rate": 0.000281109733395106,
"loss": 0.125,
"step": 50150
},
{
"epoch": 1.0180490772662747,
"grad_norm": 0.53125,
"learning_rate": 0.00028106083314507774,
"loss": 0.1188,
"step": 50200
},
{
"epoch": 1.0190630703711214,
"grad_norm": 0.451171875,
"learning_rate": 0.0002810118739486487,
"loss": 0.1068,
"step": 50250
},
{
"epoch": 1.0200770634759684,
"grad_norm": 0.51953125,
"learning_rate": 0.00028096285582783904,
"loss": 0.1147,
"step": 50300
},
{
"epoch": 1.0210910565808153,
"grad_norm": 0.515625,
"learning_rate": 0.00028091377880469525,
"loss": 0.119,
"step": 50350
},
{
"epoch": 1.022105049685662,
"grad_norm": 0.5078125,
"learning_rate": 0.00028086464290129044,
"loss": 0.1162,
"step": 50400
},
{
"epoch": 1.023119042790509,
"grad_norm": 0.41796875,
"learning_rate": 0.00028081544813972424,
"loss": 0.1157,
"step": 50450
},
{
"epoch": 1.024133035895356,
"grad_norm": 0.47265625,
"learning_rate": 0.00028076619454212254,
"loss": 0.1114,
"step": 50500
},
{
"epoch": 1.0251470290002027,
"grad_norm": 0.423828125,
"learning_rate": 0.0002807168821306378,
"loss": 0.1102,
"step": 50550
},
{
"epoch": 1.0261610221050497,
"grad_norm": 0.515625,
"learning_rate": 0.0002806675109274491,
"loss": 0.1135,
"step": 50600
},
{
"epoch": 1.0271750152098966,
"grad_norm": 0.4609375,
"learning_rate": 0.0002806180809547617,
"loss": 0.1136,
"step": 50650
},
{
"epoch": 1.0281890083147434,
"grad_norm": 0.59375,
"learning_rate": 0.0002805685922348075,
"loss": 0.1217,
"step": 50700
},
{
"epoch": 1.0292030014195903,
"grad_norm": 0.5078125,
"learning_rate": 0.0002805190447898446,
"loss": 0.1211,
"step": 50750
},
{
"epoch": 1.0302169945244373,
"grad_norm": 0.55859375,
"learning_rate": 0.0002804694386421577,
"loss": 0.131,
"step": 50800
},
{
"epoch": 1.0312309876292842,
"grad_norm": 0.466796875,
"learning_rate": 0.0002804197738140578,
"loss": 0.1207,
"step": 50850
},
{
"epoch": 1.032244980734131,
"grad_norm": 0.5625,
"learning_rate": 0.0002803700503278825,
"loss": 0.1171,
"step": 50900
},
{
"epoch": 1.033258973838978,
"grad_norm": 0.4453125,
"learning_rate": 0.0002803202682059954,
"loss": 0.1112,
"step": 50950
},
{
"epoch": 1.0342729669438249,
"grad_norm": 0.41015625,
"learning_rate": 0.00028027042747078685,
"loss": 0.1101,
"step": 51000
},
{
"epoch": 1.0342729669438249,
"eval_cer": 0.030817731119977942,
"eval_loss": 0.18131904304027557,
"eval_runtime": 126.3098,
"eval_samples_per_second": 22.231,
"eval_steps_per_second": 0.174,
"eval_wer": 0.09763694224388923,
"step": 51000
},
{
"epoch": 1.0352869600486716,
"grad_norm": 0.470703125,
"learning_rate": 0.0002802205281446733,
"loss": 0.1144,
"step": 51050
},
{
"epoch": 1.0363009531535186,
"grad_norm": 0.50390625,
"learning_rate": 0.0002801705702500977,
"loss": 0.1174,
"step": 51100
},
{
"epoch": 1.0373149462583655,
"grad_norm": 0.515625,
"learning_rate": 0.0002801205538095293,
"loss": 0.1229,
"step": 51150
},
{
"epoch": 1.0383289393632122,
"grad_norm": 0.49609375,
"learning_rate": 0.00028007047884546365,
"loss": 0.1153,
"step": 51200
},
{
"epoch": 1.0393429324680592,
"grad_norm": 0.458984375,
"learning_rate": 0.00028002034538042263,
"loss": 0.1263,
"step": 51250
},
{
"epoch": 1.0403569255729062,
"grad_norm": 0.48046875,
"learning_rate": 0.00027997015343695454,
"loss": 0.1095,
"step": 51300
},
{
"epoch": 1.0413709186777529,
"grad_norm": 0.46875,
"learning_rate": 0.0002799199030376339,
"loss": 0.1166,
"step": 51350
},
{
"epoch": 1.0423849117825998,
"grad_norm": 0.5234375,
"learning_rate": 0.0002798695942050613,
"loss": 0.1277,
"step": 51400
},
{
"epoch": 1.0433989048874468,
"grad_norm": 0.498046875,
"learning_rate": 0.00027981922696186407,
"loss": 0.1101,
"step": 51450
},
{
"epoch": 1.0444128979922938,
"grad_norm": 0.515625,
"learning_rate": 0.00027976880133069555,
"loss": 0.1212,
"step": 51500
},
{
"epoch": 1.0454268910971405,
"grad_norm": 0.4921875,
"learning_rate": 0.00027971831733423515,
"loss": 0.1169,
"step": 51550
},
{
"epoch": 1.0464408842019874,
"grad_norm": 0.47265625,
"learning_rate": 0.00027966777499518895,
"loss": 0.1097,
"step": 51600
},
{
"epoch": 1.0474548773068344,
"grad_norm": 0.51171875,
"learning_rate": 0.0002796171743362889,
"loss": 0.1184,
"step": 51650
},
{
"epoch": 1.0484688704116811,
"grad_norm": 0.494140625,
"learning_rate": 0.00027956651538029343,
"loss": 0.1165,
"step": 51700
},
{
"epoch": 1.049482863516528,
"grad_norm": 0.490234375,
"learning_rate": 0.00027951579814998717,
"loss": 0.1223,
"step": 51750
},
{
"epoch": 1.050496856621375,
"grad_norm": 0.50390625,
"learning_rate": 0.00027946502266818076,
"loss": 0.1151,
"step": 51800
},
{
"epoch": 1.0515108497262218,
"grad_norm": 0.478515625,
"learning_rate": 0.00027941418895771115,
"loss": 0.1116,
"step": 51850
},
{
"epoch": 1.0525248428310687,
"grad_norm": 0.462890625,
"learning_rate": 0.0002793632970414416,
"loss": 0.1182,
"step": 51900
},
{
"epoch": 1.0535388359359157,
"grad_norm": 0.51953125,
"learning_rate": 0.00027931234694226136,
"loss": 0.117,
"step": 51950
},
{
"epoch": 1.0545528290407624,
"grad_norm": 0.486328125,
"learning_rate": 0.00027926133868308597,
"loss": 0.1206,
"step": 52000
},
{
"epoch": 1.0555668221456094,
"grad_norm": 0.73046875,
"learning_rate": 0.00027921027228685713,
"loss": 0.1191,
"step": 52050
},
{
"epoch": 1.0565808152504563,
"grad_norm": 0.48046875,
"learning_rate": 0.0002791591477765426,
"loss": 0.1166,
"step": 52100
},
{
"epoch": 1.0575948083553033,
"grad_norm": 0.4609375,
"learning_rate": 0.00027910796517513633,
"loss": 0.122,
"step": 52150
},
{
"epoch": 1.05860880146015,
"grad_norm": 0.50390625,
"learning_rate": 0.0002790567245056584,
"loss": 0.1101,
"step": 52200
},
{
"epoch": 1.059622794564997,
"grad_norm": 0.47265625,
"learning_rate": 0.00027900542579115497,
"loss": 0.1227,
"step": 52250
},
{
"epoch": 1.060636787669844,
"grad_norm": 0.63671875,
"learning_rate": 0.00027895406905469847,
"loss": 0.1106,
"step": 52300
},
{
"epoch": 1.0616507807746907,
"grad_norm": 0.8828125,
"learning_rate": 0.0002789026543193871,
"loss": 0.1257,
"step": 52350
},
{
"epoch": 1.0626647738795376,
"grad_norm": 0.49609375,
"learning_rate": 0.0002788511816083455,
"loss": 0.1105,
"step": 52400
},
{
"epoch": 1.0636787669843846,
"grad_norm": 0.435546875,
"learning_rate": 0.0002787996509447241,
"loss": 0.1144,
"step": 52450
},
{
"epoch": 1.0646927600892313,
"grad_norm": 0.498046875,
"learning_rate": 0.00027874806235169964,
"loss": 0.1158,
"step": 52500
},
{
"epoch": 1.0646927600892313,
"eval_cer": 0.031348353769241576,
"eval_loss": 0.18024787306785583,
"eval_runtime": 126.6593,
"eval_samples_per_second": 22.17,
"eval_steps_per_second": 0.174,
"eval_wer": 0.09773850633082809,
"step": 52500
},
{
"epoch": 1.0657067531940783,
"grad_norm": 0.443359375,
"learning_rate": 0.0002786964158524747,
"loss": 0.1189,
"step": 52550
},
{
"epoch": 1.0667207462989252,
"grad_norm": 0.52734375,
"learning_rate": 0.0002786447114702781,
"loss": 0.1121,
"step": 52600
},
{
"epoch": 1.0677347394037722,
"grad_norm": 0.484375,
"learning_rate": 0.0002785929492283644,
"loss": 0.1214,
"step": 52650
},
{
"epoch": 1.068748732508619,
"grad_norm": 0.50390625,
"learning_rate": 0.00027854112915001456,
"loss": 0.1259,
"step": 52700
},
{
"epoch": 1.0697627256134659,
"grad_norm": 0.58203125,
"learning_rate": 0.00027848925125853534,
"loss": 0.1195,
"step": 52750
},
{
"epoch": 1.0707767187183128,
"grad_norm": 0.5234375,
"learning_rate": 0.0002784373155772595,
"loss": 0.1177,
"step": 52800
},
{
"epoch": 1.0717907118231595,
"grad_norm": 0.427734375,
"learning_rate": 0.0002783853221295458,
"loss": 0.1259,
"step": 52850
},
{
"epoch": 1.0728047049280065,
"grad_norm": 0.5078125,
"learning_rate": 0.00027833327093877903,
"loss": 0.1113,
"step": 52900
},
{
"epoch": 1.0738186980328535,
"grad_norm": 0.451171875,
"learning_rate": 0.00027828116202836996,
"loss": 0.12,
"step": 52950
},
{
"epoch": 1.0748326911377002,
"grad_norm": 0.439453125,
"learning_rate": 0.00027822899542175516,
"loss": 0.1148,
"step": 53000
},
{
"epoch": 1.0758466842425471,
"grad_norm": 0.5,
"learning_rate": 0.00027817677114239743,
"loss": 0.1225,
"step": 53050
},
{
"epoch": 1.076860677347394,
"grad_norm": 0.50390625,
"learning_rate": 0.0002781244892137853,
"loss": 0.1148,
"step": 53100
},
{
"epoch": 1.0778746704522408,
"grad_norm": 0.515625,
"learning_rate": 0.0002780721496594332,
"loss": 0.1228,
"step": 53150
},
{
"epoch": 1.0788886635570878,
"grad_norm": 0.48046875,
"learning_rate": 0.0002780197525028817,
"loss": 0.1193,
"step": 53200
},
{
"epoch": 1.0799026566619347,
"grad_norm": 0.435546875,
"learning_rate": 0.000277967297767697,
"loss": 0.119,
"step": 53250
},
{
"epoch": 1.0809166497667815,
"grad_norm": 0.5625,
"learning_rate": 0.00027791478547747146,
"loss": 0.1126,
"step": 53300
},
{
"epoch": 1.0819306428716284,
"grad_norm": 0.5625,
"learning_rate": 0.00027786221565582304,
"loss": 0.1292,
"step": 53350
},
{
"epoch": 1.0829446359764754,
"grad_norm": 0.5703125,
"learning_rate": 0.0002778095883263959,
"loss": 0.1131,
"step": 53400
},
{
"epoch": 1.0839586290813223,
"grad_norm": 0.478515625,
"learning_rate": 0.00027775690351285984,
"loss": 0.1225,
"step": 53450
},
{
"epoch": 1.084972622186169,
"grad_norm": 0.47265625,
"learning_rate": 0.00027770416123891055,
"loss": 0.1154,
"step": 53500
},
{
"epoch": 1.085986615291016,
"grad_norm": 0.490234375,
"learning_rate": 0.00027765136152826963,
"loss": 0.1089,
"step": 53550
},
{
"epoch": 1.087000608395863,
"grad_norm": 0.515625,
"learning_rate": 0.0002775985044046844,
"loss": 0.1189,
"step": 53600
},
{
"epoch": 1.0880146015007097,
"grad_norm": 0.484375,
"learning_rate": 0.00027754558989192814,
"loss": 0.1189,
"step": 53650
},
{
"epoch": 1.0890285946055567,
"grad_norm": 0.5,
"learning_rate": 0.00027749261801379986,
"loss": 0.1354,
"step": 53700
},
{
"epoch": 1.0900425877104036,
"grad_norm": 0.5546875,
"learning_rate": 0.0002774395887941244,
"loss": 0.1146,
"step": 53750
},
{
"epoch": 1.0910565808152504,
"grad_norm": 0.453125,
"learning_rate": 0.0002773865022567524,
"loss": 0.1173,
"step": 53800
},
{
"epoch": 1.0920705739200973,
"grad_norm": 0.5,
"learning_rate": 0.0002773333584255601,
"loss": 0.1121,
"step": 53850
},
{
"epoch": 1.0930845670249443,
"grad_norm": 0.55859375,
"learning_rate": 0.0002772801573244499,
"loss": 0.1281,
"step": 53900
},
{
"epoch": 1.0940985601297912,
"grad_norm": 0.486328125,
"learning_rate": 0.0002772268989773495,
"loss": 0.1216,
"step": 53950
},
{
"epoch": 1.095112553234638,
"grad_norm": 0.49609375,
"learning_rate": 0.0002771735834082127,
"loss": 0.1193,
"step": 54000
},
{
"epoch": 1.095112553234638,
"eval_cer": 0.031686495653576244,
"eval_loss": 0.18309062719345093,
"eval_runtime": 129.5836,
"eval_samples_per_second": 21.669,
"eval_steps_per_second": 0.17,
"eval_wer": 0.09777236102647437,
"step": 54000
},
{
"epoch": 1.096126546339485,
"grad_norm": 0.47265625,
"learning_rate": 0.0002771202106410189,
"loss": 0.1112,
"step": 54050
},
{
"epoch": 1.0971405394443319,
"grad_norm": 0.5234375,
"learning_rate": 0.0002770667806997732,
"loss": 0.1232,
"step": 54100
},
{
"epoch": 1.0981545325491786,
"grad_norm": 0.5234375,
"learning_rate": 0.00027701329360850643,
"loss": 0.1179,
"step": 54150
},
{
"epoch": 1.0991685256540256,
"grad_norm": 0.435546875,
"learning_rate": 0.00027695974939127523,
"loss": 0.114,
"step": 54200
},
{
"epoch": 1.1001825187588725,
"grad_norm": 0.482421875,
"learning_rate": 0.0002769061480721617,
"loss": 0.1169,
"step": 54250
},
{
"epoch": 1.1011965118637193,
"grad_norm": 0.51953125,
"learning_rate": 0.00027685248967527387,
"loss": 0.124,
"step": 54300
},
{
"epoch": 1.1022105049685662,
"grad_norm": 0.56640625,
"learning_rate": 0.0002767987742247453,
"loss": 0.1214,
"step": 54350
},
{
"epoch": 1.1032244980734132,
"grad_norm": 0.43359375,
"learning_rate": 0.00027674500174473533,
"loss": 0.124,
"step": 54400
},
{
"epoch": 1.10423849117826,
"grad_norm": 0.490234375,
"learning_rate": 0.00027669117225942875,
"loss": 0.1162,
"step": 54450
},
{
"epoch": 1.1052524842831069,
"grad_norm": 0.58203125,
"learning_rate": 0.0002766372857930361,
"loss": 0.1246,
"step": 54500
},
{
"epoch": 1.1062664773879538,
"grad_norm": 0.498046875,
"learning_rate": 0.0002765833423697937,
"loss": 0.1143,
"step": 54550
},
{
"epoch": 1.1072804704928005,
"grad_norm": 0.435546875,
"learning_rate": 0.0002765293420139632,
"loss": 0.1126,
"step": 54600
},
{
"epoch": 1.1082944635976475,
"grad_norm": 0.578125,
"learning_rate": 0.00027647528474983204,
"loss": 0.1118,
"step": 54650
},
{
"epoch": 1.1093084567024944,
"grad_norm": 0.51171875,
"learning_rate": 0.0002764211706017132,
"loss": 0.1158,
"step": 54700
},
{
"epoch": 1.1103224498073414,
"grad_norm": 0.466796875,
"learning_rate": 0.0002763669995939453,
"loss": 0.1147,
"step": 54750
},
{
"epoch": 1.1113364429121881,
"grad_norm": 0.423828125,
"learning_rate": 0.00027631277175089254,
"loss": 0.1136,
"step": 54800
},
{
"epoch": 1.112350436017035,
"grad_norm": 0.51953125,
"learning_rate": 0.00027625848709694447,
"loss": 0.1218,
"step": 54850
},
{
"epoch": 1.113364429121882,
"grad_norm": 0.466796875,
"learning_rate": 0.00027620414565651647,
"loss": 0.115,
"step": 54900
},
{
"epoch": 1.1143784222267288,
"grad_norm": 0.484375,
"learning_rate": 0.00027614974745404934,
"loss": 0.1181,
"step": 54950
},
{
"epoch": 1.1153924153315757,
"grad_norm": 0.515625,
"learning_rate": 0.0002760952925140094,
"loss": 0.1186,
"step": 55000
},
{
"epoch": 1.1164064084364227,
"grad_norm": 0.51953125,
"learning_rate": 0.00027604078086088845,
"loss": 0.1163,
"step": 55050
},
{
"epoch": 1.1174204015412694,
"grad_norm": 0.53125,
"learning_rate": 0.000275986212519204,
"loss": 0.1153,
"step": 55100
},
{
"epoch": 1.1184343946461164,
"grad_norm": 0.431640625,
"learning_rate": 0.00027593158751349877,
"loss": 0.1123,
"step": 55150
},
{
"epoch": 1.1194483877509633,
"grad_norm": 0.5078125,
"learning_rate": 0.0002758769058683411,
"loss": 0.1121,
"step": 55200
},
{
"epoch": 1.1204623808558103,
"grad_norm": 0.80078125,
"learning_rate": 0.0002758221676083249,
"loss": 0.1217,
"step": 55250
},
{
"epoch": 1.121476373960657,
"grad_norm": 0.484375,
"learning_rate": 0.0002757673727580693,
"loss": 0.1168,
"step": 55300
},
{
"epoch": 1.122490367065504,
"grad_norm": 0.51171875,
"learning_rate": 0.00027571252134221924,
"loss": 0.1136,
"step": 55350
},
{
"epoch": 1.123504360170351,
"grad_norm": 0.451171875,
"learning_rate": 0.00027565761338544477,
"loss": 0.1174,
"step": 55400
},
{
"epoch": 1.1245183532751977,
"grad_norm": 0.5234375,
"learning_rate": 0.0002756026489124415,
"loss": 0.1198,
"step": 55450
},
{
"epoch": 1.1255323463800446,
"grad_norm": 0.4609375,
"learning_rate": 0.0002755476279479304,
"loss": 0.1197,
"step": 55500
},
{
"epoch": 1.1255323463800446,
"eval_cer": 0.03313270248196143,
"eval_loss": 0.17757254838943481,
"eval_runtime": 123.3499,
"eval_samples_per_second": 22.765,
"eval_steps_per_second": 0.178,
"eval_wer": 0.09929582233055725,
"step": 55500
},
{
"epoch": 1.1265463394848916,
"grad_norm": 0.4921875,
"learning_rate": 0.0002754925505166581,
"loss": 0.1192,
"step": 55550
},
{
"epoch": 1.1275603325897383,
"grad_norm": 0.478515625,
"learning_rate": 0.0002754374166433962,
"loss": 0.1162,
"step": 55600
},
{
"epoch": 1.1285743256945853,
"grad_norm": 0.455078125,
"learning_rate": 0.00027538222635294205,
"loss": 0.1115,
"step": 55650
},
{
"epoch": 1.1295883187994322,
"grad_norm": 0.48046875,
"learning_rate": 0.0002753269796701181,
"loss": 0.1084,
"step": 55700
},
{
"epoch": 1.130602311904279,
"grad_norm": 0.57421875,
"learning_rate": 0.00027527167661977247,
"loss": 0.1251,
"step": 55750
},
{
"epoch": 1.131616305009126,
"grad_norm": 0.494140625,
"learning_rate": 0.0002752163172267784,
"loss": 0.1282,
"step": 55800
},
{
"epoch": 1.1326302981139729,
"grad_norm": 0.51171875,
"learning_rate": 0.00027516090151603445,
"loss": 0.1097,
"step": 55850
},
{
"epoch": 1.1336442912188196,
"grad_norm": 0.47265625,
"learning_rate": 0.0002751054295124647,
"loss": 0.1191,
"step": 55900
},
{
"epoch": 1.1346582843236666,
"grad_norm": 0.55078125,
"learning_rate": 0.0002750499012410184,
"loss": 0.1082,
"step": 55950
},
{
"epoch": 1.1356722774285135,
"grad_norm": 0.462890625,
"learning_rate": 0.0002749943167266702,
"loss": 0.1229,
"step": 56000
},
{
"epoch": 1.1366862705333605,
"grad_norm": 0.4921875,
"learning_rate": 0.00027493867599441985,
"loss": 0.1107,
"step": 56050
},
{
"epoch": 1.1377002636382072,
"grad_norm": 0.56640625,
"learning_rate": 0.0002748829790692927,
"loss": 0.1143,
"step": 56100
},
{
"epoch": 1.1387142567430542,
"grad_norm": 0.5234375,
"learning_rate": 0.0002748272259763391,
"loss": 0.1139,
"step": 56150
},
{
"epoch": 1.139728249847901,
"grad_norm": 0.5234375,
"learning_rate": 0.00027477141674063483,
"loss": 0.1206,
"step": 56200
},
{
"epoch": 1.1407422429527478,
"grad_norm": 0.58203125,
"learning_rate": 0.00027471555138728075,
"loss": 0.1178,
"step": 56250
},
{
"epoch": 1.1417562360575948,
"grad_norm": 0.44921875,
"learning_rate": 0.0002746596299414032,
"loss": 0.1115,
"step": 56300
},
{
"epoch": 1.1427702291624418,
"grad_norm": 0.5234375,
"learning_rate": 0.00027460365242815355,
"loss": 0.1184,
"step": 56350
},
{
"epoch": 1.1437842222672885,
"grad_norm": 0.64453125,
"learning_rate": 0.00027454761887270844,
"loss": 0.1124,
"step": 56400
},
{
"epoch": 1.1447982153721354,
"grad_norm": 0.46484375,
"learning_rate": 0.00027449152930026976,
"loss": 0.1177,
"step": 56450
},
{
"epoch": 1.1458122084769824,
"grad_norm": 0.498046875,
"learning_rate": 0.00027443538373606453,
"loss": 0.1235,
"step": 56500
},
{
"epoch": 1.1468262015818294,
"grad_norm": 0.484375,
"learning_rate": 0.000274379182205345,
"loss": 0.1216,
"step": 56550
},
{
"epoch": 1.147840194686676,
"grad_norm": 0.55078125,
"learning_rate": 0.00027432292473338857,
"loss": 0.1152,
"step": 56600
},
{
"epoch": 1.148854187791523,
"grad_norm": 0.66796875,
"learning_rate": 0.0002742666113454978,
"loss": 0.1182,
"step": 56650
},
{
"epoch": 1.14986818089637,
"grad_norm": 0.515625,
"learning_rate": 0.0002742102420670004,
"loss": 0.1146,
"step": 56700
},
{
"epoch": 1.1508821740012167,
"grad_norm": 0.4609375,
"learning_rate": 0.0002741538169232493,
"loss": 0.1144,
"step": 56750
},
{
"epoch": 1.1518961671060637,
"grad_norm": 0.439453125,
"learning_rate": 0.00027409733593962237,
"loss": 0.1145,
"step": 56800
},
{
"epoch": 1.1529101602109106,
"grad_norm": 0.416015625,
"learning_rate": 0.0002740407991415228,
"loss": 0.1088,
"step": 56850
},
{
"epoch": 1.1539241533157574,
"grad_norm": 0.5390625,
"learning_rate": 0.00027398420655437865,
"loss": 0.114,
"step": 56900
},
{
"epoch": 1.1549381464206043,
"grad_norm": 0.484375,
"learning_rate": 0.0002739275582036434,
"loss": 0.1179,
"step": 56950
},
{
"epoch": 1.1559521395254513,
"grad_norm": 0.50390625,
"learning_rate": 0.0002738708541147953,
"loss": 0.1235,
"step": 57000
},
{
"epoch": 1.1559521395254513,
"eval_cer": 0.03642048203426158,
"eval_loss": 0.17667804658412933,
"eval_runtime": 121.0428,
"eval_samples_per_second": 23.198,
"eval_steps_per_second": 0.182,
"eval_wer": 0.10159794163450471,
"step": 57000
},
{
"epoch": 1.156966132630298,
"grad_norm": 0.46875,
"learning_rate": 0.0002738140943133378,
"loss": 0.1207,
"step": 57050
},
{
"epoch": 1.157980125735145,
"grad_norm": 0.55078125,
"learning_rate": 0.00027375727882479936,
"loss": 0.1203,
"step": 57100
},
{
"epoch": 1.158994118839992,
"grad_norm": 0.51171875,
"learning_rate": 0.0002737004076747336,
"loss": 0.1161,
"step": 57150
},
{
"epoch": 1.1600081119448387,
"grad_norm": 0.412109375,
"learning_rate": 0.0002736434808887191,
"loss": 0.1128,
"step": 57200
},
{
"epoch": 1.1610221050496856,
"grad_norm": 0.70703125,
"learning_rate": 0.0002735864984923594,
"loss": 0.1128,
"step": 57250
},
{
"epoch": 1.1620360981545326,
"grad_norm": 0.53515625,
"learning_rate": 0.0002735294605112831,
"loss": 0.1191,
"step": 57300
},
{
"epoch": 1.1630500912593795,
"grad_norm": 0.53515625,
"learning_rate": 0.0002734723669711439,
"loss": 0.1236,
"step": 57350
},
{
"epoch": 1.1640640843642263,
"grad_norm": 0.54296875,
"learning_rate": 0.0002734152178976203,
"loss": 0.1138,
"step": 57400
},
{
"epoch": 1.1650780774690732,
"grad_norm": 0.5546875,
"learning_rate": 0.0002733580133164159,
"loss": 0.136,
"step": 57450
},
{
"epoch": 1.1660920705739202,
"grad_norm": 0.49609375,
"learning_rate": 0.0002733007532532594,
"loss": 0.1121,
"step": 57500
},
{
"epoch": 1.167106063678767,
"grad_norm": 0.5,
"learning_rate": 0.0002732434377339041,
"loss": 0.1138,
"step": 57550
},
{
"epoch": 1.1681200567836139,
"grad_norm": 0.45703125,
"learning_rate": 0.0002731860667841285,
"loss": 0.1103,
"step": 57600
},
{
"epoch": 1.1691340498884608,
"grad_norm": 0.49609375,
"learning_rate": 0.0002731286404297359,
"loss": 0.1127,
"step": 57650
},
{
"epoch": 1.1701480429933078,
"grad_norm": 0.515625,
"learning_rate": 0.0002730711586965547,
"loss": 0.1166,
"step": 57700
},
{
"epoch": 1.1711620360981545,
"grad_norm": 0.56640625,
"learning_rate": 0.0002730136216104381,
"loss": 0.1127,
"step": 57750
},
{
"epoch": 1.1721760292030015,
"grad_norm": 0.59765625,
"learning_rate": 0.00027295602919726405,
"loss": 0.1094,
"step": 57800
},
{
"epoch": 1.1731900223078484,
"grad_norm": 0.482421875,
"learning_rate": 0.0002728983814829357,
"loss": 0.1173,
"step": 57850
},
{
"epoch": 1.1742040154126951,
"grad_norm": 0.50390625,
"learning_rate": 0.0002728406784933808,
"loss": 0.1172,
"step": 57900
},
{
"epoch": 1.175218008517542,
"grad_norm": 0.5,
"learning_rate": 0.00027278292025455204,
"loss": 0.1112,
"step": 57950
},
{
"epoch": 1.176232001622389,
"grad_norm": 0.515625,
"learning_rate": 0.00027272510679242704,
"loss": 0.1084,
"step": 58000
},
{
"epoch": 1.1772459947272358,
"grad_norm": 0.44140625,
"learning_rate": 0.00027266723813300814,
"loss": 0.1137,
"step": 58050
},
{
"epoch": 1.1782599878320827,
"grad_norm": 0.53515625,
"learning_rate": 0.0002726093143023226,
"loss": 0.1219,
"step": 58100
},
{
"epoch": 1.1792739809369297,
"grad_norm": 0.56640625,
"learning_rate": 0.00027255133532642245,
"loss": 0.1134,
"step": 58150
},
{
"epoch": 1.1802879740417764,
"grad_norm": 0.52734375,
"learning_rate": 0.0002724933012313845,
"loss": 0.1149,
"step": 58200
},
{
"epoch": 1.1813019671466234,
"grad_norm": 0.51171875,
"learning_rate": 0.00027243521204331044,
"loss": 0.1182,
"step": 58250
},
{
"epoch": 1.1823159602514703,
"grad_norm": 0.49609375,
"learning_rate": 0.00027237706778832657,
"loss": 0.1107,
"step": 58300
},
{
"epoch": 1.183329953356317,
"grad_norm": 0.51171875,
"learning_rate": 0.00027231886849258413,
"loss": 0.1106,
"step": 58350
},
{
"epoch": 1.184343946461164,
"grad_norm": 0.51171875,
"learning_rate": 0.00027226061418225906,
"loss": 0.1178,
"step": 58400
},
{
"epoch": 1.185357939566011,
"grad_norm": 0.474609375,
"learning_rate": 0.000272202304883552,
"loss": 0.1082,
"step": 58450
},
{
"epoch": 1.1863719326708577,
"grad_norm": 0.45703125,
"learning_rate": 0.0002721439406226884,
"loss": 0.1078,
"step": 58500
},
{
"epoch": 1.1863719326708577,
"eval_cer": 0.03631643837754322,
"eval_loss": 0.1754067838191986,
"eval_runtime": 118.1992,
"eval_samples_per_second": 23.757,
"eval_steps_per_second": 0.186,
"eval_wer": 0.09960051459137383,
"step": 58500
},
{
"epoch": 1.1873859257757047,
"grad_norm": 0.515625,
"learning_rate": 0.0002720855214259183,
"loss": 0.1299,
"step": 58550
},
{
"epoch": 1.1883999188805516,
"grad_norm": 0.45703125,
"learning_rate": 0.0002720270473195166,
"loss": 0.1295,
"step": 58600
},
{
"epoch": 1.1894139119853986,
"grad_norm": 0.490234375,
"learning_rate": 0.0002719685183297829,
"loss": 0.1291,
"step": 58650
},
{
"epoch": 1.1904279050902453,
"grad_norm": 0.5078125,
"learning_rate": 0.0002719099344830412,
"loss": 0.1129,
"step": 58700
},
{
"epoch": 1.1914418981950923,
"grad_norm": 0.48046875,
"learning_rate": 0.00027185129580564054,
"loss": 0.1265,
"step": 58750
},
{
"epoch": 1.1924558912999392,
"grad_norm": 0.458984375,
"learning_rate": 0.00027179260232395447,
"loss": 0.1219,
"step": 58800
},
{
"epoch": 1.193469884404786,
"grad_norm": 0.498046875,
"learning_rate": 0.00027173385406438114,
"loss": 0.1215,
"step": 58850
},
{
"epoch": 1.194483877509633,
"grad_norm": 0.47265625,
"learning_rate": 0.00027167505105334344,
"loss": 0.1101,
"step": 58900
},
{
"epoch": 1.1954978706144799,
"grad_norm": 0.5078125,
"learning_rate": 0.00027161619331728874,
"loss": 0.1064,
"step": 58950
},
{
"epoch": 1.1965118637193268,
"grad_norm": 0.4765625,
"learning_rate": 0.00027155728088268914,
"loss": 0.112,
"step": 59000
},
{
"epoch": 1.1975258568241736,
"grad_norm": 0.474609375,
"learning_rate": 0.00027149831377604135,
"loss": 0.1149,
"step": 59050
},
{
"epoch": 1.1985398499290205,
"grad_norm": 0.458984375,
"learning_rate": 0.00027143929202386657,
"loss": 0.107,
"step": 59100
},
{
"epoch": 1.1995538430338675,
"grad_norm": 0.4609375,
"learning_rate": 0.00027138021565271074,
"loss": 0.1205,
"step": 59150
},
{
"epoch": 1.2005678361387142,
"grad_norm": 0.484375,
"learning_rate": 0.00027132108468914414,
"loss": 0.1107,
"step": 59200
},
{
"epoch": 1.2015818292435612,
"grad_norm": 0.3984375,
"learning_rate": 0.0002712618991597618,
"loss": 0.1086,
"step": 59250
},
{
"epoch": 1.2025958223484081,
"grad_norm": 0.44140625,
"learning_rate": 0.0002712026590911833,
"loss": 0.1091,
"step": 59300
},
{
"epoch": 1.2036098154532548,
"grad_norm": 0.703125,
"learning_rate": 0.00027114336451005246,
"loss": 0.1192,
"step": 59350
},
{
"epoch": 1.2046238085581018,
"grad_norm": 0.486328125,
"learning_rate": 0.00027108401544303807,
"loss": 0.1189,
"step": 59400
},
{
"epoch": 1.2056378016629488,
"grad_norm": 0.453125,
"learning_rate": 0.000271024611916833,
"loss": 0.1178,
"step": 59450
},
{
"epoch": 1.2066517947677955,
"grad_norm": 0.474609375,
"learning_rate": 0.00027096515395815484,
"loss": 0.1118,
"step": 59500
},
{
"epoch": 1.2076657878726424,
"grad_norm": 0.52734375,
"learning_rate": 0.0002709056415937457,
"loss": 0.1199,
"step": 59550
},
{
"epoch": 1.2086797809774894,
"grad_norm": 0.470703125,
"learning_rate": 0.000270846074850372,
"loss": 0.1243,
"step": 59600
},
{
"epoch": 1.2096937740823361,
"grad_norm": 0.5234375,
"learning_rate": 0.0002707864537548247,
"loss": 0.1071,
"step": 59650
},
{
"epoch": 1.210707767187183,
"grad_norm": 0.46875,
"learning_rate": 0.00027072677833391923,
"loss": 0.1227,
"step": 59700
},
{
"epoch": 1.21172176029203,
"grad_norm": 0.4453125,
"learning_rate": 0.0002706670486144954,
"loss": 0.1176,
"step": 59750
},
{
"epoch": 1.2127357533968768,
"grad_norm": 0.50390625,
"learning_rate": 0.0002706072646234175,
"loss": 0.1136,
"step": 59800
},
{
"epoch": 1.2137497465017237,
"grad_norm": 0.72265625,
"learning_rate": 0.00027054742638757417,
"loss": 0.1143,
"step": 59850
},
{
"epoch": 1.2147637396065707,
"grad_norm": 0.38671875,
"learning_rate": 0.00027048753393387853,
"loss": 0.1238,
"step": 59900
},
{
"epoch": 1.2157777327114176,
"grad_norm": 0.453125,
"learning_rate": 0.00027042758728926795,
"loss": 0.1077,
"step": 59950
},
{
"epoch": 1.2167917258162644,
"grad_norm": 0.423828125,
"learning_rate": 0.0002703675864807043,
"loss": 0.1045,
"step": 60000
},
{
"epoch": 1.2167917258162644,
"eval_cer": 0.03392863645585688,
"eval_loss": 0.17490266263484955,
"eval_runtime": 118.1431,
"eval_samples_per_second": 23.768,
"eval_steps_per_second": 0.186,
"eval_wer": 0.09658744667885436,
"step": 60000
},
{
"epoch": 1.2178057189211113,
"grad_norm": 0.427734375,
"learning_rate": 0.0002703075315351738,
"loss": 0.1222,
"step": 60050
},
{
"epoch": 1.2188197120259583,
"grad_norm": 0.60546875,
"learning_rate": 0.00027024742247968686,
"loss": 0.1133,
"step": 60100
},
{
"epoch": 1.219833705130805,
"grad_norm": 0.5078125,
"learning_rate": 0.00027018725934127845,
"loss": 0.1125,
"step": 60150
},
{
"epoch": 1.220847698235652,
"grad_norm": 0.455078125,
"learning_rate": 0.0002701270421470077,
"loss": 0.1083,
"step": 60200
},
{
"epoch": 1.221861691340499,
"grad_norm": 0.447265625,
"learning_rate": 0.0002700667709239582,
"loss": 0.1113,
"step": 60250
},
{
"epoch": 1.222875684445346,
"grad_norm": 0.439453125,
"learning_rate": 0.0002700064456992377,
"loss": 0.1194,
"step": 60300
},
{
"epoch": 1.2238896775501926,
"grad_norm": 0.53125,
"learning_rate": 0.00026994606649997834,
"loss": 0.118,
"step": 60350
},
{
"epoch": 1.2249036706550396,
"grad_norm": 0.458984375,
"learning_rate": 0.0002698856333533364,
"loss": 0.1087,
"step": 60400
},
{
"epoch": 1.2259176637598865,
"grad_norm": 0.447265625,
"learning_rate": 0.00026982514628649254,
"loss": 0.1124,
"step": 60450
},
{
"epoch": 1.2269316568647333,
"grad_norm": 0.4765625,
"learning_rate": 0.00026976460532665173,
"loss": 0.1128,
"step": 60500
},
{
"epoch": 1.2279456499695802,
"grad_norm": 0.47265625,
"learning_rate": 0.000269704010501043,
"loss": 0.1093,
"step": 60550
},
{
"epoch": 1.2289596430744272,
"grad_norm": 0.5,
"learning_rate": 0.00026964336183691977,
"loss": 0.1239,
"step": 60600
},
{
"epoch": 1.229973636179274,
"grad_norm": 0.58203125,
"learning_rate": 0.0002695826593615596,
"loss": 0.1243,
"step": 60650
},
{
"epoch": 1.2309876292841209,
"grad_norm": 0.515625,
"learning_rate": 0.00026952190310226414,
"loss": 0.1211,
"step": 60700
},
{
"epoch": 1.2320016223889678,
"grad_norm": 0.50390625,
"learning_rate": 0.0002694610930863595,
"loss": 0.1103,
"step": 60750
},
{
"epoch": 1.2330156154938146,
"grad_norm": 0.470703125,
"learning_rate": 0.0002694002293411957,
"loss": 0.1077,
"step": 60800
},
{
"epoch": 1.2340296085986615,
"grad_norm": 0.5234375,
"learning_rate": 0.0002693393118941471,
"loss": 0.1185,
"step": 60850
},
{
"epoch": 1.2350436017035085,
"grad_norm": 0.46484375,
"learning_rate": 0.0002692783407726122,
"loss": 0.1284,
"step": 60900
},
{
"epoch": 1.2360575948083552,
"grad_norm": 0.453125,
"learning_rate": 0.00026921731600401357,
"loss": 0.1129,
"step": 60950
},
{
"epoch": 1.2370715879132022,
"grad_norm": 0.466796875,
"learning_rate": 0.00026915623761579786,
"loss": 0.1149,
"step": 61000
},
{
"epoch": 1.238085581018049,
"grad_norm": 1.1875,
"learning_rate": 0.00026909510563543596,
"loss": 0.1254,
"step": 61050
},
{
"epoch": 1.2390995741228958,
"grad_norm": 0.392578125,
"learning_rate": 0.0002690339200904228,
"loss": 0.1163,
"step": 61100
},
{
"epoch": 1.2401135672277428,
"grad_norm": 0.578125,
"learning_rate": 0.00026897268100827747,
"loss": 0.1157,
"step": 61150
},
{
"epoch": 1.2411275603325898,
"grad_norm": 0.80078125,
"learning_rate": 0.00026891138841654295,
"loss": 0.1151,
"step": 61200
},
{
"epoch": 1.2421415534374367,
"grad_norm": 0.54296875,
"learning_rate": 0.00026885004234278656,
"loss": 0.1227,
"step": 61250
},
{
"epoch": 1.2431555465422834,
"grad_norm": 0.443359375,
"learning_rate": 0.0002687886428145995,
"loss": 0.1091,
"step": 61300
},
{
"epoch": 1.2441695396471304,
"grad_norm": 0.490234375,
"learning_rate": 0.00026872718985959703,
"loss": 0.1171,
"step": 61350
},
{
"epoch": 1.2451835327519774,
"grad_norm": 0.56640625,
"learning_rate": 0.00026866568350541846,
"loss": 0.1073,
"step": 61400
},
{
"epoch": 1.246197525856824,
"grad_norm": 0.5390625,
"learning_rate": 0.0002686041237797271,
"loss": 0.1129,
"step": 61450
},
{
"epoch": 1.247211518961671,
"grad_norm": 0.498046875,
"learning_rate": 0.0002685425107102103,
"loss": 0.1057,
"step": 61500
},
{
"epoch": 1.247211518961671,
"eval_cer": 0.03003740369459025,
"eval_loss": 0.17543725669384003,
"eval_runtime": 129.7983,
"eval_samples_per_second": 21.634,
"eval_steps_per_second": 0.169,
"eval_wer": 0.09222019094048345,
"step": 61500
},
{
"epoch": 1.248225512066518,
"grad_norm": 0.44921875,
"learning_rate": 0.00026848084432457936,
"loss": 0.1111,
"step": 61550
},
{
"epoch": 1.249239505171365,
"grad_norm": 0.447265625,
"learning_rate": 0.00026841912465056964,
"loss": 0.1184,
"step": 61600
},
{
"epoch": 1.2502534982762117,
"grad_norm": 0.5234375,
"learning_rate": 0.00026835735171594027,
"loss": 0.1064,
"step": 61650
},
{
"epoch": 1.2512674913810586,
"grad_norm": 0.486328125,
"learning_rate": 0.00026829552554847463,
"loss": 0.1176,
"step": 61700
},
{
"epoch": 1.2522814844859056,
"grad_norm": 0.52734375,
"learning_rate": 0.00026823364617597974,
"loss": 0.1141,
"step": 61750
},
{
"epoch": 1.2532954775907523,
"grad_norm": 1.4375,
"learning_rate": 0.0002681717136262868,
"loss": 0.1298,
"step": 61800
},
{
"epoch": 1.2543094706955993,
"grad_norm": 0.46875,
"learning_rate": 0.0002681097279272508,
"loss": 0.1078,
"step": 61850
},
{
"epoch": 1.2553234638004462,
"grad_norm": 0.84765625,
"learning_rate": 0.00026804768910675066,
"loss": 0.1121,
"step": 61900
},
{
"epoch": 1.256337456905293,
"grad_norm": 0.482421875,
"learning_rate": 0.0002679855971926891,
"loss": 0.1138,
"step": 61950
},
{
"epoch": 1.25735145001014,
"grad_norm": 0.51953125,
"learning_rate": 0.00026792345221299295,
"loss": 0.1086,
"step": 62000
},
{
"epoch": 1.2583654431149869,
"grad_norm": 0.494140625,
"learning_rate": 0.0002678612541956127,
"loss": 0.1085,
"step": 62050
},
{
"epoch": 1.2593794362198336,
"grad_norm": 0.51171875,
"learning_rate": 0.00026779900316852267,
"loss": 0.1159,
"step": 62100
},
{
"epoch": 1.2603934293246806,
"grad_norm": 0.5390625,
"learning_rate": 0.00026773669915972125,
"loss": 0.1088,
"step": 62150
},
{
"epoch": 1.2614074224295275,
"grad_norm": 0.4375,
"learning_rate": 0.0002676743421972304,
"loss": 0.1102,
"step": 62200
},
{
"epoch": 1.2624214155343743,
"grad_norm": 0.482421875,
"learning_rate": 0.0002676119323090961,
"loss": 0.114,
"step": 62250
},
{
"epoch": 1.2634354086392212,
"grad_norm": 0.58203125,
"learning_rate": 0.000267549469523388,
"loss": 0.1156,
"step": 62300
},
{
"epoch": 1.2644494017440682,
"grad_norm": 0.515625,
"learning_rate": 0.00026748695386819964,
"loss": 0.1079,
"step": 62350
},
{
"epoch": 1.265463394848915,
"grad_norm": 0.44921875,
"learning_rate": 0.00026742438537164827,
"loss": 0.1083,
"step": 62400
},
{
"epoch": 1.2664773879537619,
"grad_norm": 0.73046875,
"learning_rate": 0.0002673617640618749,
"loss": 0.1233,
"step": 62450
},
{
"epoch": 1.2674913810586088,
"grad_norm": 0.56640625,
"learning_rate": 0.00026729908996704437,
"loss": 0.1105,
"step": 62500
},
{
"epoch": 1.2685053741634558,
"grad_norm": 0.51171875,
"learning_rate": 0.00026723636311534515,
"loss": 0.1129,
"step": 62550
},
{
"epoch": 1.2695193672683025,
"grad_norm": 0.458984375,
"learning_rate": 0.00026717358353498953,
"loss": 0.1131,
"step": 62600
},
{
"epoch": 1.2705333603731495,
"grad_norm": 0.55859375,
"learning_rate": 0.0002671107512542135,
"loss": 0.1277,
"step": 62650
},
{
"epoch": 1.2715473534779964,
"grad_norm": 0.474609375,
"learning_rate": 0.0002670478663012767,
"loss": 0.1089,
"step": 62700
},
{
"epoch": 1.2725613465828434,
"grad_norm": 0.4453125,
"learning_rate": 0.00026698492870446257,
"loss": 0.1256,
"step": 62750
},
{
"epoch": 1.27357533968769,
"grad_norm": 0.48828125,
"learning_rate": 0.0002669219384920781,
"loss": 0.1067,
"step": 62800
},
{
"epoch": 1.274589332792537,
"grad_norm": 0.5546875,
"learning_rate": 0.00026685889569245393,
"loss": 0.1146,
"step": 62850
},
{
"epoch": 1.275603325897384,
"grad_norm": 0.515625,
"learning_rate": 0.0002667958003339445,
"loss": 0.1066,
"step": 62900
},
{
"epoch": 1.2766173190022307,
"grad_norm": 0.48828125,
"learning_rate": 0.0002667326524449279,
"loss": 0.1214,
"step": 62950
},
{
"epoch": 1.2776313121070777,
"grad_norm": 0.466796875,
"learning_rate": 0.00026666945205380555,
"loss": 0.1071,
"step": 63000
},
{
"epoch": 1.2776313121070777,
"eval_cer": 0.03263329292971331,
"eval_loss": 0.1732511967420578,
"eval_runtime": 127.7415,
"eval_samples_per_second": 21.982,
"eval_steps_per_second": 0.172,
"eval_wer": 0.09442074615749205,
"step": 63000
},
{
"epoch": 1.2786453052119247,
"grad_norm": 0.474609375,
"learning_rate": 0.00026660619918900287,
"loss": 0.1134,
"step": 63050
},
{
"epoch": 1.2796592983167714,
"grad_norm": 0.458984375,
"learning_rate": 0.00026654289387896855,
"loss": 0.1119,
"step": 63100
},
{
"epoch": 1.2806732914216183,
"grad_norm": 0.51171875,
"learning_rate": 0.00026647953615217514,
"loss": 0.1172,
"step": 63150
},
{
"epoch": 1.2816872845264653,
"grad_norm": 0.4375,
"learning_rate": 0.00026641612603711857,
"loss": 0.1076,
"step": 63200
},
{
"epoch": 1.282701277631312,
"grad_norm": 0.458984375,
"learning_rate": 0.0002663526635623185,
"loss": 0.112,
"step": 63250
},
{
"epoch": 1.283715270736159,
"grad_norm": 0.6875,
"learning_rate": 0.00026628914875631793,
"loss": 0.1193,
"step": 63300
},
{
"epoch": 1.284729263841006,
"grad_norm": 0.5625,
"learning_rate": 0.0002662255816476836,
"loss": 0.1232,
"step": 63350
},
{
"epoch": 1.2857432569458527,
"grad_norm": 0.44140625,
"learning_rate": 0.0002661619622650056,
"loss": 0.1134,
"step": 63400
},
{
"epoch": 1.2867572500506996,
"grad_norm": 0.515625,
"learning_rate": 0.00026609829063689767,
"loss": 0.1161,
"step": 63450
},
{
"epoch": 1.2877712431555466,
"grad_norm": 0.44140625,
"learning_rate": 0.000266034566791997,
"loss": 0.1152,
"step": 63500
},
{
"epoch": 1.2887852362603933,
"grad_norm": 0.455078125,
"learning_rate": 0.0002659707907589643,
"loss": 0.1139,
"step": 63550
},
{
"epoch": 1.2897992293652403,
"grad_norm": 0.47265625,
"learning_rate": 0.0002659069625664836,
"loss": 0.113,
"step": 63600
},
{
"epoch": 1.2908132224700872,
"grad_norm": 0.50390625,
"learning_rate": 0.0002658430822432626,
"loss": 0.1143,
"step": 63650
},
{
"epoch": 1.291827215574934,
"grad_norm": 0.482421875,
"learning_rate": 0.0002657791498180323,
"loss": 0.1104,
"step": 63700
},
{
"epoch": 1.292841208679781,
"grad_norm": 0.4765625,
"learning_rate": 0.0002657151653195472,
"loss": 0.1082,
"step": 63750
},
{
"epoch": 1.2938552017846279,
"grad_norm": 0.439453125,
"learning_rate": 0.00026565112877658515,
"loss": 0.1084,
"step": 63800
},
{
"epoch": 1.2948691948894748,
"grad_norm": 0.5234375,
"learning_rate": 0.00026558704021794753,
"loss": 0.112,
"step": 63850
},
{
"epoch": 1.2958831879943216,
"grad_norm": 0.482421875,
"learning_rate": 0.00026552289967245906,
"loss": 0.1244,
"step": 63900
},
{
"epoch": 1.2968971810991685,
"grad_norm": 0.53125,
"learning_rate": 0.00026545870716896774,
"loss": 0.1187,
"step": 63950
},
{
"epoch": 1.2979111742040155,
"grad_norm": 0.53125,
"learning_rate": 0.0002653944627363451,
"loss": 0.1134,
"step": 64000
},
{
"epoch": 1.2989251673088624,
"grad_norm": 0.7109375,
"learning_rate": 0.0002653301664034859,
"loss": 0.1151,
"step": 64050
},
{
"epoch": 1.2999391604137092,
"grad_norm": 0.515625,
"learning_rate": 0.00026526581819930837,
"loss": 0.1206,
"step": 64100
},
{
"epoch": 1.3009531535185561,
"grad_norm": 0.423828125,
"learning_rate": 0.0002652014181527539,
"loss": 0.1081,
"step": 64150
},
{
"epoch": 1.301967146623403,
"grad_norm": 0.455078125,
"learning_rate": 0.00026513696629278734,
"loss": 0.105,
"step": 64200
},
{
"epoch": 1.3029811397282498,
"grad_norm": 0.66015625,
"learning_rate": 0.0002650724626483968,
"loss": 0.1237,
"step": 64250
},
{
"epoch": 1.3039951328330968,
"grad_norm": 0.5078125,
"learning_rate": 0.0002650079072485937,
"loss": 0.1159,
"step": 64300
},
{
"epoch": 1.3050091259379437,
"grad_norm": 0.482421875,
"learning_rate": 0.0002649433001224126,
"loss": 0.1145,
"step": 64350
},
{
"epoch": 1.3060231190427904,
"grad_norm": 0.6015625,
"learning_rate": 0.00026487864129891156,
"loss": 0.1206,
"step": 64400
},
{
"epoch": 1.3070371121476374,
"grad_norm": 0.466796875,
"learning_rate": 0.00026481393080717176,
"loss": 0.1213,
"step": 64450
},
{
"epoch": 1.3080511052524844,
"grad_norm": 0.609375,
"learning_rate": 0.00026474916867629753,
"loss": 0.1216,
"step": 64500
},
{
"epoch": 1.3080511052524844,
"eval_cer": 0.030984200970727318,
"eval_loss": 0.1738138198852539,
"eval_runtime": 127.5722,
"eval_samples_per_second": 22.011,
"eval_steps_per_second": 0.172,
"eval_wer": 0.09448845554878461,
"step": 64500
},
{
"epoch": 1.309065098357331,
"grad_norm": 0.458984375,
"learning_rate": 0.0002646843549354166,
"loss": 0.1128,
"step": 64550
},
{
"epoch": 1.310079091462178,
"grad_norm": 0.5234375,
"learning_rate": 0.00026461948961367985,
"loss": 0.1133,
"step": 64600
},
{
"epoch": 1.311093084567025,
"grad_norm": 0.4921875,
"learning_rate": 0.0002645545727402613,
"loss": 0.1129,
"step": 64650
},
{
"epoch": 1.3121070776718717,
"grad_norm": 0.474609375,
"learning_rate": 0.0002644896043443582,
"loss": 0.1043,
"step": 64700
},
{
"epoch": 1.3131210707767187,
"grad_norm": 0.4921875,
"learning_rate": 0.0002644245844551911,
"loss": 0.1278,
"step": 64750
},
{
"epoch": 1.3141350638815656,
"grad_norm": 0.55078125,
"learning_rate": 0.00026435951310200336,
"loss": 0.1126,
"step": 64800
},
{
"epoch": 1.3151490569864124,
"grad_norm": 0.4453125,
"learning_rate": 0.0002642943903140619,
"loss": 0.1189,
"step": 64850
},
{
"epoch": 1.3161630500912593,
"grad_norm": 0.46875,
"learning_rate": 0.00026422921612065647,
"loss": 0.1065,
"step": 64900
},
{
"epoch": 1.3171770431961063,
"grad_norm": 0.5546875,
"learning_rate": 0.0002641639905511001,
"loss": 0.1217,
"step": 64950
},
{
"epoch": 1.318191036300953,
"grad_norm": 0.431640625,
"learning_rate": 0.0002640987136347289,
"loss": 0.1134,
"step": 65000
},
{
"epoch": 1.3192050294058,
"grad_norm": 0.5078125,
"learning_rate": 0.00026403338540090195,
"loss": 0.1124,
"step": 65050
},
{
"epoch": 1.320219022510647,
"grad_norm": 0.490234375,
"learning_rate": 0.00026396800587900166,
"loss": 0.1141,
"step": 65100
},
{
"epoch": 1.3212330156154939,
"grad_norm": 0.478515625,
"learning_rate": 0.00026390257509843333,
"loss": 0.1168,
"step": 65150
},
{
"epoch": 1.3222470087203406,
"grad_norm": 0.51953125,
"learning_rate": 0.0002638370930886253,
"loss": 0.1317,
"step": 65200
},
{
"epoch": 1.3232610018251876,
"grad_norm": 0.5234375,
"learning_rate": 0.000263771559879029,
"loss": 0.1137,
"step": 65250
},
{
"epoch": 1.3242749949300345,
"grad_norm": 0.6796875,
"learning_rate": 0.0002637059754991189,
"loss": 0.1166,
"step": 65300
},
{
"epoch": 1.3252889880348815,
"grad_norm": 0.470703125,
"learning_rate": 0.0002636403399783925,
"loss": 0.105,
"step": 65350
},
{
"epoch": 1.3263029811397282,
"grad_norm": 0.494140625,
"learning_rate": 0.0002635746533463702,
"loss": 0.1048,
"step": 65400
},
{
"epoch": 1.3273169742445752,
"grad_norm": 0.470703125,
"learning_rate": 0.00026350891563259554,
"loss": 0.1124,
"step": 65450
},
{
"epoch": 1.3283309673494221,
"grad_norm": 0.478515625,
"learning_rate": 0.0002634431268666349,
"loss": 0.1062,
"step": 65500
},
{
"epoch": 1.3293449604542689,
"grad_norm": 0.4453125,
"learning_rate": 0.0002633772870780777,
"loss": 0.1224,
"step": 65550
},
{
"epoch": 1.3303589535591158,
"grad_norm": 0.466796875,
"learning_rate": 0.00026331139629653635,
"loss": 0.1156,
"step": 65600
},
{
"epoch": 1.3313729466639628,
"grad_norm": 0.4921875,
"learning_rate": 0.000263245454551646,
"loss": 0.1185,
"step": 65650
},
{
"epoch": 1.3323869397688095,
"grad_norm": 0.40234375,
"learning_rate": 0.0002631794618730649,
"loss": 0.112,
"step": 65700
},
{
"epoch": 1.3334009328736565,
"grad_norm": 0.4140625,
"learning_rate": 0.0002631134182904742,
"loss": 0.1193,
"step": 65750
},
{
"epoch": 1.3344149259785034,
"grad_norm": 0.51171875,
"learning_rate": 0.0002630473238335779,
"loss": 0.1049,
"step": 65800
},
{
"epoch": 1.3354289190833502,
"grad_norm": 0.48828125,
"learning_rate": 0.0002629811785321028,
"loss": 0.1129,
"step": 65850
},
{
"epoch": 1.336442912188197,
"grad_norm": 0.48046875,
"learning_rate": 0.00026291498241579883,
"loss": 0.1185,
"step": 65900
},
{
"epoch": 1.337456905293044,
"grad_norm": 0.458984375,
"learning_rate": 0.00026284873551443835,
"loss": 0.1077,
"step": 65950
},
{
"epoch": 1.3384708983978908,
"grad_norm": 0.4375,
"learning_rate": 0.000262782437857817,
"loss": 0.1071,
"step": 66000
},
{
"epoch": 1.3384708983978908,
"eval_cer": 0.02826345934754223,
"eval_loss": 0.17099952697753906,
"eval_runtime": 126.6833,
"eval_samples_per_second": 22.166,
"eval_steps_per_second": 0.174,
"eval_wer": 0.0911368406798023,
"step": 66000
},
{
"epoch": 1.3394848915027378,
"grad_norm": 0.46484375,
"learning_rate": 0.000262716089475753,
"loss": 0.116,
"step": 66050
},
{
"epoch": 1.3404988846075847,
"grad_norm": 0.5078125,
"learning_rate": 0.00026264969039808744,
"loss": 0.1075,
"step": 66100
},
{
"epoch": 1.3415128777124314,
"grad_norm": 0.478515625,
"learning_rate": 0.0002625832406546842,
"loss": 0.1159,
"step": 66150
},
{
"epoch": 1.3425268708172784,
"grad_norm": 0.486328125,
"learning_rate": 0.00026251674027543,
"loss": 0.1205,
"step": 66200
},
{
"epoch": 1.3435408639221254,
"grad_norm": 0.427734375,
"learning_rate": 0.00026245018929023427,
"loss": 0.1104,
"step": 66250
},
{
"epoch": 1.344554857026972,
"grad_norm": 0.52734375,
"learning_rate": 0.00026238358772902917,
"loss": 0.1065,
"step": 66300
},
{
"epoch": 1.345568850131819,
"grad_norm": 0.458984375,
"learning_rate": 0.00026231693562176974,
"loss": 0.1178,
"step": 66350
},
{
"epoch": 1.346582843236666,
"grad_norm": 0.5,
"learning_rate": 0.0002622502329984337,
"loss": 0.1141,
"step": 66400
},
{
"epoch": 1.347596836341513,
"grad_norm": 0.435546875,
"learning_rate": 0.0002621834798890214,
"loss": 0.1033,
"step": 66450
},
{
"epoch": 1.3486108294463597,
"grad_norm": 0.46875,
"learning_rate": 0.0002621166763235559,
"loss": 0.1063,
"step": 66500
},
{
"epoch": 1.3496248225512066,
"grad_norm": 0.53515625,
"learning_rate": 0.0002620498223320832,
"loss": 0.1157,
"step": 66550
},
{
"epoch": 1.3506388156560536,
"grad_norm": 0.462890625,
"learning_rate": 0.0002619829179446717,
"loss": 0.111,
"step": 66600
},
{
"epoch": 1.3516528087609005,
"grad_norm": 0.44140625,
"learning_rate": 0.00026191596319141247,
"loss": 0.109,
"step": 66650
},
{
"epoch": 1.3526668018657473,
"grad_norm": 0.482421875,
"learning_rate": 0.0002618489581024194,
"loss": 0.1123,
"step": 66700
},
{
"epoch": 1.3536807949705942,
"grad_norm": 0.435546875,
"learning_rate": 0.0002617819027078291,
"loss": 0.107,
"step": 66750
},
{
"epoch": 1.3546947880754412,
"grad_norm": 0.51953125,
"learning_rate": 0.00026171479703780037,
"loss": 0.1096,
"step": 66800
},
{
"epoch": 1.355708781180288,
"grad_norm": 0.578125,
"learning_rate": 0.00026164764112251505,
"loss": 0.1121,
"step": 66850
},
{
"epoch": 1.3567227742851349,
"grad_norm": 0.69921875,
"learning_rate": 0.0002615804349921775,
"loss": 0.1072,
"step": 66900
},
{
"epoch": 1.3577367673899818,
"grad_norm": 0.44140625,
"learning_rate": 0.0002615131786770145,
"loss": 0.1115,
"step": 66950
},
{
"epoch": 1.3587507604948286,
"grad_norm": 0.474609375,
"learning_rate": 0.00026144587220727544,
"loss": 0.1058,
"step": 67000
},
{
"epoch": 1.3597647535996755,
"grad_norm": 0.484375,
"learning_rate": 0.0002613785156132324,
"loss": 0.1184,
"step": 67050
},
{
"epoch": 1.3607787467045225,
"grad_norm": 0.3984375,
"learning_rate": 0.00026131110892518,
"loss": 0.1115,
"step": 67100
},
{
"epoch": 1.3617927398093692,
"grad_norm": 0.5078125,
"learning_rate": 0.0002612436521734352,
"loss": 0.1063,
"step": 67150
},
{
"epoch": 1.3628067329142162,
"grad_norm": 0.51953125,
"learning_rate": 0.0002611761453883376,
"loss": 0.1118,
"step": 67200
},
{
"epoch": 1.3638207260190631,
"grad_norm": 0.412109375,
"learning_rate": 0.0002611085886002494,
"loss": 0.1089,
"step": 67250
},
{
"epoch": 1.3648347191239099,
"grad_norm": 0.50390625,
"learning_rate": 0.00026104098183955506,
"loss": 0.1059,
"step": 67300
},
{
"epoch": 1.3658487122287568,
"grad_norm": 0.42578125,
"learning_rate": 0.00026097332513666174,
"loss": 0.1196,
"step": 67350
},
{
"epoch": 1.3668627053336038,
"grad_norm": 0.494140625,
"learning_rate": 0.000260905618521999,
"loss": 0.1123,
"step": 67400
},
{
"epoch": 1.3678766984384505,
"grad_norm": 0.462890625,
"learning_rate": 0.0002608378620260187,
"loss": 0.1106,
"step": 67450
},
{
"epoch": 1.3688906915432975,
"grad_norm": 0.56640625,
"learning_rate": 0.00026077005567919526,
"loss": 0.109,
"step": 67500
},
{
"epoch": 1.3688906915432975,
"eval_cer": 0.03368413386256874,
"eval_loss": 0.17319071292877197,
"eval_runtime": 129.0671,
"eval_samples_per_second": 21.756,
"eval_steps_per_second": 0.17,
"eval_wer": 0.09628275441803778,
"step": 67500
},
{
"epoch": 1.3699046846481444,
"grad_norm": 0.431640625,
"learning_rate": 0.00026070219951202563,
"loss": 0.1146,
"step": 67550
},
{
"epoch": 1.3709186777529911,
"grad_norm": 0.451171875,
"learning_rate": 0.00026063429355502896,
"loss": 0.1129,
"step": 67600
},
{
"epoch": 1.371932670857838,
"grad_norm": 0.49609375,
"learning_rate": 0.0002605663378387469,
"loss": 0.1059,
"step": 67650
},
{
"epoch": 1.372946663962685,
"grad_norm": 0.482421875,
"learning_rate": 0.00026049833239374346,
"loss": 0.1052,
"step": 67700
},
{
"epoch": 1.373960657067532,
"grad_norm": 0.4921875,
"learning_rate": 0.00026043027725060493,
"loss": 0.1215,
"step": 67750
},
{
"epoch": 1.374974650172379,
"grad_norm": 0.451171875,
"learning_rate": 0.00026036217243994024,
"loss": 0.123,
"step": 67800
},
{
"epoch": 1.3759886432772257,
"grad_norm": 0.50390625,
"learning_rate": 0.00026029401799238026,
"loss": 0.12,
"step": 67850
},
{
"epoch": 1.3770026363820727,
"grad_norm": 0.5234375,
"learning_rate": 0.00026022581393857853,
"loss": 0.1261,
"step": 67900
},
{
"epoch": 1.3780166294869196,
"grad_norm": 0.50390625,
"learning_rate": 0.00026015756030921066,
"loss": 0.1167,
"step": 67950
},
{
"epoch": 1.3790306225917663,
"grad_norm": 0.466796875,
"learning_rate": 0.0002600892571349747,
"loss": 0.115,
"step": 68000
},
{
"epoch": 1.3800446156966133,
"grad_norm": 0.48828125,
"learning_rate": 0.0002600209044465909,
"loss": 0.1067,
"step": 68050
},
{
"epoch": 1.3810586088014603,
"grad_norm": 0.5859375,
"learning_rate": 0.00025995250227480186,
"loss": 0.1135,
"step": 68100
},
{
"epoch": 1.382072601906307,
"grad_norm": 0.51953125,
"learning_rate": 0.0002598840506503724,
"loss": 0.1133,
"step": 68150
},
{
"epoch": 1.383086595011154,
"grad_norm": 0.52734375,
"learning_rate": 0.0002598155496040895,
"loss": 0.1121,
"step": 68200
},
{
"epoch": 1.384100588116001,
"grad_norm": 0.478515625,
"learning_rate": 0.00025974699916676245,
"loss": 0.1075,
"step": 68250
},
{
"epoch": 1.3851145812208476,
"grad_norm": 0.486328125,
"learning_rate": 0.0002596783993692228,
"loss": 0.1109,
"step": 68300
},
{
"epoch": 1.3861285743256946,
"grad_norm": 0.435546875,
"learning_rate": 0.00025960975024232427,
"loss": 0.1139,
"step": 68350
},
{
"epoch": 1.3871425674305415,
"grad_norm": 0.494140625,
"learning_rate": 0.00025954105181694273,
"loss": 0.1045,
"step": 68400
},
{
"epoch": 1.3881565605353883,
"grad_norm": 0.57421875,
"learning_rate": 0.0002594723041239762,
"loss": 0.1155,
"step": 68450
},
{
"epoch": 1.3891705536402352,
"grad_norm": 0.5234375,
"learning_rate": 0.00025940350719434486,
"loss": 0.1115,
"step": 68500
},
{
"epoch": 1.3901845467450822,
"grad_norm": 0.51171875,
"learning_rate": 0.0002593346610589911,
"loss": 0.112,
"step": 68550
},
{
"epoch": 1.391198539849929,
"grad_norm": 0.48828125,
"learning_rate": 0.0002592657657488794,
"loss": 0.124,
"step": 68600
},
{
"epoch": 1.3922125329547759,
"grad_norm": 0.48046875,
"learning_rate": 0.0002591968212949964,
"loss": 0.1315,
"step": 68650
},
{
"epoch": 1.3932265260596228,
"grad_norm": 0.51171875,
"learning_rate": 0.00025912782772835087,
"loss": 0.1083,
"step": 68700
},
{
"epoch": 1.3942405191644696,
"grad_norm": 0.43359375,
"learning_rate": 0.0002590587850799734,
"loss": 0.1172,
"step": 68750
},
{
"epoch": 1.3952545122693165,
"grad_norm": 0.470703125,
"learning_rate": 0.0002589896933809172,
"loss": 0.1109,
"step": 68800
},
{
"epoch": 1.3962685053741635,
"grad_norm": 0.478515625,
"learning_rate": 0.0002589205526622569,
"loss": 0.1145,
"step": 68850
},
{
"epoch": 1.3972824984790102,
"grad_norm": 0.45703125,
"learning_rate": 0.0002588513629550895,
"loss": 0.1126,
"step": 68900
},
{
"epoch": 1.3982964915838572,
"grad_norm": 0.515625,
"learning_rate": 0.00025878212429053423,
"loss": 0.1229,
"step": 68950
},
{
"epoch": 1.3993104846887041,
"grad_norm": 0.5078125,
"learning_rate": 0.000258712836699732,
"loss": 0.1069,
"step": 69000
},
{
"epoch": 1.3993104846887041,
"eval_cer": 0.031514823619990945,
"eval_loss": 0.17294800281524658,
"eval_runtime": 126.3316,
"eval_samples_per_second": 22.227,
"eval_steps_per_second": 0.174,
"eval_wer": 0.09462387433136976,
"step": 69000
},
{
"epoch": 1.400324477793551,
"grad_norm": 0.54296875,
"learning_rate": 0.0002586435002138458,
"loss": 0.111,
"step": 69050
},
{
"epoch": 1.401338470898398,
"grad_norm": 0.404296875,
"learning_rate": 0.0002585741148640607,
"loss": 0.1053,
"step": 69100
},
{
"epoch": 1.4023524640032448,
"grad_norm": 0.5390625,
"learning_rate": 0.0002585046806815838,
"loss": 0.1124,
"step": 69150
},
{
"epoch": 1.4033664571080917,
"grad_norm": 0.44921875,
"learning_rate": 0.0002584351976976439,
"loss": 0.1064,
"step": 69200
},
{
"epoch": 1.4043804502129387,
"grad_norm": 0.447265625,
"learning_rate": 0.00025836566594349207,
"loss": 0.111,
"step": 69250
},
{
"epoch": 1.4053944433177854,
"grad_norm": 0.484375,
"learning_rate": 0.00025829608545040113,
"loss": 0.1121,
"step": 69300
},
{
"epoch": 1.4064084364226324,
"grad_norm": 0.41015625,
"learning_rate": 0.00025822645624966583,
"loss": 0.1094,
"step": 69350
},
{
"epoch": 1.4074224295274793,
"grad_norm": 0.7265625,
"learning_rate": 0.00025815677837260286,
"loss": 0.1129,
"step": 69400
},
{
"epoch": 1.408436422632326,
"grad_norm": 0.5,
"learning_rate": 0.0002580870518505508,
"loss": 0.1138,
"step": 69450
},
{
"epoch": 1.409450415737173,
"grad_norm": 0.5390625,
"learning_rate": 0.0002580172767148701,
"loss": 0.1224,
"step": 69500
},
{
"epoch": 1.41046440884202,
"grad_norm": 0.5,
"learning_rate": 0.00025794745299694314,
"loss": 0.1087,
"step": 69550
},
{
"epoch": 1.4114784019468667,
"grad_norm": 0.451171875,
"learning_rate": 0.0002578775807281741,
"loss": 0.1081,
"step": 69600
},
{
"epoch": 1.4124923950517136,
"grad_norm": 0.62109375,
"learning_rate": 0.0002578076599399888,
"loss": 0.1153,
"step": 69650
},
{
"epoch": 1.4135063881565606,
"grad_norm": 0.5546875,
"learning_rate": 0.0002577376906638352,
"loss": 0.1224,
"step": 69700
},
{
"epoch": 1.4145203812614073,
"grad_norm": 0.50390625,
"learning_rate": 0.00025766767293118305,
"loss": 0.114,
"step": 69750
},
{
"epoch": 1.4155343743662543,
"grad_norm": 0.42578125,
"learning_rate": 0.0002575976067735236,
"loss": 0.1222,
"step": 69800
},
{
"epoch": 1.4165483674711012,
"grad_norm": 0.47265625,
"learning_rate": 0.00025752749222237014,
"loss": 0.1123,
"step": 69850
},
{
"epoch": 1.417562360575948,
"grad_norm": 0.4921875,
"learning_rate": 0.00025745732930925765,
"loss": 0.1068,
"step": 69900
},
{
"epoch": 1.418576353680795,
"grad_norm": 0.47265625,
"learning_rate": 0.0002573871180657428,
"loss": 0.1054,
"step": 69950
},
{
"epoch": 1.4195903467856419,
"grad_norm": 0.474609375,
"learning_rate": 0.00025731685852340417,
"loss": 0.1149,
"step": 70000
},
{
"epoch": 1.4206043398904886,
"grad_norm": 0.4609375,
"learning_rate": 0.00025724655071384186,
"loss": 0.1088,
"step": 70050
},
{
"epoch": 1.4216183329953356,
"grad_norm": 0.43359375,
"learning_rate": 0.0002571761946686778,
"loss": 0.1075,
"step": 70100
},
{
"epoch": 1.4226323261001825,
"grad_norm": 0.46484375,
"learning_rate": 0.0002571057904195556,
"loss": 0.1133,
"step": 70150
},
{
"epoch": 1.4236463192050295,
"grad_norm": 0.50390625,
"learning_rate": 0.00025703533799814046,
"loss": 0.1191,
"step": 70200
},
{
"epoch": 1.4246603123098762,
"grad_norm": 0.96875,
"learning_rate": 0.0002569648374361194,
"loss": 0.1081,
"step": 70250
},
{
"epoch": 1.4256743054147232,
"grad_norm": 0.458984375,
"learning_rate": 0.00025689428876520096,
"loss": 0.1192,
"step": 70300
},
{
"epoch": 1.4266882985195701,
"grad_norm": 0.486328125,
"learning_rate": 0.00025682369201711537,
"loss": 0.124,
"step": 70350
},
{
"epoch": 1.427702291624417,
"grad_norm": 0.44921875,
"learning_rate": 0.00025675304722361453,
"loss": 0.1125,
"step": 70400
},
{
"epoch": 1.4287162847292638,
"grad_norm": 0.56640625,
"learning_rate": 0.00025668235441647186,
"loss": 0.1115,
"step": 70450
},
{
"epoch": 1.4297302778341108,
"grad_norm": 0.484375,
"learning_rate": 0.00025661161362748244,
"loss": 0.1083,
"step": 70500
},
{
"epoch": 1.4297302778341108,
"eval_cer": 0.03154083453417054,
"eval_loss": 0.1698320358991623,
"eval_runtime": 128.2715,
"eval_samples_per_second": 21.891,
"eval_steps_per_second": 0.172,
"eval_wer": 0.09438689146184576,
"step": 70500
},
{
"epoch": 1.4307442709389577,
"grad_norm": 0.46484375,
"learning_rate": 0.0002565408248884629,
"loss": 0.1153,
"step": 70550
},
{
"epoch": 1.4317582640438045,
"grad_norm": 0.54296875,
"learning_rate": 0.0002564699882312514,
"loss": 0.1121,
"step": 70600
},
{
"epoch": 1.4327722571486514,
"grad_norm": 0.54296875,
"learning_rate": 0.00025639910368770776,
"loss": 0.1116,
"step": 70650
},
{
"epoch": 1.4337862502534984,
"grad_norm": 0.56640625,
"learning_rate": 0.00025632817128971326,
"loss": 0.1163,
"step": 70700
},
{
"epoch": 1.434800243358345,
"grad_norm": 0.50390625,
"learning_rate": 0.00025625719106917065,
"loss": 0.1139,
"step": 70750
},
{
"epoch": 1.435814236463192,
"grad_norm": 0.44140625,
"learning_rate": 0.00025618616305800437,
"loss": 0.1053,
"step": 70800
},
{
"epoch": 1.436828229568039,
"grad_norm": 0.4765625,
"learning_rate": 0.0002561150872881601,
"loss": 0.1142,
"step": 70850
},
{
"epoch": 1.4378422226728858,
"grad_norm": 0.443359375,
"learning_rate": 0.00025604396379160524,
"loss": 0.1098,
"step": 70900
},
{
"epoch": 1.4388562157777327,
"grad_norm": 0.4375,
"learning_rate": 0.00025597279260032856,
"loss": 0.1152,
"step": 70950
},
{
"epoch": 1.4398702088825797,
"grad_norm": 0.46484375,
"learning_rate": 0.0002559015737463402,
"loss": 0.1055,
"step": 71000
},
{
"epoch": 1.4408842019874264,
"grad_norm": 0.490234375,
"learning_rate": 0.00025583030726167186,
"loss": 0.1115,
"step": 71050
},
{
"epoch": 1.4418981950922733,
"grad_norm": 0.46875,
"learning_rate": 0.0002557589931783766,
"loss": 0.1056,
"step": 71100
},
{
"epoch": 1.4429121881971203,
"grad_norm": 0.5546875,
"learning_rate": 0.0002556876315285289,
"loss": 0.1037,
"step": 71150
},
{
"epoch": 1.443926181301967,
"grad_norm": 0.455078125,
"learning_rate": 0.00025561622234422475,
"loss": 0.1278,
"step": 71200
},
{
"epoch": 1.444940174406814,
"grad_norm": 0.546875,
"learning_rate": 0.0002555447656575812,
"loss": 0.1075,
"step": 71250
},
{
"epoch": 1.445954167511661,
"grad_norm": 0.5078125,
"learning_rate": 0.00025547326150073696,
"loss": 0.1063,
"step": 71300
},
{
"epoch": 1.4469681606165077,
"grad_norm": 0.482421875,
"learning_rate": 0.00025540170990585214,
"loss": 0.1095,
"step": 71350
},
{
"epoch": 1.4479821537213546,
"grad_norm": 0.431640625,
"learning_rate": 0.0002553301109051078,
"loss": 0.1155,
"step": 71400
},
{
"epoch": 1.4489961468262016,
"grad_norm": 0.431640625,
"learning_rate": 0.0002552584645307068,
"loss": 0.1011,
"step": 71450
},
{
"epoch": 1.4500101399310485,
"grad_norm": 0.498046875,
"learning_rate": 0.0002551867708148729,
"loss": 0.1079,
"step": 71500
},
{
"epoch": 1.4510241330358953,
"grad_norm": 0.69921875,
"learning_rate": 0.0002551150297898514,
"loss": 0.1103,
"step": 71550
},
{
"epoch": 1.4520381261407422,
"grad_norm": 0.443359375,
"learning_rate": 0.00025504324148790887,
"loss": 0.1136,
"step": 71600
},
{
"epoch": 1.4530521192455892,
"grad_norm": 0.486328125,
"learning_rate": 0.00025497140594133295,
"loss": 0.109,
"step": 71650
},
{
"epoch": 1.4540661123504361,
"grad_norm": 0.5625,
"learning_rate": 0.00025489952318243276,
"loss": 0.1051,
"step": 71700
},
{
"epoch": 1.4550801054552829,
"grad_norm": 0.5078125,
"learning_rate": 0.0002548275932435385,
"loss": 0.1064,
"step": 71750
},
{
"epoch": 1.4560940985601298,
"grad_norm": 0.486328125,
"learning_rate": 0.0002547556161570017,
"loss": 0.1107,
"step": 71800
},
{
"epoch": 1.4571080916649768,
"grad_norm": 0.49609375,
"learning_rate": 0.00025468359195519495,
"loss": 0.106,
"step": 71850
},
{
"epoch": 1.4581220847698235,
"grad_norm": 0.39453125,
"learning_rate": 0.0002546115206705122,
"loss": 0.1016,
"step": 71900
},
{
"epoch": 1.4591360778746705,
"grad_norm": 0.482421875,
"learning_rate": 0.0002545394023353685,
"loss": 0.1131,
"step": 71950
},
{
"epoch": 1.4601500709795174,
"grad_norm": 0.546875,
"learning_rate": 0.0002544672369821999,
"loss": 0.1045,
"step": 72000
},
{
"epoch": 1.4601500709795174,
"eval_cer": 0.030864550765501205,
"eval_loss": 0.16758893430233002,
"eval_runtime": 128.1097,
"eval_samples_per_second": 21.919,
"eval_steps_per_second": 0.172,
"eval_wer": 0.09211862685354459,
"step": 72000
},
{
"epoch": 1.4611640640843642,
"grad_norm": 0.4609375,
"learning_rate": 0.00025439502464346395,
"loss": 0.107,
"step": 72050
},
{
"epoch": 1.4621780571892111,
"grad_norm": 0.42578125,
"learning_rate": 0.0002543227653516391,
"loss": 0.1042,
"step": 72100
},
{
"epoch": 1.463192050294058,
"grad_norm": 0.5078125,
"learning_rate": 0.0002542504591392249,
"loss": 0.1144,
"step": 72150
},
{
"epoch": 1.4642060433989048,
"grad_norm": 0.421875,
"learning_rate": 0.000254178106038742,
"loss": 0.1123,
"step": 72200
},
{
"epoch": 1.4652200365037518,
"grad_norm": 0.5,
"learning_rate": 0.0002541057060827323,
"loss": 0.1156,
"step": 72250
},
{
"epoch": 1.4662340296085987,
"grad_norm": 0.55078125,
"learning_rate": 0.00025403325930375867,
"loss": 0.1076,
"step": 72300
},
{
"epoch": 1.4672480227134455,
"grad_norm": 0.515625,
"learning_rate": 0.000253960765734405,
"loss": 0.1211,
"step": 72350
},
{
"epoch": 1.4682620158182924,
"grad_norm": 0.443359375,
"learning_rate": 0.00025388822540727625,
"loss": 0.1082,
"step": 72400
},
{
"epoch": 1.4692760089231394,
"grad_norm": 0.55859375,
"learning_rate": 0.0002538156383549984,
"loss": 0.1055,
"step": 72450
},
{
"epoch": 1.470290002027986,
"grad_norm": 0.490234375,
"learning_rate": 0.0002537430046102186,
"loss": 0.1095,
"step": 72500
},
{
"epoch": 1.471303995132833,
"grad_norm": 0.53515625,
"learning_rate": 0.0002536703242056047,
"loss": 0.1101,
"step": 72550
},
{
"epoch": 1.47231798823768,
"grad_norm": 0.490234375,
"learning_rate": 0.00025359759717384585,
"loss": 0.1079,
"step": 72600
},
{
"epoch": 1.4733319813425267,
"grad_norm": 0.447265625,
"learning_rate": 0.00025352482354765193,
"loss": 0.106,
"step": 72650
},
{
"epoch": 1.4743459744473737,
"grad_norm": 0.53515625,
"learning_rate": 0.0002534520033597539,
"loss": 0.1101,
"step": 72700
},
{
"epoch": 1.4753599675522207,
"grad_norm": 0.455078125,
"learning_rate": 0.0002533791366429036,
"loss": 0.1158,
"step": 72750
},
{
"epoch": 1.4763739606570676,
"grad_norm": 0.474609375,
"learning_rate": 0.00025330622342987396,
"loss": 0.104,
"step": 72800
},
{
"epoch": 1.4773879537619143,
"grad_norm": 0.59375,
"learning_rate": 0.00025323326375345853,
"loss": 0.1024,
"step": 72850
},
{
"epoch": 1.4784019468667613,
"grad_norm": 0.44140625,
"learning_rate": 0.0002531602576464721,
"loss": 0.1044,
"step": 72900
},
{
"epoch": 1.4794159399716083,
"grad_norm": 0.53125,
"learning_rate": 0.00025308720514175,
"loss": 0.1053,
"step": 72950
},
{
"epoch": 1.4804299330764552,
"grad_norm": 0.455078125,
"learning_rate": 0.0002530141062721486,
"loss": 0.1079,
"step": 73000
},
{
"epoch": 1.481443926181302,
"grad_norm": 0.4765625,
"learning_rate": 0.00025294096107054525,
"loss": 0.1059,
"step": 73050
},
{
"epoch": 1.482457919286149,
"grad_norm": 0.435546875,
"learning_rate": 0.00025286776956983786,
"loss": 0.1017,
"step": 73100
},
{
"epoch": 1.4834719123909959,
"grad_norm": 0.55078125,
"learning_rate": 0.0002527945318029454,
"loss": 0.1073,
"step": 73150
},
{
"epoch": 1.4844859054958426,
"grad_norm": 0.4921875,
"learning_rate": 0.00025272124780280754,
"loss": 0.104,
"step": 73200
},
{
"epoch": 1.4854998986006895,
"grad_norm": 0.474609375,
"learning_rate": 0.0002526479176023847,
"loss": 0.1118,
"step": 73250
},
{
"epoch": 1.4865138917055365,
"grad_norm": 0.427734375,
"learning_rate": 0.00025257454123465826,
"loss": 0.1031,
"step": 73300
},
{
"epoch": 1.4875278848103832,
"grad_norm": 0.52734375,
"learning_rate": 0.0002525011187326301,
"loss": 0.1084,
"step": 73350
},
{
"epoch": 1.4885418779152302,
"grad_norm": 0.4609375,
"learning_rate": 0.0002524276501293231,
"loss": 0.1104,
"step": 73400
},
{
"epoch": 1.4895558710200771,
"grad_norm": 0.453125,
"learning_rate": 0.0002523541354577807,
"loss": 0.108,
"step": 73450
},
{
"epoch": 1.4905698641249239,
"grad_norm": 0.53125,
"learning_rate": 0.00025228057475106723,
"loss": 0.1221,
"step": 73500
},
{
"epoch": 1.4905698641249239,
"eval_cer": 0.03172291093342767,
"eval_loss": 0.16636300086975098,
"eval_runtime": 127.4384,
"eval_samples_per_second": 22.034,
"eval_steps_per_second": 0.173,
"eval_wer": 0.09469158372266233,
"step": 73500
},
{
"epoch": 1.4915838572297708,
"grad_norm": 0.466796875,
"learning_rate": 0.00025220696804226754,
"loss": 0.1057,
"step": 73550
},
{
"epoch": 1.4925978503346178,
"grad_norm": 0.484375,
"learning_rate": 0.0002521333153644872,
"loss": 0.1189,
"step": 73600
},
{
"epoch": 1.4936118434394645,
"grad_norm": 0.443359375,
"learning_rate": 0.00025205961675085265,
"loss": 0.11,
"step": 73650
},
{
"epoch": 1.4946258365443115,
"grad_norm": 0.4921875,
"learning_rate": 0.0002519858722345108,
"loss": 0.1086,
"step": 73700
},
{
"epoch": 1.4956398296491584,
"grad_norm": 0.43359375,
"learning_rate": 0.0002519120818486291,
"loss": 0.102,
"step": 73750
},
{
"epoch": 1.4966538227540052,
"grad_norm": 0.578125,
"learning_rate": 0.000251838245626396,
"loss": 0.0992,
"step": 73800
},
{
"epoch": 1.4976678158588521,
"grad_norm": 0.4921875,
"learning_rate": 0.0002517643636010203,
"loss": 0.1055,
"step": 73850
},
{
"epoch": 1.498681808963699,
"grad_norm": 0.458984375,
"learning_rate": 0.0002516904358057313,
"loss": 0.1112,
"step": 73900
},
{
"epoch": 1.4996958020685458,
"grad_norm": 0.52734375,
"learning_rate": 0.00025161646227377923,
"loss": 0.1111,
"step": 73950
},
{
"epoch": 1.500709795173393,
"grad_norm": 0.490234375,
"learning_rate": 0.0002515424430384346,
"loss": 0.1123,
"step": 74000
},
{
"epoch": 1.5017237882782397,
"grad_norm": 0.435546875,
"learning_rate": 0.00025146837813298854,
"loss": 0.1148,
"step": 74050
},
{
"epoch": 1.5027377813830864,
"grad_norm": 0.51171875,
"learning_rate": 0.00025139426759075294,
"loss": 0.1003,
"step": 74100
},
{
"epoch": 1.5037517744879336,
"grad_norm": 0.578125,
"learning_rate": 0.0002513201114450598,
"loss": 0.1031,
"step": 74150
},
{
"epoch": 1.5047657675927804,
"grad_norm": 0.447265625,
"learning_rate": 0.0002512459097292619,
"loss": 0.1107,
"step": 74200
},
{
"epoch": 1.505779760697627,
"grad_norm": 0.478515625,
"learning_rate": 0.00025117166247673255,
"loss": 0.1146,
"step": 74250
},
{
"epoch": 1.5067937538024743,
"grad_norm": 0.47265625,
"learning_rate": 0.00025109736972086556,
"loss": 0.1065,
"step": 74300
},
{
"epoch": 1.507807746907321,
"grad_norm": 0.486328125,
"learning_rate": 0.0002510230314950749,
"loss": 0.1019,
"step": 74350
},
{
"epoch": 1.508821740012168,
"grad_norm": 0.494140625,
"learning_rate": 0.0002509486478327953,
"loss": 0.1144,
"step": 74400
},
{
"epoch": 1.509835733117015,
"grad_norm": 0.458984375,
"learning_rate": 0.00025087421876748183,
"loss": 0.1062,
"step": 74450
},
{
"epoch": 1.5108497262218616,
"grad_norm": 0.55078125,
"learning_rate": 0.00025079974433260993,
"loss": 0.1161,
"step": 74500
},
{
"epoch": 1.5118637193267086,
"grad_norm": 0.54296875,
"learning_rate": 0.0002507252245616756,
"loss": 0.11,
"step": 74550
},
{
"epoch": 1.5128777124315556,
"grad_norm": 0.41796875,
"learning_rate": 0.00025065065948819506,
"loss": 0.101,
"step": 74600
},
{
"epoch": 1.5138917055364023,
"grad_norm": 0.5234375,
"learning_rate": 0.000250576049145705,
"loss": 0.1134,
"step": 74650
},
{
"epoch": 1.5149056986412492,
"grad_norm": 0.55859375,
"learning_rate": 0.0002505013935677624,
"loss": 0.1045,
"step": 74700
},
{
"epoch": 1.5159196917460962,
"grad_norm": 0.427734375,
"learning_rate": 0.0002504266927879446,
"loss": 0.1068,
"step": 74750
},
{
"epoch": 1.516933684850943,
"grad_norm": 0.443359375,
"learning_rate": 0.0002503519468398495,
"loss": 0.1057,
"step": 74800
},
{
"epoch": 1.5179476779557899,
"grad_norm": 0.50390625,
"learning_rate": 0.00025027715575709475,
"loss": 0.1029,
"step": 74850
},
{
"epoch": 1.5189616710606368,
"grad_norm": 0.56640625,
"learning_rate": 0.00025020231957331897,
"loss": 0.1096,
"step": 74900
},
{
"epoch": 1.5199756641654836,
"grad_norm": 0.44140625,
"learning_rate": 0.0002501274383221806,
"loss": 0.1098,
"step": 74950
},
{
"epoch": 1.5209896572703305,
"grad_norm": 0.609375,
"learning_rate": 0.0002500525120373586,
"loss": 0.1069,
"step": 75000
},
{
"epoch": 1.5209896572703305,
"eval_cer": 0.031192288284164035,
"eval_loss": 0.16423167288303375,
"eval_runtime": 132.6118,
"eval_samples_per_second": 21.175,
"eval_steps_per_second": 0.166,
"eval_wer": 0.09225404563612973,
"step": 75000
},
{
"epoch": 1.5220036503751775,
"grad_norm": 0.4609375,
"learning_rate": 0.0002499775407525521,
"loss": 0.1113,
"step": 75050
},
{
"epoch": 1.5230176434800242,
"grad_norm": 0.56640625,
"learning_rate": 0.0002499025245014803,
"loss": 0.103,
"step": 75100
},
{
"epoch": 1.5240316365848712,
"grad_norm": 0.466796875,
"learning_rate": 0.00024982746331788297,
"loss": 0.1089,
"step": 75150
},
{
"epoch": 1.5250456296897181,
"grad_norm": 0.45703125,
"learning_rate": 0.00024975235723551976,
"loss": 0.1048,
"step": 75200
},
{
"epoch": 1.5260596227945649,
"grad_norm": 0.44140625,
"learning_rate": 0.00024967720628817075,
"loss": 0.1116,
"step": 75250
},
{
"epoch": 1.527073615899412,
"grad_norm": 0.484375,
"learning_rate": 0.0002496020105096361,
"loss": 0.1109,
"step": 75300
},
{
"epoch": 1.5280876090042588,
"grad_norm": 0.498046875,
"learning_rate": 0.0002495267699337361,
"loss": 0.1092,
"step": 75350
},
{
"epoch": 1.5291016021091055,
"grad_norm": 0.5390625,
"learning_rate": 0.0002494514845943112,
"loss": 0.1077,
"step": 75400
},
{
"epoch": 1.5301155952139527,
"grad_norm": 0.451171875,
"learning_rate": 0.0002493761545252221,
"loss": 0.1145,
"step": 75450
},
{
"epoch": 1.5311295883187994,
"grad_norm": 0.56640625,
"learning_rate": 0.00024930077976034943,
"loss": 0.1142,
"step": 75500
},
{
"epoch": 1.5321435814236464,
"grad_norm": 0.490234375,
"learning_rate": 0.0002492253603335942,
"loss": 0.1109,
"step": 75550
},
{
"epoch": 1.5331575745284933,
"grad_norm": 0.5390625,
"learning_rate": 0.00024914989627887707,
"loss": 0.1086,
"step": 75600
},
{
"epoch": 1.53417156763334,
"grad_norm": 0.49609375,
"learning_rate": 0.0002490743876301392,
"loss": 0.1249,
"step": 75650
},
{
"epoch": 1.535185560738187,
"grad_norm": 0.486328125,
"learning_rate": 0.0002489988344213416,
"loss": 0.1147,
"step": 75700
},
{
"epoch": 1.536199553843034,
"grad_norm": 0.482421875,
"learning_rate": 0.00024892323668646536,
"loss": 0.1007,
"step": 75750
},
{
"epoch": 1.5372135469478807,
"grad_norm": 0.451171875,
"learning_rate": 0.0002488475944595116,
"loss": 0.1163,
"step": 75800
},
{
"epoch": 1.5382275400527277,
"grad_norm": 0.447265625,
"learning_rate": 0.0002487719077745014,
"loss": 0.1034,
"step": 75850
},
{
"epoch": 1.5392415331575746,
"grad_norm": 0.4453125,
"learning_rate": 0.00024869617666547594,
"loss": 0.1084,
"step": 75900
},
{
"epoch": 1.5402555262624213,
"grad_norm": 0.55859375,
"learning_rate": 0.00024862040116649625,
"loss": 0.1069,
"step": 75950
},
{
"epoch": 1.5412695193672683,
"grad_norm": 0.51953125,
"learning_rate": 0.00024854458131164347,
"loss": 0.108,
"step": 76000
},
{
"epoch": 1.5422835124721153,
"grad_norm": 0.47265625,
"learning_rate": 0.00024846871713501864,
"loss": 0.1068,
"step": 76050
},
{
"epoch": 1.543297505576962,
"grad_norm": 0.50390625,
"learning_rate": 0.00024839280867074257,
"loss": 0.1014,
"step": 76100
},
{
"epoch": 1.544311498681809,
"grad_norm": 0.55078125,
"learning_rate": 0.0002483168559529563,
"loss": 0.1049,
"step": 76150
},
{
"epoch": 1.545325491786656,
"grad_norm": 0.53515625,
"learning_rate": 0.0002482408590158205,
"loss": 0.1043,
"step": 76200
},
{
"epoch": 1.5463394848915026,
"grad_norm": 0.474609375,
"learning_rate": 0.0002481648178935158,
"loss": 0.1031,
"step": 76250
},
{
"epoch": 1.5473534779963496,
"grad_norm": 0.455078125,
"learning_rate": 0.00024808873262024283,
"loss": 0.1089,
"step": 76300
},
{
"epoch": 1.5483674711011965,
"grad_norm": 0.5,
"learning_rate": 0.0002480126032302219,
"loss": 0.1165,
"step": 76350
},
{
"epoch": 1.5493814642060433,
"grad_norm": 0.41796875,
"learning_rate": 0.00024793642975769333,
"loss": 0.112,
"step": 76400
},
{
"epoch": 1.5503954573108902,
"grad_norm": 0.51171875,
"learning_rate": 0.0002478602122369171,
"loss": 0.1002,
"step": 76450
},
{
"epoch": 1.5514094504157372,
"grad_norm": 0.4609375,
"learning_rate": 0.0002477839507021731,
"loss": 0.1097,
"step": 76500
},
{
"epoch": 1.5514094504157372,
"eval_cer": 0.02821143751918305,
"eval_loss": 0.16545580327510834,
"eval_runtime": 130.6901,
"eval_samples_per_second": 21.486,
"eval_steps_per_second": 0.168,
"eval_wer": 0.08995192633218227,
"step": 76500
},
{
"epoch": 1.552423443520584,
"grad_norm": 0.44140625,
"learning_rate": 0.000247707645187761,
"loss": 0.1089,
"step": 76550
},
{
"epoch": 1.553437436625431,
"grad_norm": 0.515625,
"learning_rate": 0.0002476312957280003,
"loss": 0.1065,
"step": 76600
},
{
"epoch": 1.5544514297302778,
"grad_norm": 0.486328125,
"learning_rate": 0.00024755490235723015,
"loss": 0.1152,
"step": 76650
},
{
"epoch": 1.5554654228351246,
"grad_norm": 0.578125,
"learning_rate": 0.00024747846510980953,
"loss": 0.1117,
"step": 76700
},
{
"epoch": 1.5564794159399717,
"grad_norm": 0.53125,
"learning_rate": 0.0002474019840201172,
"loss": 0.1045,
"step": 76750
},
{
"epoch": 1.5574934090448185,
"grad_norm": 0.7421875,
"learning_rate": 0.0002473254591225514,
"loss": 0.1048,
"step": 76800
},
{
"epoch": 1.5585074021496654,
"grad_norm": 0.45703125,
"learning_rate": 0.0002472488904515304,
"loss": 0.111,
"step": 76850
},
{
"epoch": 1.5595213952545124,
"grad_norm": 0.494140625,
"learning_rate": 0.00024717227804149205,
"loss": 0.1055,
"step": 76900
},
{
"epoch": 1.5605353883593591,
"grad_norm": 0.466796875,
"learning_rate": 0.00024709562192689363,
"loss": 0.1107,
"step": 76950
},
{
"epoch": 1.561549381464206,
"grad_norm": 0.56640625,
"learning_rate": 0.00024701892214221247,
"loss": 0.1081,
"step": 77000
},
{
"epoch": 1.562563374569053,
"grad_norm": 0.5,
"learning_rate": 0.0002469421787219452,
"loss": 0.1189,
"step": 77050
},
{
"epoch": 1.5635773676738998,
"grad_norm": 0.51953125,
"learning_rate": 0.00024686539170060834,
"loss": 0.1131,
"step": 77100
},
{
"epoch": 1.5645913607787467,
"grad_norm": 0.478515625,
"learning_rate": 0.0002467885611127379,
"loss": 0.1194,
"step": 77150
},
{
"epoch": 1.5656053538835937,
"grad_norm": 0.380859375,
"learning_rate": 0.0002467116869928894,
"loss": 0.1119,
"step": 77200
},
{
"epoch": 1.5666193469884404,
"grad_norm": 0.5,
"learning_rate": 0.00024663476937563804,
"loss": 0.1053,
"step": 77250
},
{
"epoch": 1.5676333400932874,
"grad_norm": 0.51171875,
"learning_rate": 0.0002465578082955786,
"loss": 0.1016,
"step": 77300
},
{
"epoch": 1.5686473331981343,
"grad_norm": 0.4296875,
"learning_rate": 0.00024648080378732537,
"loss": 0.1087,
"step": 77350
},
{
"epoch": 1.569661326302981,
"grad_norm": 0.515625,
"learning_rate": 0.00024640375588551213,
"loss": 0.1133,
"step": 77400
},
{
"epoch": 1.570675319407828,
"grad_norm": 0.52734375,
"learning_rate": 0.00024632666462479243,
"loss": 0.1123,
"step": 77450
},
{
"epoch": 1.571689312512675,
"grad_norm": 0.421875,
"learning_rate": 0.0002462495300398388,
"loss": 0.1154,
"step": 77500
},
{
"epoch": 1.5727033056175217,
"grad_norm": 0.435546875,
"learning_rate": 0.00024617235216534383,
"loss": 0.1068,
"step": 77550
},
{
"epoch": 1.5737172987223687,
"grad_norm": 0.51171875,
"learning_rate": 0.00024609513103601927,
"loss": 0.1102,
"step": 77600
},
{
"epoch": 1.5747312918272156,
"grad_norm": 0.498046875,
"learning_rate": 0.00024601786668659626,
"loss": 0.1027,
"step": 77650
},
{
"epoch": 1.5757452849320623,
"grad_norm": 0.470703125,
"learning_rate": 0.0002459405591518256,
"loss": 0.103,
"step": 77700
},
{
"epoch": 1.5767592780369093,
"grad_norm": 0.55078125,
"learning_rate": 0.0002458632084664774,
"loss": 0.0998,
"step": 77750
},
{
"epoch": 1.5777732711417563,
"grad_norm": 0.4609375,
"learning_rate": 0.0002457858146653412,
"loss": 0.1073,
"step": 77800
},
{
"epoch": 1.578787264246603,
"grad_norm": 0.458984375,
"learning_rate": 0.00024570837778322584,
"loss": 0.1014,
"step": 77850
},
{
"epoch": 1.5798012573514502,
"grad_norm": 0.451171875,
"learning_rate": 0.0002456308978549597,
"loss": 0.1076,
"step": 77900
},
{
"epoch": 1.580815250456297,
"grad_norm": 0.5234375,
"learning_rate": 0.0002455533749153904,
"loss": 0.1071,
"step": 77950
},
{
"epoch": 1.5818292435611436,
"grad_norm": 0.453125,
"learning_rate": 0.000245475808999385,
"loss": 0.1008,
"step": 78000
},
{
"epoch": 1.5818292435611436,
"eval_cer": 0.027639197407232075,
"eval_loss": 0.16488178074359894,
"eval_runtime": 127.6157,
"eval_samples_per_second": 22.004,
"eval_steps_per_second": 0.172,
"eval_wer": 0.08788678989775882,
"step": 78000
},
{
"epoch": 1.5828432366659908,
"grad_norm": 0.470703125,
"learning_rate": 0.0002453982001418298,
"loss": 0.1016,
"step": 78050
},
{
"epoch": 1.5838572297708375,
"grad_norm": 0.451171875,
"learning_rate": 0.0002453205483776303,
"loss": 0.1195,
"step": 78100
},
{
"epoch": 1.5848712228756845,
"grad_norm": 0.57421875,
"learning_rate": 0.00024524285374171167,
"loss": 0.112,
"step": 78150
},
{
"epoch": 1.5858852159805314,
"grad_norm": 0.5078125,
"learning_rate": 0.000245165116269018,
"loss": 0.1045,
"step": 78200
},
{
"epoch": 1.5868992090853782,
"grad_norm": 0.4375,
"learning_rate": 0.0002450873359945128,
"loss": 0.1025,
"step": 78250
},
{
"epoch": 1.5879132021902251,
"grad_norm": 0.5078125,
"learning_rate": 0.0002450095129531788,
"loss": 0.105,
"step": 78300
},
{
"epoch": 1.588927195295072,
"grad_norm": 0.609375,
"learning_rate": 0.0002449316471800181,
"loss": 0.105,
"step": 78350
},
{
"epoch": 1.5899411883999188,
"grad_norm": 0.41796875,
"learning_rate": 0.00024485373871005174,
"loss": 0.1076,
"step": 78400
},
{
"epoch": 1.5909551815047658,
"grad_norm": 0.546875,
"learning_rate": 0.00024477578757832013,
"loss": 0.1097,
"step": 78450
},
{
"epoch": 1.5919691746096127,
"grad_norm": 0.474609375,
"learning_rate": 0.000244697793819883,
"loss": 0.0967,
"step": 78500
},
{
"epoch": 1.5929831677144595,
"grad_norm": 0.50390625,
"learning_rate": 0.0002446197574698189,
"loss": 0.1034,
"step": 78550
},
{
"epoch": 1.5939971608193064,
"grad_norm": 0.478515625,
"learning_rate": 0.000244541678563226,
"loss": 0.1228,
"step": 78600
},
{
"epoch": 1.5950111539241534,
"grad_norm": 0.55078125,
"learning_rate": 0.0002444635571352211,
"loss": 0.1063,
"step": 78650
},
{
"epoch": 1.5960251470290001,
"grad_norm": 0.46875,
"learning_rate": 0.00024438539322094047,
"loss": 0.1098,
"step": 78700
},
{
"epoch": 1.597039140133847,
"grad_norm": 0.4921875,
"learning_rate": 0.0002443071868555395,
"loss": 0.1023,
"step": 78750
},
{
"epoch": 1.598053133238694,
"grad_norm": 0.5,
"learning_rate": 0.0002442289380741924,
"loss": 0.1072,
"step": 78800
},
{
"epoch": 1.5990671263435408,
"grad_norm": 0.5078125,
"learning_rate": 0.00024415064691209274,
"loss": 0.1055,
"step": 78850
},
{
"epoch": 1.6000811194483877,
"grad_norm": 0.45703125,
"learning_rate": 0.00024407231340445307,
"loss": 0.1113,
"step": 78900
},
{
"epoch": 1.6010951125532347,
"grad_norm": 0.52734375,
"learning_rate": 0.00024399393758650493,
"loss": 0.1024,
"step": 78950
},
{
"epoch": 1.6021091056580814,
"grad_norm": 0.48828125,
"learning_rate": 0.00024391551949349882,
"loss": 0.1129,
"step": 79000
},
{
"epoch": 1.6031230987629286,
"grad_norm": 0.474609375,
"learning_rate": 0.00024383705916070444,
"loss": 0.0993,
"step": 79050
},
{
"epoch": 1.6041370918677753,
"grad_norm": 0.50390625,
"learning_rate": 0.0002437585566234104,
"loss": 0.1073,
"step": 79100
},
{
"epoch": 1.605151084972622,
"grad_norm": 0.439453125,
"learning_rate": 0.00024368001191692423,
"loss": 0.1026,
"step": 79150
},
{
"epoch": 1.6061650780774692,
"grad_norm": 0.412109375,
"learning_rate": 0.00024360142507657255,
"loss": 0.1002,
"step": 79200
},
{
"epoch": 1.607179071182316,
"grad_norm": 0.515625,
"learning_rate": 0.00024352279613770083,
"loss": 0.1042,
"step": 79250
},
{
"epoch": 1.6081930642871627,
"grad_norm": 0.53125,
"learning_rate": 0.00024344412513567347,
"loss": 0.1088,
"step": 79300
},
{
"epoch": 1.6092070573920099,
"grad_norm": 0.486328125,
"learning_rate": 0.00024336541210587392,
"loss": 0.105,
"step": 79350
},
{
"epoch": 1.6102210504968566,
"grad_norm": 0.56640625,
"learning_rate": 0.00024328665708370437,
"loss": 0.1089,
"step": 79400
},
{
"epoch": 1.6112350436017036,
"grad_norm": 0.4140625,
"learning_rate": 0.000243207860104586,
"loss": 0.1117,
"step": 79450
},
{
"epoch": 1.6122490367065505,
"grad_norm": 0.466796875,
"learning_rate": 0.00024312902120395883,
"loss": 0.1057,
"step": 79500
},
{
"epoch": 1.6122490367065505,
"eval_cer": 0.031676091287904405,
"eval_loss": 0.16220040619373322,
"eval_runtime": 128.8141,
"eval_samples_per_second": 21.799,
"eval_steps_per_second": 0.171,
"eval_wer": 0.09296499424470174,
"step": 79500
},
{
"epoch": 1.6132630298113972,
"grad_norm": 0.515625,
"learning_rate": 0.00024305014041728172,
"loss": 0.105,
"step": 79550
},
{
"epoch": 1.6142770229162442,
"grad_norm": 0.4296875,
"learning_rate": 0.00024297121778003237,
"loss": 0.1134,
"step": 79600
},
{
"epoch": 1.6152910160210912,
"grad_norm": 0.5234375,
"learning_rate": 0.00024289225332770737,
"loss": 0.112,
"step": 79650
},
{
"epoch": 1.6163050091259379,
"grad_norm": 0.451171875,
"learning_rate": 0.00024281324709582195,
"loss": 0.1129,
"step": 79700
},
{
"epoch": 1.6173190022307848,
"grad_norm": 0.400390625,
"learning_rate": 0.00024273419911991035,
"loss": 0.1032,
"step": 79750
},
{
"epoch": 1.6183329953356318,
"grad_norm": 0.5703125,
"learning_rate": 0.00024265510943552543,
"loss": 0.1057,
"step": 79800
},
{
"epoch": 1.6193469884404785,
"grad_norm": 0.462890625,
"learning_rate": 0.00024257597807823887,
"loss": 0.1124,
"step": 79850
},
{
"epoch": 1.6203609815453255,
"grad_norm": 0.482421875,
"learning_rate": 0.00024249680508364111,
"loss": 0.1129,
"step": 79900
},
{
"epoch": 1.6213749746501724,
"grad_norm": 0.419921875,
"learning_rate": 0.00024241759048734124,
"loss": 0.1016,
"step": 79950
},
{
"epoch": 1.6223889677550192,
"grad_norm": 0.486328125,
"learning_rate": 0.0002423383343249671,
"loss": 0.1117,
"step": 80000
},
{
"epoch": 1.6234029608598661,
"grad_norm": 0.431640625,
"learning_rate": 0.00024225903663216525,
"loss": 0.1155,
"step": 80050
},
{
"epoch": 1.624416953964713,
"grad_norm": 0.53515625,
"learning_rate": 0.0002421796974446009,
"loss": 0.1136,
"step": 80100
},
{
"epoch": 1.6254309470695598,
"grad_norm": 0.45703125,
"learning_rate": 0.00024210031679795796,
"loss": 0.0956,
"step": 80150
},
{
"epoch": 1.6264449401744068,
"grad_norm": 0.5859375,
"learning_rate": 0.00024202089472793897,
"loss": 0.1087,
"step": 80200
},
{
"epoch": 1.6274589332792537,
"grad_norm": 0.421875,
"learning_rate": 0.00024194143127026504,
"loss": 0.1037,
"step": 80250
},
{
"epoch": 1.6284729263841005,
"grad_norm": 0.5234375,
"learning_rate": 0.00024186192646067598,
"loss": 0.1133,
"step": 80300
},
{
"epoch": 1.6294869194889476,
"grad_norm": 0.5234375,
"learning_rate": 0.00024178238033493015,
"loss": 0.1023,
"step": 80350
},
{
"epoch": 1.6305009125937944,
"grad_norm": 0.46875,
"learning_rate": 0.00024170279292880456,
"loss": 0.1014,
"step": 80400
},
{
"epoch": 1.631514905698641,
"grad_norm": 0.4296875,
"learning_rate": 0.00024162316427809463,
"loss": 0.1064,
"step": 80450
},
{
"epoch": 1.6325288988034883,
"grad_norm": 0.47265625,
"learning_rate": 0.00024154349441861457,
"loss": 0.1039,
"step": 80500
},
{
"epoch": 1.633542891908335,
"grad_norm": 0.466796875,
"learning_rate": 0.00024146378338619684,
"loss": 0.1013,
"step": 80550
},
{
"epoch": 1.6345568850131817,
"grad_norm": 0.4453125,
"learning_rate": 0.00024138403121669262,
"loss": 0.1025,
"step": 80600
},
{
"epoch": 1.635570878118029,
"grad_norm": 0.49609375,
"learning_rate": 0.0002413042379459716,
"loss": 0.1095,
"step": 80650
},
{
"epoch": 1.6365848712228757,
"grad_norm": 0.51171875,
"learning_rate": 0.00024122440360992186,
"loss": 0.0999,
"step": 80700
},
{
"epoch": 1.6375988643277226,
"grad_norm": 0.431640625,
"learning_rate": 0.00024114452824444994,
"loss": 0.0965,
"step": 80750
},
{
"epoch": 1.6386128574325696,
"grad_norm": 0.44921875,
"learning_rate": 0.00024106461188548096,
"loss": 0.1026,
"step": 80800
},
{
"epoch": 1.6396268505374163,
"grad_norm": 0.5078125,
"learning_rate": 0.0002409846545689583,
"loss": 0.1043,
"step": 80850
},
{
"epoch": 1.6406408436422633,
"grad_norm": 0.482421875,
"learning_rate": 0.00024090465633084393,
"loss": 0.1068,
"step": 80900
},
{
"epoch": 1.6416548367471102,
"grad_norm": 0.5234375,
"learning_rate": 0.00024082461720711807,
"loss": 0.1043,
"step": 80950
},
{
"epoch": 1.642668829851957,
"grad_norm": 0.478515625,
"learning_rate": 0.00024074453723377953,
"loss": 0.0995,
"step": 81000
},
{
"epoch": 1.642668829851957,
"eval_cer": 0.030604441623705306,
"eval_loss": 0.16193068027496338,
"eval_runtime": 127.4447,
"eval_samples_per_second": 22.033,
"eval_steps_per_second": 0.173,
"eval_wer": 0.09025661859299886,
"step": 81000
},
{
"epoch": 1.643682822956804,
"grad_norm": 0.5234375,
"learning_rate": 0.00024066441644684521,
"loss": 0.1061,
"step": 81050
},
{
"epoch": 1.6446968160616509,
"grad_norm": 0.484375,
"learning_rate": 0.00024058425488235073,
"loss": 0.1031,
"step": 81100
},
{
"epoch": 1.6457108091664976,
"grad_norm": 0.53125,
"learning_rate": 0.00024050405257634963,
"loss": 0.1019,
"step": 81150
},
{
"epoch": 1.6467248022713445,
"grad_norm": 0.4609375,
"learning_rate": 0.0002404238095649142,
"loss": 0.1021,
"step": 81200
},
{
"epoch": 1.6477387953761915,
"grad_norm": 0.51953125,
"learning_rate": 0.00024034352588413467,
"loss": 0.1127,
"step": 81250
},
{
"epoch": 1.6487527884810382,
"grad_norm": 0.486328125,
"learning_rate": 0.00024026320157011983,
"loss": 0.1041,
"step": 81300
},
{
"epoch": 1.6497667815858852,
"grad_norm": 0.42578125,
"learning_rate": 0.0002401828366589966,
"loss": 0.0965,
"step": 81350
},
{
"epoch": 1.6507807746907321,
"grad_norm": 0.46484375,
"learning_rate": 0.00024010243118691016,
"loss": 0.1128,
"step": 81400
},
{
"epoch": 1.6517947677955789,
"grad_norm": 0.40234375,
"learning_rate": 0.00024002198519002403,
"loss": 0.1085,
"step": 81450
},
{
"epoch": 1.6528087609004258,
"grad_norm": 0.482421875,
"learning_rate": 0.00023994149870451984,
"loss": 0.0986,
"step": 81500
},
{
"epoch": 1.6538227540052728,
"grad_norm": 0.53125,
"learning_rate": 0.00023986097176659755,
"loss": 0.107,
"step": 81550
},
{
"epoch": 1.6548367471101195,
"grad_norm": 0.51171875,
"learning_rate": 0.0002397804044124752,
"loss": 0.1022,
"step": 81600
},
{
"epoch": 1.6558507402149667,
"grad_norm": 0.45703125,
"learning_rate": 0.00023969979667838907,
"loss": 0.1086,
"step": 81650
},
{
"epoch": 1.6568647333198134,
"grad_norm": 0.443359375,
"learning_rate": 0.00023961914860059358,
"loss": 0.1044,
"step": 81700
},
{
"epoch": 1.6578787264246602,
"grad_norm": 0.46484375,
"learning_rate": 0.00023953846021536134,
"loss": 0.1145,
"step": 81750
},
{
"epoch": 1.6588927195295073,
"grad_norm": 0.51171875,
"learning_rate": 0.00023945773155898306,
"loss": 0.1097,
"step": 81800
},
{
"epoch": 1.659906712634354,
"grad_norm": 0.4296875,
"learning_rate": 0.00023937696266776746,
"loss": 0.1062,
"step": 81850
},
{
"epoch": 1.6609207057392008,
"grad_norm": 0.546875,
"learning_rate": 0.0002392961535780416,
"loss": 0.1034,
"step": 81900
},
{
"epoch": 1.661934698844048,
"grad_norm": 0.490234375,
"learning_rate": 0.00023921530432615036,
"loss": 0.1035,
"step": 81950
},
{
"epoch": 1.6629486919488947,
"grad_norm": 0.47265625,
"learning_rate": 0.00023913441494845684,
"loss": 0.0979,
"step": 82000
},
{
"epoch": 1.6639626850537417,
"grad_norm": 0.50390625,
"learning_rate": 0.00023905348548134212,
"loss": 0.1251,
"step": 82050
},
{
"epoch": 1.6649766781585886,
"grad_norm": 0.494140625,
"learning_rate": 0.00023897251596120535,
"loss": 0.1029,
"step": 82100
},
{
"epoch": 1.6659906712634354,
"grad_norm": 0.455078125,
"learning_rate": 0.0002388915064244637,
"loss": 0.1129,
"step": 82150
},
{
"epoch": 1.6670046643682823,
"grad_norm": 0.5859375,
"learning_rate": 0.00023881045690755224,
"loss": 0.1063,
"step": 82200
},
{
"epoch": 1.6680186574731293,
"grad_norm": 0.462890625,
"learning_rate": 0.00023872936744692418,
"loss": 0.1019,
"step": 82250
},
{
"epoch": 1.669032650577976,
"grad_norm": 0.45703125,
"learning_rate": 0.0002386482380790506,
"loss": 0.1066,
"step": 82300
},
{
"epoch": 1.670046643682823,
"grad_norm": 0.6484375,
"learning_rate": 0.0002385670688404205,
"loss": 0.1154,
"step": 82350
},
{
"epoch": 1.67106063678767,
"grad_norm": 0.46875,
"learning_rate": 0.00023848585976754088,
"loss": 0.0969,
"step": 82400
},
{
"epoch": 1.6720746298925167,
"grad_norm": 0.5703125,
"learning_rate": 0.00023840461089693664,
"loss": 0.1085,
"step": 82450
},
{
"epoch": 1.6730886229973636,
"grad_norm": 0.490234375,
"learning_rate": 0.00023832332226515057,
"loss": 0.1046,
"step": 82500
},
{
"epoch": 1.6730886229973636,
"eval_cer": 0.035738996082756325,
"eval_loss": 0.1605072170495987,
"eval_runtime": 143.5413,
"eval_samples_per_second": 19.562,
"eval_steps_per_second": 0.153,
"eval_wer": 0.10792876972036021,
"step": 82500
},
{
"epoch": 1.6741026161022106,
"grad_norm": 0.5234375,
"learning_rate": 0.0002382419939087433,
"loss": 0.1057,
"step": 82550
},
{
"epoch": 1.6751166092070573,
"grad_norm": 0.51171875,
"learning_rate": 0.00023816062586429343,
"loss": 0.108,
"step": 82600
},
{
"epoch": 1.6761306023119042,
"grad_norm": 0.490234375,
"learning_rate": 0.00023807921816839728,
"loss": 0.0969,
"step": 82650
},
{
"epoch": 1.6771445954167512,
"grad_norm": 0.490234375,
"learning_rate": 0.00023799777085766917,
"loss": 0.0997,
"step": 82700
},
{
"epoch": 1.678158588521598,
"grad_norm": 0.5078125,
"learning_rate": 0.00023791628396874106,
"loss": 0.1044,
"step": 82750
},
{
"epoch": 1.679172581626445,
"grad_norm": 0.546875,
"learning_rate": 0.00023783475753826283,
"loss": 0.1015,
"step": 82800
},
{
"epoch": 1.6801865747312918,
"grad_norm": 0.462890625,
"learning_rate": 0.00023775319160290208,
"loss": 0.1015,
"step": 82850
},
{
"epoch": 1.6812005678361386,
"grad_norm": 0.5,
"learning_rate": 0.0002376715861993442,
"loss": 0.106,
"step": 82900
},
{
"epoch": 1.6822145609409858,
"grad_norm": 0.4609375,
"learning_rate": 0.0002375899413642924,
"loss": 0.0996,
"step": 82950
},
{
"epoch": 1.6832285540458325,
"grad_norm": 0.51953125,
"learning_rate": 0.00023750825713446752,
"loss": 0.1062,
"step": 83000
},
{
"epoch": 1.6842425471506792,
"grad_norm": 0.431640625,
"learning_rate": 0.0002374265335466081,
"loss": 0.1042,
"step": 83050
},
{
"epoch": 1.6852565402555264,
"grad_norm": 0.478515625,
"learning_rate": 0.00023734477063747054,
"loss": 0.1,
"step": 83100
},
{
"epoch": 1.6862705333603731,
"grad_norm": 0.50390625,
"learning_rate": 0.00023726296844382876,
"loss": 0.1002,
"step": 83150
},
{
"epoch": 1.68728452646522,
"grad_norm": 0.478515625,
"learning_rate": 0.00023718112700247445,
"loss": 0.107,
"step": 83200
},
{
"epoch": 1.688298519570067,
"grad_norm": 0.462890625,
"learning_rate": 0.00023709924635021687,
"loss": 0.0996,
"step": 83250
},
{
"epoch": 1.6893125126749138,
"grad_norm": 0.453125,
"learning_rate": 0.000237017326523883,
"loss": 0.1053,
"step": 83300
},
{
"epoch": 1.6903265057797607,
"grad_norm": 0.447265625,
"learning_rate": 0.0002369353675603174,
"loss": 0.1013,
"step": 83350
},
{
"epoch": 1.6913404988846077,
"grad_norm": 0.4140625,
"learning_rate": 0.00023685336949638225,
"loss": 0.1002,
"step": 83400
},
{
"epoch": 1.6923544919894544,
"grad_norm": 0.48046875,
"learning_rate": 0.00023677133236895724,
"loss": 0.1074,
"step": 83450
},
{
"epoch": 1.6933684850943014,
"grad_norm": 0.494140625,
"learning_rate": 0.00023668925621493975,
"loss": 0.0999,
"step": 83500
},
{
"epoch": 1.6943824781991483,
"grad_norm": 0.490234375,
"learning_rate": 0.00023660714107124464,
"loss": 0.099,
"step": 83550
},
{
"epoch": 1.695396471303995,
"grad_norm": 0.447265625,
"learning_rate": 0.0002365249869748043,
"loss": 0.1107,
"step": 83600
},
{
"epoch": 1.696410464408842,
"grad_norm": 0.4453125,
"learning_rate": 0.00023644279396256863,
"loss": 0.1086,
"step": 83650
},
{
"epoch": 1.697424457513689,
"grad_norm": 0.462890625,
"learning_rate": 0.0002363605620715051,
"loss": 0.1003,
"step": 83700
},
{
"epoch": 1.6984384506185357,
"grad_norm": 0.5,
"learning_rate": 0.00023627829133859865,
"loss": 0.0973,
"step": 83750
},
{
"epoch": 1.6994524437233827,
"grad_norm": 0.421875,
"learning_rate": 0.0002361959818008516,
"loss": 0.1023,
"step": 83800
},
{
"epoch": 1.7004664368282296,
"grad_norm": 0.431640625,
"learning_rate": 0.00023611363349528388,
"loss": 0.0998,
"step": 83850
},
{
"epoch": 1.7014804299330764,
"grad_norm": 0.54296875,
"learning_rate": 0.0002360312464589327,
"loss": 0.104,
"step": 83900
},
{
"epoch": 1.7024944230379233,
"grad_norm": 0.48046875,
"learning_rate": 0.00023594882072885273,
"loss": 0.1024,
"step": 83950
},
{
"epoch": 1.7035084161427703,
"grad_norm": 0.44140625,
"learning_rate": 0.00023586635634211618,
"loss": 0.1024,
"step": 84000
},
{
"epoch": 1.7035084161427703,
"eval_cer": 0.029870933843840874,
"eval_loss": 0.16048672795295715,
"eval_runtime": 127.4658,
"eval_samples_per_second": 22.029,
"eval_steps_per_second": 0.173,
"eval_wer": 0.08886857607150112,
"step": 84000
},
{
"epoch": 1.704522409247617,
"grad_norm": 0.466796875,
"learning_rate": 0.0002357838533358124,
"loss": 0.0991,
"step": 84050
},
{
"epoch": 1.705536402352464,
"grad_norm": 0.44921875,
"learning_rate": 0.0002357013117470483,
"loss": 0.1014,
"step": 84100
},
{
"epoch": 1.706550395457311,
"grad_norm": 0.6875,
"learning_rate": 0.00023561873161294811,
"loss": 0.1084,
"step": 84150
},
{
"epoch": 1.7075643885621576,
"grad_norm": 0.44140625,
"learning_rate": 0.00023553611297065337,
"loss": 0.1031,
"step": 84200
},
{
"epoch": 1.7085783816670048,
"grad_norm": 0.482421875,
"learning_rate": 0.00023545345585732292,
"loss": 0.1072,
"step": 84250
},
{
"epoch": 1.7095923747718516,
"grad_norm": 0.49609375,
"learning_rate": 0.00023537076031013296,
"loss": 0.1091,
"step": 84300
},
{
"epoch": 1.7106063678766983,
"grad_norm": 0.38671875,
"learning_rate": 0.00023528802636627687,
"loss": 0.1064,
"step": 84350
},
{
"epoch": 1.7116203609815455,
"grad_norm": 0.466796875,
"learning_rate": 0.00023520525406296549,
"loss": 0.1147,
"step": 84400
},
{
"epoch": 1.7126343540863922,
"grad_norm": 0.466796875,
"learning_rate": 0.00023512244343742666,
"loss": 0.1,
"step": 84450
},
{
"epoch": 1.7136483471912392,
"grad_norm": 0.443359375,
"learning_rate": 0.00023503959452690566,
"loss": 0.0954,
"step": 84500
},
{
"epoch": 1.714662340296086,
"grad_norm": 0.49609375,
"learning_rate": 0.00023495670736866498,
"loss": 0.1194,
"step": 84550
},
{
"epoch": 1.7156763334009328,
"grad_norm": 0.427734375,
"learning_rate": 0.00023487378199998414,
"loss": 0.0977,
"step": 84600
},
{
"epoch": 1.7166903265057798,
"grad_norm": 0.4375,
"learning_rate": 0.00023479081845816002,
"loss": 0.0976,
"step": 84650
},
{
"epoch": 1.7177043196106268,
"grad_norm": 0.48046875,
"learning_rate": 0.00023470781678050658,
"loss": 0.1088,
"step": 84700
},
{
"epoch": 1.7187183127154735,
"grad_norm": 0.451171875,
"learning_rate": 0.000234624777004355,
"loss": 0.1156,
"step": 84750
},
{
"epoch": 1.7197323058203204,
"grad_norm": 0.46484375,
"learning_rate": 0.00023454169916705347,
"loss": 0.1037,
"step": 84800
},
{
"epoch": 1.7207462989251674,
"grad_norm": 0.466796875,
"learning_rate": 0.0002344585833059675,
"loss": 0.108,
"step": 84850
},
{
"epoch": 1.7217602920300141,
"grad_norm": 0.455078125,
"learning_rate": 0.00023437542945847948,
"loss": 0.1128,
"step": 84900
},
{
"epoch": 1.722774285134861,
"grad_norm": 0.52734375,
"learning_rate": 0.00023429223766198906,
"loss": 0.1165,
"step": 84950
},
{
"epoch": 1.723788278239708,
"grad_norm": 0.474609375,
"learning_rate": 0.00023420900795391282,
"loss": 0.1013,
"step": 85000
},
{
"epoch": 1.7248022713445548,
"grad_norm": 0.412109375,
"learning_rate": 0.00023412574037168457,
"loss": 0.0963,
"step": 85050
},
{
"epoch": 1.7258162644494017,
"grad_norm": 0.51171875,
"learning_rate": 0.00023404243495275486,
"loss": 0.1073,
"step": 85100
},
{
"epoch": 1.7268302575542487,
"grad_norm": 0.462890625,
"learning_rate": 0.00023395909173459163,
"loss": 0.1034,
"step": 85150
},
{
"epoch": 1.7278442506590954,
"grad_norm": 0.44140625,
"learning_rate": 0.00023387571075467954,
"loss": 0.1085,
"step": 85200
},
{
"epoch": 1.7288582437639424,
"grad_norm": 0.498046875,
"learning_rate": 0.00023379229205052035,
"loss": 0.0986,
"step": 85250
},
{
"epoch": 1.7298722368687893,
"grad_norm": 0.5078125,
"learning_rate": 0.00023370883565963275,
"loss": 0.1028,
"step": 85300
},
{
"epoch": 1.730886229973636,
"grad_norm": 0.380859375,
"learning_rate": 0.00023362534161955236,
"loss": 0.1011,
"step": 85350
},
{
"epoch": 1.731900223078483,
"grad_norm": 0.435546875,
"learning_rate": 0.00023354180996783185,
"loss": 0.1068,
"step": 85400
},
{
"epoch": 1.73291421618333,
"grad_norm": 0.5,
"learning_rate": 0.00023345824074204061,
"loss": 0.1006,
"step": 85450
},
{
"epoch": 1.7339282092881767,
"grad_norm": 0.53515625,
"learning_rate": 0.00023337463397976518,
"loss": 0.1035,
"step": 85500
},
{
"epoch": 1.7339282092881767,
"eval_cer": 0.030047808060262086,
"eval_loss": 0.16027864813804626,
"eval_runtime": 127.051,
"eval_samples_per_second": 22.101,
"eval_steps_per_second": 0.173,
"eval_wer": 0.09066287494075428,
"step": 85500
},
{
"epoch": 1.7349422023930239,
"grad_norm": 0.5,
"learning_rate": 0.00023329098971860867,
"loss": 0.0958,
"step": 85550
},
{
"epoch": 1.7359561954978706,
"grad_norm": 0.4453125,
"learning_rate": 0.0002332073079961914,
"loss": 0.1067,
"step": 85600
},
{
"epoch": 1.7369701886027173,
"grad_norm": 0.54296875,
"learning_rate": 0.00023312358885015024,
"loss": 0.1067,
"step": 85650
},
{
"epoch": 1.7379841817075645,
"grad_norm": 0.388671875,
"learning_rate": 0.0002330398323181391,
"loss": 0.1022,
"step": 85700
},
{
"epoch": 1.7389981748124113,
"grad_norm": 0.4375,
"learning_rate": 0.0002329560384378286,
"loss": 0.102,
"step": 85750
},
{
"epoch": 1.7400121679172582,
"grad_norm": 0.52734375,
"learning_rate": 0.00023287220724690615,
"loss": 0.0998,
"step": 85800
},
{
"epoch": 1.7410261610221052,
"grad_norm": 0.49609375,
"learning_rate": 0.00023278833878307602,
"loss": 0.098,
"step": 85850
},
{
"epoch": 1.742040154126952,
"grad_norm": 0.44921875,
"learning_rate": 0.00023270443308405917,
"loss": 0.1163,
"step": 85900
},
{
"epoch": 1.7430541472317989,
"grad_norm": 0.49609375,
"learning_rate": 0.00023262049018759335,
"loss": 0.0991,
"step": 85950
},
{
"epoch": 1.7440681403366458,
"grad_norm": 0.53125,
"learning_rate": 0.00023253651013143304,
"loss": 0.1046,
"step": 86000
},
{
"epoch": 1.7450821334414925,
"grad_norm": 0.46875,
"learning_rate": 0.0002324524929533494,
"loss": 0.1031,
"step": 86050
},
{
"epoch": 1.7460961265463395,
"grad_norm": 0.6484375,
"learning_rate": 0.00023236843869113032,
"loss": 0.1051,
"step": 86100
},
{
"epoch": 1.7471101196511865,
"grad_norm": 0.44921875,
"learning_rate": 0.0002322843473825803,
"loss": 0.1011,
"step": 86150
},
{
"epoch": 1.7481241127560332,
"grad_norm": 0.447265625,
"learning_rate": 0.00023220021906552059,
"loss": 0.0971,
"step": 86200
},
{
"epoch": 1.7491381058608801,
"grad_norm": 0.48046875,
"learning_rate": 0.00023211605377778906,
"loss": 0.1072,
"step": 86250
},
{
"epoch": 1.750152098965727,
"grad_norm": 0.38671875,
"learning_rate": 0.0002320318515572402,
"loss": 0.1097,
"step": 86300
},
{
"epoch": 1.7511660920705738,
"grad_norm": 0.46484375,
"learning_rate": 0.0002319476124417451,
"loss": 0.1016,
"step": 86350
},
{
"epoch": 1.7521800851754208,
"grad_norm": 0.4765625,
"learning_rate": 0.00023186333646919142,
"loss": 0.1063,
"step": 86400
},
{
"epoch": 1.7531940782802677,
"grad_norm": 0.458984375,
"learning_rate": 0.00023177902367748347,
"loss": 0.1061,
"step": 86450
},
{
"epoch": 1.7542080713851145,
"grad_norm": 0.474609375,
"learning_rate": 0.00023169467410454213,
"loss": 0.1031,
"step": 86500
},
{
"epoch": 1.7552220644899614,
"grad_norm": 0.462890625,
"learning_rate": 0.00023161028778830465,
"loss": 0.1073,
"step": 86550
},
{
"epoch": 1.7562360575948084,
"grad_norm": 0.48828125,
"learning_rate": 0.00023152586476672506,
"loss": 0.1099,
"step": 86600
},
{
"epoch": 1.7572500506996551,
"grad_norm": 0.55859375,
"learning_rate": 0.00023144140507777363,
"loss": 0.1016,
"step": 86650
},
{
"epoch": 1.758264043804502,
"grad_norm": 0.416015625,
"learning_rate": 0.00023135690875943736,
"loss": 0.0989,
"step": 86700
},
{
"epoch": 1.759278036909349,
"grad_norm": 0.4609375,
"learning_rate": 0.00023127237584971962,
"loss": 0.0986,
"step": 86750
},
{
"epoch": 1.7602920300141958,
"grad_norm": 0.50390625,
"learning_rate": 0.0002311878063866402,
"loss": 0.1043,
"step": 86800
},
{
"epoch": 1.761306023119043,
"grad_norm": 0.4609375,
"learning_rate": 0.00023110320040823536,
"loss": 0.1043,
"step": 86850
},
{
"epoch": 1.7623200162238897,
"grad_norm": 0.56640625,
"learning_rate": 0.00023101855795255787,
"loss": 0.0959,
"step": 86900
},
{
"epoch": 1.7633340093287364,
"grad_norm": 0.423828125,
"learning_rate": 0.00023093387905767675,
"loss": 0.0968,
"step": 86950
},
{
"epoch": 1.7643480024335836,
"grad_norm": 0.447265625,
"learning_rate": 0.00023084916376167754,
"loss": 0.102,
"step": 87000
},
{
"epoch": 1.7643480024335836,
"eval_cer": 0.02896575403039115,
"eval_loss": 0.15883292257785797,
"eval_runtime": 128.449,
"eval_samples_per_second": 21.861,
"eval_steps_per_second": 0.171,
"eval_wer": 0.08903784954973255,
"step": 87000
},
{
"epoch": 1.7653619955384303,
"grad_norm": 0.51953125,
"learning_rate": 0.0002307644121026621,
"loss": 0.0996,
"step": 87050
},
{
"epoch": 1.7663759886432773,
"grad_norm": 0.458984375,
"learning_rate": 0.00023067962411874862,
"loss": 0.1088,
"step": 87100
},
{
"epoch": 1.7673899817481242,
"grad_norm": 0.486328125,
"learning_rate": 0.00023059479984807165,
"loss": 0.097,
"step": 87150
},
{
"epoch": 1.768403974852971,
"grad_norm": 0.421875,
"learning_rate": 0.00023050993932878213,
"loss": 0.1053,
"step": 87200
},
{
"epoch": 1.769417967957818,
"grad_norm": 0.470703125,
"learning_rate": 0.00023042504259904718,
"loss": 0.0989,
"step": 87250
},
{
"epoch": 1.7704319610626649,
"grad_norm": 0.5,
"learning_rate": 0.00023034010969705024,
"loss": 0.1057,
"step": 87300
},
{
"epoch": 1.7714459541675116,
"grad_norm": 0.435546875,
"learning_rate": 0.00023025514066099108,
"loss": 0.0951,
"step": 87350
},
{
"epoch": 1.7724599472723586,
"grad_norm": 0.482421875,
"learning_rate": 0.0002301701355290857,
"loss": 0.0984,
"step": 87400
},
{
"epoch": 1.7734739403772055,
"grad_norm": 0.466796875,
"learning_rate": 0.00023008509433956631,
"loss": 0.1055,
"step": 87450
},
{
"epoch": 1.7744879334820522,
"grad_norm": 0.412109375,
"learning_rate": 0.00023000001713068135,
"loss": 0.1107,
"step": 87500
},
{
"epoch": 1.7755019265868992,
"grad_norm": 0.515625,
"learning_rate": 0.00022991490394069542,
"loss": 0.0969,
"step": 87550
},
{
"epoch": 1.7765159196917462,
"grad_norm": 0.45703125,
"learning_rate": 0.00022982975480788937,
"loss": 0.1072,
"step": 87600
},
{
"epoch": 1.777529912796593,
"grad_norm": 0.47265625,
"learning_rate": 0.00022974456977056026,
"loss": 0.1,
"step": 87650
},
{
"epoch": 1.7785439059014398,
"grad_norm": 0.470703125,
"learning_rate": 0.00022965934886702108,
"loss": 0.1103,
"step": 87700
},
{
"epoch": 1.7795578990062868,
"grad_norm": 0.455078125,
"learning_rate": 0.00022957409213560112,
"loss": 0.0997,
"step": 87750
},
{
"epoch": 1.7805718921111335,
"grad_norm": 0.486328125,
"learning_rate": 0.0002294887996146459,
"loss": 0.0987,
"step": 87800
},
{
"epoch": 1.7815858852159805,
"grad_norm": 0.5,
"learning_rate": 0.00022940347134251673,
"loss": 0.0994,
"step": 87850
},
{
"epoch": 1.7825998783208274,
"grad_norm": 0.466796875,
"learning_rate": 0.0002293181073575913,
"loss": 0.1015,
"step": 87900
},
{
"epoch": 1.7836138714256742,
"grad_norm": 0.470703125,
"learning_rate": 0.00022923270769826314,
"loss": 0.1024,
"step": 87950
},
{
"epoch": 1.7846278645305214,
"grad_norm": 0.53125,
"learning_rate": 0.0002291472724029419,
"loss": 0.1145,
"step": 88000
},
{
"epoch": 1.785641857635368,
"grad_norm": 0.5703125,
"learning_rate": 0.0002290618015100534,
"loss": 0.1031,
"step": 88050
},
{
"epoch": 1.7866558507402148,
"grad_norm": 0.5234375,
"learning_rate": 0.00022897629505803918,
"loss": 0.1004,
"step": 88100
},
{
"epoch": 1.787669843845062,
"grad_norm": 0.4765625,
"learning_rate": 0.00022889075308535703,
"loss": 0.099,
"step": 88150
},
{
"epoch": 1.7886838369499087,
"grad_norm": 0.482421875,
"learning_rate": 0.00022880517563048062,
"loss": 0.0972,
"step": 88200
},
{
"epoch": 1.7896978300547555,
"grad_norm": 0.474609375,
"learning_rate": 0.00022871956273189957,
"loss": 0.0992,
"step": 88250
},
{
"epoch": 1.7907118231596026,
"grad_norm": 0.427734375,
"learning_rate": 0.0002286339144281194,
"loss": 0.1057,
"step": 88300
},
{
"epoch": 1.7917258162644494,
"grad_norm": 0.458984375,
"learning_rate": 0.0002285482307576617,
"loss": 0.1115,
"step": 88350
},
{
"epoch": 1.7927398093692963,
"grad_norm": 0.47265625,
"learning_rate": 0.00022846251175906378,
"loss": 0.0936,
"step": 88400
},
{
"epoch": 1.7937538024741433,
"grad_norm": 0.482421875,
"learning_rate": 0.00022837675747087896,
"loss": 0.096,
"step": 88450
},
{
"epoch": 1.79476779557899,
"grad_norm": 0.54296875,
"learning_rate": 0.00022829096793167643,
"loss": 0.1007,
"step": 88500
},
{
"epoch": 1.79476779557899,
"eval_cer": 0.028596399049040978,
"eval_loss": 0.1581563800573349,
"eval_runtime": 132.2778,
"eval_samples_per_second": 21.228,
"eval_steps_per_second": 0.166,
"eval_wer": 0.08693885841966281,
"step": 88500
},
{
"epoch": 1.795781788683837,
"grad_norm": 0.470703125,
"learning_rate": 0.00022820514318004122,
"loss": 0.1084,
"step": 88550
},
{
"epoch": 1.796795781788684,
"grad_norm": 0.48046875,
"learning_rate": 0.00022811928325457408,
"loss": 0.1006,
"step": 88600
},
{
"epoch": 1.7978097748935307,
"grad_norm": 0.515625,
"learning_rate": 0.00022803338819389184,
"loss": 0.1016,
"step": 88650
},
{
"epoch": 1.7988237679983776,
"grad_norm": 0.53515625,
"learning_rate": 0.00022794745803662687,
"loss": 0.0935,
"step": 88700
},
{
"epoch": 1.7998377611032246,
"grad_norm": 0.4609375,
"learning_rate": 0.00022786149282142743,
"loss": 0.1056,
"step": 88750
},
{
"epoch": 1.8008517542080713,
"grad_norm": 0.478515625,
"learning_rate": 0.0002277754925869576,
"loss": 0.1,
"step": 88800
},
{
"epoch": 1.8018657473129183,
"grad_norm": 0.53125,
"learning_rate": 0.00022768945737189716,
"loss": 0.1013,
"step": 88850
},
{
"epoch": 1.8028797404177652,
"grad_norm": 0.494140625,
"learning_rate": 0.00022760338721494158,
"loss": 0.1087,
"step": 88900
},
{
"epoch": 1.803893733522612,
"grad_norm": 0.5078125,
"learning_rate": 0.00022751728215480215,
"loss": 0.1056,
"step": 88950
},
{
"epoch": 1.804907726627459,
"grad_norm": 0.462890625,
"learning_rate": 0.00022743114223020572,
"loss": 0.1004,
"step": 89000
},
{
"epoch": 1.8059217197323059,
"grad_norm": 0.455078125,
"learning_rate": 0.00022734496747989496,
"loss": 0.1072,
"step": 89050
},
{
"epoch": 1.8069357128371526,
"grad_norm": 0.384765625,
"learning_rate": 0.0002272587579426281,
"loss": 0.1114,
"step": 89100
},
{
"epoch": 1.8079497059419996,
"grad_norm": 0.490234375,
"learning_rate": 0.0002271725136571791,
"loss": 0.1071,
"step": 89150
},
{
"epoch": 1.8089636990468465,
"grad_norm": 0.64453125,
"learning_rate": 0.00022708623466233748,
"loss": 0.1177,
"step": 89200
},
{
"epoch": 1.8099776921516932,
"grad_norm": 0.46484375,
"learning_rate": 0.00022699992099690843,
"loss": 0.1003,
"step": 89250
},
{
"epoch": 1.8109916852565404,
"grad_norm": 0.73046875,
"learning_rate": 0.0002269135726997126,
"loss": 0.0984,
"step": 89300
},
{
"epoch": 1.8120056783613872,
"grad_norm": 0.5390625,
"learning_rate": 0.00022682718980958645,
"loss": 0.0969,
"step": 89350
},
{
"epoch": 1.8130196714662339,
"grad_norm": 0.462890625,
"learning_rate": 0.00022674077236538182,
"loss": 0.1103,
"step": 89400
},
{
"epoch": 1.814033664571081,
"grad_norm": 0.451171875,
"learning_rate": 0.00022665432040596604,
"loss": 0.0958,
"step": 89450
},
{
"epoch": 1.8150476576759278,
"grad_norm": 0.50390625,
"learning_rate": 0.00022656783397022223,
"loss": 0.0938,
"step": 89500
},
{
"epoch": 1.8160616507807745,
"grad_norm": 0.546875,
"learning_rate": 0.00022648131309704872,
"loss": 0.0957,
"step": 89550
},
{
"epoch": 1.8170756438856217,
"grad_norm": 0.4609375,
"learning_rate": 0.00022639475782535945,
"loss": 0.0991,
"step": 89600
},
{
"epoch": 1.8180896369904684,
"grad_norm": 0.458984375,
"learning_rate": 0.00022630816819408395,
"loss": 0.0988,
"step": 89650
},
{
"epoch": 1.8191036300953154,
"grad_norm": 0.466796875,
"learning_rate": 0.000226221544242167,
"loss": 0.1055,
"step": 89700
},
{
"epoch": 1.8201176232001623,
"grad_norm": 0.546875,
"learning_rate": 0.00022613488600856885,
"loss": 0.1056,
"step": 89750
},
{
"epoch": 1.821131616305009,
"grad_norm": 0.484375,
"learning_rate": 0.0002260481935322654,
"loss": 0.0986,
"step": 89800
},
{
"epoch": 1.822145609409856,
"grad_norm": 0.49609375,
"learning_rate": 0.00022596146685224762,
"loss": 0.0908,
"step": 89850
},
{
"epoch": 1.823159602514703,
"grad_norm": 0.46484375,
"learning_rate": 0.0002258747060075221,
"loss": 0.0998,
"step": 89900
},
{
"epoch": 1.8241735956195497,
"grad_norm": 0.458984375,
"learning_rate": 0.00022578791103711069,
"loss": 0.0994,
"step": 89950
},
{
"epoch": 1.8251875887243967,
"grad_norm": 0.47265625,
"learning_rate": 0.0002257010819800506,
"loss": 0.1117,
"step": 90000
},
{
"epoch": 1.8251875887243967,
"eval_cer": 0.030026999328918415,
"eval_loss": 0.15934033691883087,
"eval_runtime": 129.703,
"eval_samples_per_second": 21.649,
"eval_steps_per_second": 0.17,
"eval_wer": 0.09049360146252285,
"step": 90000
}
],
"logging_steps": 50,
"max_steps": 246550,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}