checkpoint-35000 / trainer_state.json
markmuller's picture
Upload folder using huggingface_hub
3c0c559 verified
{
"best_global_step": 35000,
"best_metric": 30.37997340697555,
"best_model_checkpoint": "phase5_output/checkpoints/stage1/checkpoint-35000",
"epoch": 0.6196938712276135,
"eval_steps": 5000,
"global_step": 35000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008852769588965908,
"grad_norm": 20.375,
"learning_rate": 4.900000000000001e-07,
"loss": 2.6002566528320314,
"step": 50
},
{
"epoch": 0.0017705539177931815,
"grad_norm": 16.5,
"learning_rate": 9.9e-07,
"loss": 2.6074697875976565,
"step": 100
},
{
"epoch": 0.0026558308766897725,
"grad_norm": 12.8125,
"learning_rate": 1.4900000000000001e-06,
"loss": 2.661256103515625,
"step": 150
},
{
"epoch": 0.003541107835586363,
"grad_norm": 16.125,
"learning_rate": 1.9900000000000004e-06,
"loss": 2.3539471435546875,
"step": 200
},
{
"epoch": 0.004426384794482954,
"grad_norm": 13.125,
"learning_rate": 2.4900000000000003e-06,
"loss": 2.1791415405273438,
"step": 250
},
{
"epoch": 0.005311661753379545,
"grad_norm": 14.3125,
"learning_rate": 2.99e-06,
"loss": 1.7517926025390624,
"step": 300
},
{
"epoch": 0.006196938712276136,
"grad_norm": 8.9375,
"learning_rate": 3.49e-06,
"loss": 1.5738864135742188,
"step": 350
},
{
"epoch": 0.007082215671172726,
"grad_norm": 10.375,
"learning_rate": 3.990000000000001e-06,
"loss": 1.4318115234375,
"step": 400
},
{
"epoch": 0.007967492630069318,
"grad_norm": 12.0,
"learning_rate": 4.49e-06,
"loss": 1.3452346801757813,
"step": 450
},
{
"epoch": 0.008852769588965907,
"grad_norm": 11.5,
"learning_rate": 4.9900000000000005e-06,
"loss": 1.3893937683105468,
"step": 500
},
{
"epoch": 0.009738046547862499,
"grad_norm": 20.125,
"learning_rate": 5.490000000000001e-06,
"loss": 1.3567886352539062,
"step": 550
},
{
"epoch": 0.01062332350675909,
"grad_norm": 9.5625,
"learning_rate": 5.99e-06,
"loss": 1.3436286926269532,
"step": 600
},
{
"epoch": 0.01150860046565568,
"grad_norm": 9.875,
"learning_rate": 6.4900000000000005e-06,
"loss": 1.2315786743164063,
"step": 650
},
{
"epoch": 0.012393877424552271,
"grad_norm": 8.75,
"learning_rate": 6.99e-06,
"loss": 1.2174966430664063,
"step": 700
},
{
"epoch": 0.013279154383448863,
"grad_norm": 9.375,
"learning_rate": 7.49e-06,
"loss": 1.2520484161376952,
"step": 750
},
{
"epoch": 0.014164431342345452,
"grad_norm": 8.4375,
"learning_rate": 7.990000000000001e-06,
"loss": 1.2200564575195312,
"step": 800
},
{
"epoch": 0.015049708301242044,
"grad_norm": 11.1875,
"learning_rate": 8.49e-06,
"loss": 1.1677375793457032,
"step": 850
},
{
"epoch": 0.015934985260138635,
"grad_norm": 8.9375,
"learning_rate": 8.99e-06,
"loss": 1.1773777770996094,
"step": 900
},
{
"epoch": 0.016820262219035226,
"grad_norm": 8.9375,
"learning_rate": 9.49e-06,
"loss": 1.1839574432373048,
"step": 950
},
{
"epoch": 0.017705539177931814,
"grad_norm": 8.8125,
"learning_rate": 9.990000000000001e-06,
"loss": 1.1368023681640624,
"step": 1000
},
{
"epoch": 0.018590816136828406,
"grad_norm": 9.625,
"learning_rate": 9.998757480474695e-06,
"loss": 1.1666729736328125,
"step": 1050
},
{
"epoch": 0.019476093095724997,
"grad_norm": 8.0625,
"learning_rate": 9.997489603408056e-06,
"loss": 1.1337457275390626,
"step": 1100
},
{
"epoch": 0.02036137005462159,
"grad_norm": 8.375,
"learning_rate": 9.996221726341415e-06,
"loss": 1.1557207489013672,
"step": 1150
},
{
"epoch": 0.02124664701351818,
"grad_norm": 8.375,
"learning_rate": 9.994953849274776e-06,
"loss": 1.1596089935302734,
"step": 1200
},
{
"epoch": 0.02213192397241477,
"grad_norm": 10.6875,
"learning_rate": 9.993685972208136e-06,
"loss": 1.115845718383789,
"step": 1250
},
{
"epoch": 0.02301720093131136,
"grad_norm": 11.5,
"learning_rate": 9.992418095141496e-06,
"loss": 1.0784302520751954,
"step": 1300
},
{
"epoch": 0.02390247789020795,
"grad_norm": 8.9375,
"learning_rate": 9.991150218074856e-06,
"loss": 1.0735511779785156,
"step": 1350
},
{
"epoch": 0.024787754849104542,
"grad_norm": 8.625,
"learning_rate": 9.989882341008217e-06,
"loss": 1.0815206146240235,
"step": 1400
},
{
"epoch": 0.025673031808001134,
"grad_norm": 8.5,
"learning_rate": 9.988614463941578e-06,
"loss": 1.0743460845947266,
"step": 1450
},
{
"epoch": 0.026558308766897725,
"grad_norm": 7.90625,
"learning_rate": 9.987346586874937e-06,
"loss": 1.1286388397216798,
"step": 1500
},
{
"epoch": 0.027443585725794313,
"grad_norm": 8.6875,
"learning_rate": 9.986078709808298e-06,
"loss": 1.0764491271972656,
"step": 1550
},
{
"epoch": 0.028328862684690904,
"grad_norm": 7.4375,
"learning_rate": 9.984810832741659e-06,
"loss": 1.0406829071044923,
"step": 1600
},
{
"epoch": 0.029214139643587496,
"grad_norm": 7.375,
"learning_rate": 9.983542955675018e-06,
"loss": 1.075464096069336,
"step": 1650
},
{
"epoch": 0.030099416602484087,
"grad_norm": 10.875,
"learning_rate": 9.982275078608379e-06,
"loss": 1.0150408935546875,
"step": 1700
},
{
"epoch": 0.03098469356138068,
"grad_norm": 9.3125,
"learning_rate": 9.98100720154174e-06,
"loss": 1.0287052154541017,
"step": 1750
},
{
"epoch": 0.03186997052027727,
"grad_norm": 9.5625,
"learning_rate": 9.9797393244751e-06,
"loss": 1.031718978881836,
"step": 1800
},
{
"epoch": 0.03275524747917386,
"grad_norm": 10.0625,
"learning_rate": 9.97847144740846e-06,
"loss": 1.0560354614257812,
"step": 1850
},
{
"epoch": 0.03364052443807045,
"grad_norm": 9.4375,
"learning_rate": 9.97720357034182e-06,
"loss": 0.9881591033935547,
"step": 1900
},
{
"epoch": 0.034525801396967044,
"grad_norm": 6.75,
"learning_rate": 9.975935693275181e-06,
"loss": 0.9941422271728516,
"step": 1950
},
{
"epoch": 0.03541107835586363,
"grad_norm": 7.78125,
"learning_rate": 9.97466781620854e-06,
"loss": 0.9812654876708984,
"step": 2000
},
{
"epoch": 0.03629635531476022,
"grad_norm": 7.40625,
"learning_rate": 9.973399939141901e-06,
"loss": 1.0280473327636719,
"step": 2050
},
{
"epoch": 0.03718163227365681,
"grad_norm": 6.53125,
"learning_rate": 9.972132062075262e-06,
"loss": 1.0059999084472657,
"step": 2100
},
{
"epoch": 0.0380669092325534,
"grad_norm": 9.0,
"learning_rate": 9.970864185008622e-06,
"loss": 1.0437776947021484,
"step": 2150
},
{
"epoch": 0.038952186191449995,
"grad_norm": 8.625,
"learning_rate": 9.969596307941983e-06,
"loss": 0.9704521942138672,
"step": 2200
},
{
"epoch": 0.039837463150346586,
"grad_norm": 8.75,
"learning_rate": 9.968328430875344e-06,
"loss": 1.0186034393310548,
"step": 2250
},
{
"epoch": 0.04072274010924318,
"grad_norm": 6.84375,
"learning_rate": 9.967060553808703e-06,
"loss": 0.9506314086914063,
"step": 2300
},
{
"epoch": 0.04160801706813977,
"grad_norm": 10.5625,
"learning_rate": 9.965792676742064e-06,
"loss": 1.0404967498779296,
"step": 2350
},
{
"epoch": 0.04249329402703636,
"grad_norm": 7.375,
"learning_rate": 9.964524799675425e-06,
"loss": 0.9251933288574219,
"step": 2400
},
{
"epoch": 0.04337857098593295,
"grad_norm": 8.0625,
"learning_rate": 9.963256922608786e-06,
"loss": 1.0444112396240235,
"step": 2450
},
{
"epoch": 0.04426384794482954,
"grad_norm": 10.5,
"learning_rate": 9.961989045542145e-06,
"loss": 0.9814193725585938,
"step": 2500
},
{
"epoch": 0.04514912490372613,
"grad_norm": 8.5625,
"learning_rate": 9.960721168475506e-06,
"loss": 0.9973986053466797,
"step": 2550
},
{
"epoch": 0.04603440186262272,
"grad_norm": 9.0625,
"learning_rate": 9.959453291408866e-06,
"loss": 1.0070331573486329,
"step": 2600
},
{
"epoch": 0.04691967882151931,
"grad_norm": 7.8125,
"learning_rate": 9.958185414342225e-06,
"loss": 0.9542020416259765,
"step": 2650
},
{
"epoch": 0.0478049557804159,
"grad_norm": 10.25,
"learning_rate": 9.956917537275586e-06,
"loss": 0.9571893310546875,
"step": 2700
},
{
"epoch": 0.04869023273931249,
"grad_norm": 9.375,
"learning_rate": 9.955649660208947e-06,
"loss": 0.9763975524902344,
"step": 2750
},
{
"epoch": 0.049575509698209085,
"grad_norm": 9.625,
"learning_rate": 9.954381783142308e-06,
"loss": 0.9599230194091797,
"step": 2800
},
{
"epoch": 0.050460786657105676,
"grad_norm": 11.25,
"learning_rate": 9.953113906075667e-06,
"loss": 1.0347834014892578,
"step": 2850
},
{
"epoch": 0.05134606361600227,
"grad_norm": 10.5625,
"learning_rate": 9.951846029009028e-06,
"loss": 0.9465586853027343,
"step": 2900
},
{
"epoch": 0.05223134057489886,
"grad_norm": 8.0625,
"learning_rate": 9.950578151942389e-06,
"loss": 0.9527477264404297,
"step": 2950
},
{
"epoch": 0.05311661753379545,
"grad_norm": 10.4375,
"learning_rate": 9.94931027487575e-06,
"loss": 0.9980839538574219,
"step": 3000
},
{
"epoch": 0.05400189449269204,
"grad_norm": 9.6875,
"learning_rate": 9.948042397809109e-06,
"loss": 0.9938941192626953,
"step": 3050
},
{
"epoch": 0.054887171451588626,
"grad_norm": 8.6875,
"learning_rate": 9.94677452074247e-06,
"loss": 0.9848545074462891,
"step": 3100
},
{
"epoch": 0.05577244841048522,
"grad_norm": 6.875,
"learning_rate": 9.94550664367583e-06,
"loss": 0.9058123779296875,
"step": 3150
},
{
"epoch": 0.05665772536938181,
"grad_norm": 10.0,
"learning_rate": 9.94423876660919e-06,
"loss": 0.9505638122558594,
"step": 3200
},
{
"epoch": 0.0575430023282784,
"grad_norm": 5.96875,
"learning_rate": 9.94297088954255e-06,
"loss": 0.9797083282470703,
"step": 3250
},
{
"epoch": 0.05842827928717499,
"grad_norm": 7.78125,
"learning_rate": 9.941703012475911e-06,
"loss": 0.9330976867675781,
"step": 3300
},
{
"epoch": 0.05931355624607158,
"grad_norm": 10.875,
"learning_rate": 9.940435135409272e-06,
"loss": 0.9697081756591797,
"step": 3350
},
{
"epoch": 0.060198833204968175,
"grad_norm": 8.8125,
"learning_rate": 9.939167258342633e-06,
"loss": 0.9778965759277344,
"step": 3400
},
{
"epoch": 0.061084110163864766,
"grad_norm": 6.46875,
"learning_rate": 9.937899381275993e-06,
"loss": 0.9776050567626953,
"step": 3450
},
{
"epoch": 0.06196938712276136,
"grad_norm": 8.75,
"learning_rate": 9.936631504209352e-06,
"loss": 0.9795106506347656,
"step": 3500
},
{
"epoch": 0.06285466408165795,
"grad_norm": 9.5625,
"learning_rate": 9.935363627142713e-06,
"loss": 0.9063382720947266,
"step": 3550
},
{
"epoch": 0.06373994104055454,
"grad_norm": 8.125,
"learning_rate": 9.934095750076074e-06,
"loss": 0.9287123107910156,
"step": 3600
},
{
"epoch": 0.06462521799945113,
"grad_norm": 10.1875,
"learning_rate": 9.932827873009435e-06,
"loss": 0.9231369018554687,
"step": 3650
},
{
"epoch": 0.06551049495834772,
"grad_norm": 7.5,
"learning_rate": 9.931559995942794e-06,
"loss": 0.9772643280029297,
"step": 3700
},
{
"epoch": 0.06639577191724431,
"grad_norm": 9.75,
"learning_rate": 9.930292118876155e-06,
"loss": 0.9633369445800781,
"step": 3750
},
{
"epoch": 0.0672810488761409,
"grad_norm": 10.3125,
"learning_rate": 9.929024241809516e-06,
"loss": 0.9590021514892578,
"step": 3800
},
{
"epoch": 0.0681663258350375,
"grad_norm": 8.6875,
"learning_rate": 9.927756364742875e-06,
"loss": 0.9859857177734375,
"step": 3850
},
{
"epoch": 0.06905160279393409,
"grad_norm": 7.90625,
"learning_rate": 9.926488487676236e-06,
"loss": 0.9285535430908203,
"step": 3900
},
{
"epoch": 0.06993687975283067,
"grad_norm": 9.4375,
"learning_rate": 9.925220610609596e-06,
"loss": 0.9168830871582031,
"step": 3950
},
{
"epoch": 0.07082215671172726,
"grad_norm": 8.6875,
"learning_rate": 9.923952733542957e-06,
"loss": 0.9035071563720704,
"step": 4000
},
{
"epoch": 0.07170743367062385,
"grad_norm": 7.34375,
"learning_rate": 9.922684856476316e-06,
"loss": 0.9321700286865234,
"step": 4050
},
{
"epoch": 0.07259271062952044,
"grad_norm": 8.875,
"learning_rate": 9.921416979409677e-06,
"loss": 0.9381676483154296,
"step": 4100
},
{
"epoch": 0.07347798758841703,
"grad_norm": 9.6875,
"learning_rate": 9.920149102343038e-06,
"loss": 0.9207463073730469,
"step": 4150
},
{
"epoch": 0.07436326454731362,
"grad_norm": 9.5,
"learning_rate": 9.918881225276397e-06,
"loss": 0.9887482452392579,
"step": 4200
},
{
"epoch": 0.07524854150621021,
"grad_norm": 7.125,
"learning_rate": 9.917613348209758e-06,
"loss": 0.8934328460693359,
"step": 4250
},
{
"epoch": 0.0761338184651068,
"grad_norm": 7.25,
"learning_rate": 9.916345471143119e-06,
"loss": 0.9096377563476562,
"step": 4300
},
{
"epoch": 0.0770190954240034,
"grad_norm": 7.96875,
"learning_rate": 9.91507759407648e-06,
"loss": 0.914523696899414,
"step": 4350
},
{
"epoch": 0.07790437238289999,
"grad_norm": 7.5625,
"learning_rate": 9.913809717009839e-06,
"loss": 0.9668045806884765,
"step": 4400
},
{
"epoch": 0.07878964934179658,
"grad_norm": 7.84375,
"learning_rate": 9.9125418399432e-06,
"loss": 0.8644290924072265,
"step": 4450
},
{
"epoch": 0.07967492630069317,
"grad_norm": 6.65625,
"learning_rate": 9.91127396287656e-06,
"loss": 0.9925772094726563,
"step": 4500
},
{
"epoch": 0.08056020325958976,
"grad_norm": 8.6875,
"learning_rate": 9.910006085809921e-06,
"loss": 0.9446270751953125,
"step": 4550
},
{
"epoch": 0.08144548021848635,
"grad_norm": 9.25,
"learning_rate": 9.908738208743282e-06,
"loss": 0.8774137115478515,
"step": 4600
},
{
"epoch": 0.08233075717738295,
"grad_norm": 9.0,
"learning_rate": 9.907470331676643e-06,
"loss": 0.9241300201416016,
"step": 4650
},
{
"epoch": 0.08321603413627954,
"grad_norm": 7.90625,
"learning_rate": 9.906202454610002e-06,
"loss": 0.9137750244140626,
"step": 4700
},
{
"epoch": 0.08410131109517613,
"grad_norm": 8.5,
"learning_rate": 9.904934577543363e-06,
"loss": 0.9445246887207032,
"step": 4750
},
{
"epoch": 0.08498658805407272,
"grad_norm": 8.8125,
"learning_rate": 9.903666700476723e-06,
"loss": 0.9241275024414063,
"step": 4800
},
{
"epoch": 0.08587186501296931,
"grad_norm": 11.4375,
"learning_rate": 9.902398823410082e-06,
"loss": 0.9301995849609375,
"step": 4850
},
{
"epoch": 0.0867571419718659,
"grad_norm": 9.375,
"learning_rate": 9.901130946343443e-06,
"loss": 0.8602587127685547,
"step": 4900
},
{
"epoch": 0.0876424189307625,
"grad_norm": 8.0625,
"learning_rate": 9.899863069276804e-06,
"loss": 0.8998529052734375,
"step": 4950
},
{
"epoch": 0.08852769588965909,
"grad_norm": 7.96875,
"learning_rate": 9.898595192210165e-06,
"loss": 0.9169329833984375,
"step": 5000
},
{
"epoch": 0.08852769588965909,
"eval_cer": 18.002960906275927,
"eval_loss": 0.39146754145622253,
"eval_runtime": 397.3897,
"eval_samples_per_second": 12.582,
"eval_steps_per_second": 1.573,
"eval_wer": 35.30223995090518,
"step": 5000
},
{
"epoch": 0.08941297284855568,
"grad_norm": 16.625,
"learning_rate": 9.897327315143524e-06,
"loss": 0.8970352172851562,
"step": 5050
},
{
"epoch": 0.09029824980745225,
"grad_norm": 7.3125,
"learning_rate": 9.896059438076885e-06,
"loss": 0.9074311065673828,
"step": 5100
},
{
"epoch": 0.09118352676634885,
"grad_norm": 9.3125,
"learning_rate": 9.894791561010246e-06,
"loss": 0.9284700775146484,
"step": 5150
},
{
"epoch": 0.09206880372524544,
"grad_norm": 7.15625,
"learning_rate": 9.893523683943605e-06,
"loss": 0.9229075622558593,
"step": 5200
},
{
"epoch": 0.09295408068414203,
"grad_norm": 7.96875,
"learning_rate": 9.892255806876966e-06,
"loss": 0.9389077758789063,
"step": 5250
},
{
"epoch": 0.09383935764303862,
"grad_norm": 7.46875,
"learning_rate": 9.890987929810326e-06,
"loss": 0.9642659759521485,
"step": 5300
},
{
"epoch": 0.09472463460193521,
"grad_norm": 9.0625,
"learning_rate": 9.889720052743687e-06,
"loss": 0.9125606536865234,
"step": 5350
},
{
"epoch": 0.0956099115608318,
"grad_norm": 7.3125,
"learning_rate": 9.888452175677046e-06,
"loss": 0.8953401947021484,
"step": 5400
},
{
"epoch": 0.0964951885197284,
"grad_norm": 9.8125,
"learning_rate": 9.887184298610407e-06,
"loss": 0.8888931274414062,
"step": 5450
},
{
"epoch": 0.09738046547862499,
"grad_norm": 8.875,
"learning_rate": 9.885916421543768e-06,
"loss": 0.9167032623291016,
"step": 5500
},
{
"epoch": 0.09826574243752158,
"grad_norm": 8.9375,
"learning_rate": 9.884648544477129e-06,
"loss": 0.9441605377197265,
"step": 5550
},
{
"epoch": 0.09915101939641817,
"grad_norm": 8.6875,
"learning_rate": 9.883380667410488e-06,
"loss": 0.9138188171386719,
"step": 5600
},
{
"epoch": 0.10003629635531476,
"grad_norm": 10.125,
"learning_rate": 9.882112790343849e-06,
"loss": 0.8812205505371093,
"step": 5650
},
{
"epoch": 0.10092157331421135,
"grad_norm": 7.1875,
"learning_rate": 9.88084491327721e-06,
"loss": 0.875129623413086,
"step": 5700
},
{
"epoch": 0.10180685027310794,
"grad_norm": 7.40625,
"learning_rate": 9.87957703621057e-06,
"loss": 0.8896215057373047,
"step": 5750
},
{
"epoch": 0.10269212723200453,
"grad_norm": 10.75,
"learning_rate": 9.878309159143931e-06,
"loss": 0.870993881225586,
"step": 5800
},
{
"epoch": 0.10357740419090113,
"grad_norm": 5.8125,
"learning_rate": 9.87704128207729e-06,
"loss": 0.8822001647949219,
"step": 5850
},
{
"epoch": 0.10446268114979772,
"grad_norm": 8.6875,
"learning_rate": 9.875773405010651e-06,
"loss": 0.8593311309814453,
"step": 5900
},
{
"epoch": 0.10534795810869431,
"grad_norm": 10.1875,
"learning_rate": 9.874505527944012e-06,
"loss": 0.8941629028320313,
"step": 5950
},
{
"epoch": 0.1062332350675909,
"grad_norm": 8.75,
"learning_rate": 9.873237650877373e-06,
"loss": 0.9121043395996093,
"step": 6000
},
{
"epoch": 0.10711851202648749,
"grad_norm": 9.1875,
"learning_rate": 9.871969773810732e-06,
"loss": 0.8677694702148437,
"step": 6050
},
{
"epoch": 0.10800378898538408,
"grad_norm": 7.375,
"learning_rate": 9.870701896744092e-06,
"loss": 0.8670549011230468,
"step": 6100
},
{
"epoch": 0.10888906594428067,
"grad_norm": 12.875,
"learning_rate": 9.869434019677453e-06,
"loss": 0.857596435546875,
"step": 6150
},
{
"epoch": 0.10977434290317725,
"grad_norm": 8.3125,
"learning_rate": 9.868166142610814e-06,
"loss": 0.8889055633544922,
"step": 6200
},
{
"epoch": 0.11065961986207384,
"grad_norm": 9.375,
"learning_rate": 9.866898265544173e-06,
"loss": 0.8906202697753907,
"step": 6250
},
{
"epoch": 0.11154489682097044,
"grad_norm": 8.9375,
"learning_rate": 9.865630388477534e-06,
"loss": 0.8508009338378906,
"step": 6300
},
{
"epoch": 0.11243017377986703,
"grad_norm": 9.4375,
"learning_rate": 9.864362511410895e-06,
"loss": 0.8770820617675781,
"step": 6350
},
{
"epoch": 0.11331545073876362,
"grad_norm": 7.65625,
"learning_rate": 9.863094634344254e-06,
"loss": 0.9325962829589843,
"step": 6400
},
{
"epoch": 0.11420072769766021,
"grad_norm": 10.9375,
"learning_rate": 9.861826757277615e-06,
"loss": 0.913088150024414,
"step": 6450
},
{
"epoch": 0.1150860046565568,
"grad_norm": 7.8125,
"learning_rate": 9.860558880210976e-06,
"loss": 0.8288323211669922,
"step": 6500
},
{
"epoch": 0.11597128161545339,
"grad_norm": 9.1875,
"learning_rate": 9.859291003144336e-06,
"loss": 0.8909578704833985,
"step": 6550
},
{
"epoch": 0.11685655857434998,
"grad_norm": 6.84375,
"learning_rate": 9.858023126077696e-06,
"loss": 0.8909761810302734,
"step": 6600
},
{
"epoch": 0.11774183553324657,
"grad_norm": 8.5625,
"learning_rate": 9.856755249011056e-06,
"loss": 0.913955078125,
"step": 6650
},
{
"epoch": 0.11862711249214317,
"grad_norm": 7.375,
"learning_rate": 9.855487371944417e-06,
"loss": 0.890057601928711,
"step": 6700
},
{
"epoch": 0.11951238945103976,
"grad_norm": 7.09375,
"learning_rate": 9.854219494877776e-06,
"loss": 0.8805287170410157,
"step": 6750
},
{
"epoch": 0.12039766640993635,
"grad_norm": 8.0625,
"learning_rate": 9.852951617811137e-06,
"loss": 0.8432673645019532,
"step": 6800
},
{
"epoch": 0.12128294336883294,
"grad_norm": 8.25,
"learning_rate": 9.8516837407445e-06,
"loss": 0.936988525390625,
"step": 6850
},
{
"epoch": 0.12216822032772953,
"grad_norm": 8.5,
"learning_rate": 9.850415863677859e-06,
"loss": 0.8533177185058594,
"step": 6900
},
{
"epoch": 0.12305349728662612,
"grad_norm": 7.5,
"learning_rate": 9.84914798661122e-06,
"loss": 0.8586708068847656,
"step": 6950
},
{
"epoch": 0.12393877424552271,
"grad_norm": 7.96875,
"learning_rate": 9.84788010954458e-06,
"loss": 0.8778302001953125,
"step": 7000
},
{
"epoch": 0.1248240512044193,
"grad_norm": 9.25,
"learning_rate": 9.84661223247794e-06,
"loss": 0.9390164947509766,
"step": 7050
},
{
"epoch": 0.1257093281633159,
"grad_norm": 7.875,
"learning_rate": 9.8453443554113e-06,
"loss": 0.8395907592773437,
"step": 7100
},
{
"epoch": 0.1265946051222125,
"grad_norm": 8.9375,
"learning_rate": 9.844076478344661e-06,
"loss": 0.8765547180175781,
"step": 7150
},
{
"epoch": 0.12747988208110908,
"grad_norm": 8.8125,
"learning_rate": 9.842808601278022e-06,
"loss": 0.9166593170166015,
"step": 7200
},
{
"epoch": 0.12836515904000567,
"grad_norm": 9.6875,
"learning_rate": 9.841540724211381e-06,
"loss": 0.8632067108154297,
"step": 7250
},
{
"epoch": 0.12925043599890226,
"grad_norm": 10.0,
"learning_rate": 9.840272847144742e-06,
"loss": 0.8881147003173828,
"step": 7300
},
{
"epoch": 0.13013571295779885,
"grad_norm": 9.125,
"learning_rate": 9.839004970078103e-06,
"loss": 0.8959363555908203,
"step": 7350
},
{
"epoch": 0.13102098991669545,
"grad_norm": 5.46875,
"learning_rate": 9.837737093011462e-06,
"loss": 0.8682516479492187,
"step": 7400
},
{
"epoch": 0.13190626687559204,
"grad_norm": 8.875,
"learning_rate": 9.836469215944822e-06,
"loss": 0.8162654113769531,
"step": 7450
},
{
"epoch": 0.13279154383448863,
"grad_norm": 8.125,
"learning_rate": 9.835201338878183e-06,
"loss": 0.9132343292236328,
"step": 7500
},
{
"epoch": 0.13367682079338522,
"grad_norm": 8.1875,
"learning_rate": 9.833933461811544e-06,
"loss": 0.9420564270019531,
"step": 7550
},
{
"epoch": 0.1345620977522818,
"grad_norm": 8.125,
"learning_rate": 9.832665584744903e-06,
"loss": 0.9325301361083984,
"step": 7600
},
{
"epoch": 0.1354473747111784,
"grad_norm": 9.0625,
"learning_rate": 9.831397707678264e-06,
"loss": 0.9296858978271484,
"step": 7650
},
{
"epoch": 0.136332651670075,
"grad_norm": 8.25,
"learning_rate": 9.830129830611625e-06,
"loss": 0.9119468688964844,
"step": 7700
},
{
"epoch": 0.13721792862897159,
"grad_norm": 8.4375,
"learning_rate": 9.828861953544984e-06,
"loss": 0.8511313629150391,
"step": 7750
},
{
"epoch": 0.13810320558786818,
"grad_norm": 9.375,
"learning_rate": 9.827594076478345e-06,
"loss": 0.8683940124511719,
"step": 7800
},
{
"epoch": 0.13898848254676477,
"grad_norm": 8.75,
"learning_rate": 9.826326199411706e-06,
"loss": 0.8960696411132812,
"step": 7850
},
{
"epoch": 0.13987375950566133,
"grad_norm": 7.875,
"learning_rate": 9.825058322345066e-06,
"loss": 0.9292098999023437,
"step": 7900
},
{
"epoch": 0.14075903646455792,
"grad_norm": 8.375,
"learning_rate": 9.823790445278425e-06,
"loss": 0.8068239593505859,
"step": 7950
},
{
"epoch": 0.14164431342345452,
"grad_norm": 7.59375,
"learning_rate": 9.822522568211788e-06,
"loss": 0.8778212738037109,
"step": 8000
},
{
"epoch": 0.1425295903823511,
"grad_norm": 8.25,
"learning_rate": 9.821254691145147e-06,
"loss": 0.8837771606445313,
"step": 8050
},
{
"epoch": 0.1434148673412477,
"grad_norm": 5.875,
"learning_rate": 9.819986814078508e-06,
"loss": 0.9285024261474609,
"step": 8100
},
{
"epoch": 0.1443001443001443,
"grad_norm": 7.71875,
"learning_rate": 9.818718937011869e-06,
"loss": 0.9287461853027343,
"step": 8150
},
{
"epoch": 0.14518542125904088,
"grad_norm": 8.0625,
"learning_rate": 9.81745105994523e-06,
"loss": 0.8639019775390625,
"step": 8200
},
{
"epoch": 0.14607069821793747,
"grad_norm": 8.6875,
"learning_rate": 9.816183182878589e-06,
"loss": 0.8503567504882813,
"step": 8250
},
{
"epoch": 0.14695597517683406,
"grad_norm": 7.90625,
"learning_rate": 9.81491530581195e-06,
"loss": 0.8940105438232422,
"step": 8300
},
{
"epoch": 0.14784125213573066,
"grad_norm": 9.375,
"learning_rate": 9.81364742874531e-06,
"loss": 0.8853314208984375,
"step": 8350
},
{
"epoch": 0.14872652909462725,
"grad_norm": 9.0625,
"learning_rate": 9.81237955167867e-06,
"loss": 0.9398179626464844,
"step": 8400
},
{
"epoch": 0.14961180605352384,
"grad_norm": 9.0625,
"learning_rate": 9.81111167461203e-06,
"loss": 0.9009015655517578,
"step": 8450
},
{
"epoch": 0.15049708301242043,
"grad_norm": 9.5625,
"learning_rate": 9.809843797545391e-06,
"loss": 0.8552869415283203,
"step": 8500
},
{
"epoch": 0.15138235997131702,
"grad_norm": 8.75,
"learning_rate": 9.808575920478752e-06,
"loss": 0.8683760070800781,
"step": 8550
},
{
"epoch": 0.1522676369302136,
"grad_norm": 8.875,
"learning_rate": 9.807308043412111e-06,
"loss": 0.8234300994873047,
"step": 8600
},
{
"epoch": 0.1531529138891102,
"grad_norm": 8.875,
"learning_rate": 9.806040166345472e-06,
"loss": 0.7992705535888672,
"step": 8650
},
{
"epoch": 0.1540381908480068,
"grad_norm": 7.71875,
"learning_rate": 9.804772289278833e-06,
"loss": 0.8522439575195313,
"step": 8700
},
{
"epoch": 0.1549234678069034,
"grad_norm": 10.25,
"learning_rate": 9.803504412212193e-06,
"loss": 0.8569031524658203,
"step": 8750
},
{
"epoch": 0.15580874476579998,
"grad_norm": 8.25,
"learning_rate": 9.802236535145552e-06,
"loss": 0.9121205139160157,
"step": 8800
},
{
"epoch": 0.15669402172469657,
"grad_norm": 8.9375,
"learning_rate": 9.800968658078913e-06,
"loss": 0.8695069122314453,
"step": 8850
},
{
"epoch": 0.15757929868359316,
"grad_norm": 7.875,
"learning_rate": 9.799700781012274e-06,
"loss": 0.8624824523925781,
"step": 8900
},
{
"epoch": 0.15846457564248975,
"grad_norm": 7.21875,
"learning_rate": 9.798432903945633e-06,
"loss": 0.8402172088623047,
"step": 8950
},
{
"epoch": 0.15934985260138634,
"grad_norm": 7.8125,
"learning_rate": 9.797165026878994e-06,
"loss": 0.8360052490234375,
"step": 9000
},
{
"epoch": 0.16023512956028294,
"grad_norm": 10.9375,
"learning_rate": 9.795897149812355e-06,
"loss": 0.9017723083496094,
"step": 9050
},
{
"epoch": 0.16112040651917953,
"grad_norm": 8.4375,
"learning_rate": 9.794629272745716e-06,
"loss": 0.8305178833007812,
"step": 9100
},
{
"epoch": 0.16200568347807612,
"grad_norm": 8.6875,
"learning_rate": 9.793361395679076e-06,
"loss": 0.8787906646728516,
"step": 9150
},
{
"epoch": 0.1628909604369727,
"grad_norm": 8.625,
"learning_rate": 9.792093518612437e-06,
"loss": 0.9024432373046875,
"step": 9200
},
{
"epoch": 0.1637762373958693,
"grad_norm": 10.125,
"learning_rate": 9.790825641545796e-06,
"loss": 0.8455127716064453,
"step": 9250
},
{
"epoch": 0.1646615143547659,
"grad_norm": 10.9375,
"learning_rate": 9.789557764479157e-06,
"loss": 0.8886133575439453,
"step": 9300
},
{
"epoch": 0.16554679131366248,
"grad_norm": 8.8125,
"learning_rate": 9.788289887412518e-06,
"loss": 0.9232273101806641,
"step": 9350
},
{
"epoch": 0.16643206827255907,
"grad_norm": 9.5625,
"learning_rate": 9.787022010345879e-06,
"loss": 0.7960693359375,
"step": 9400
},
{
"epoch": 0.16731734523145567,
"grad_norm": 9.875,
"learning_rate": 9.785754133279238e-06,
"loss": 0.9040877532958984,
"step": 9450
},
{
"epoch": 0.16820262219035226,
"grad_norm": 8.375,
"learning_rate": 9.784486256212599e-06,
"loss": 0.8423170471191406,
"step": 9500
},
{
"epoch": 0.16908789914924885,
"grad_norm": 10.75,
"learning_rate": 9.78321837914596e-06,
"loss": 0.7995271301269531,
"step": 9550
},
{
"epoch": 0.16997317610814544,
"grad_norm": 6.78125,
"learning_rate": 9.781950502079319e-06,
"loss": 0.8801698303222656,
"step": 9600
},
{
"epoch": 0.17085845306704203,
"grad_norm": 6.84375,
"learning_rate": 9.78068262501268e-06,
"loss": 0.902987060546875,
"step": 9650
},
{
"epoch": 0.17174373002593862,
"grad_norm": 8.3125,
"learning_rate": 9.77941474794604e-06,
"loss": 0.9009125518798828,
"step": 9700
},
{
"epoch": 0.17262900698483521,
"grad_norm": 10.4375,
"learning_rate": 9.778146870879401e-06,
"loss": 0.8579206085205078,
"step": 9750
},
{
"epoch": 0.1735142839437318,
"grad_norm": 7.4375,
"learning_rate": 9.77687899381276e-06,
"loss": 0.8464696502685547,
"step": 9800
},
{
"epoch": 0.1743995609026284,
"grad_norm": 11.625,
"learning_rate": 9.775611116746121e-06,
"loss": 0.85698974609375,
"step": 9850
},
{
"epoch": 0.175284837861525,
"grad_norm": 6.5625,
"learning_rate": 9.774343239679482e-06,
"loss": 0.8342364501953125,
"step": 9900
},
{
"epoch": 0.17617011482042158,
"grad_norm": 8.4375,
"learning_rate": 9.773075362612841e-06,
"loss": 0.8839446258544922,
"step": 9950
},
{
"epoch": 0.17705539177931817,
"grad_norm": 8.75,
"learning_rate": 9.771807485546202e-06,
"loss": 0.8295965576171875,
"step": 10000
},
{
"epoch": 0.17705539177931817,
"eval_cer": 16.53660732200669,
"eval_loss": 0.36992114782333374,
"eval_runtime": 390.2086,
"eval_samples_per_second": 12.814,
"eval_steps_per_second": 1.602,
"eval_wer": 33.00347754935052,
"step": 10000
},
{
"epoch": 0.17794066873821476,
"grad_norm": 10.0,
"learning_rate": 9.770539608479563e-06,
"loss": 0.9520655059814453,
"step": 10050
},
{
"epoch": 0.17882594569711135,
"grad_norm": 7.5,
"learning_rate": 9.769271731412923e-06,
"loss": 0.9026583099365234,
"step": 10100
},
{
"epoch": 0.17971122265600792,
"grad_norm": 6.8125,
"learning_rate": 9.768003854346282e-06,
"loss": 0.8356916046142578,
"step": 10150
},
{
"epoch": 0.1805964996149045,
"grad_norm": 10.125,
"learning_rate": 9.766735977279643e-06,
"loss": 0.8295375823974609,
"step": 10200
},
{
"epoch": 0.1814817765738011,
"grad_norm": 8.8125,
"learning_rate": 9.765468100213004e-06,
"loss": 0.8467240905761719,
"step": 10250
},
{
"epoch": 0.1823670535326977,
"grad_norm": 7.9375,
"learning_rate": 9.764200223146365e-06,
"loss": 0.8381356811523437,
"step": 10300
},
{
"epoch": 0.18325233049159428,
"grad_norm": 8.6875,
"learning_rate": 9.762932346079726e-06,
"loss": 0.8656709289550781,
"step": 10350
},
{
"epoch": 0.18413760745049088,
"grad_norm": 8.875,
"learning_rate": 9.761664469013086e-06,
"loss": 0.874046401977539,
"step": 10400
},
{
"epoch": 0.18502288440938747,
"grad_norm": 7.78125,
"learning_rate": 9.760396591946446e-06,
"loss": 0.8614305877685546,
"step": 10450
},
{
"epoch": 0.18590816136828406,
"grad_norm": 9.125,
"learning_rate": 9.759128714879806e-06,
"loss": 0.8775393676757812,
"step": 10500
},
{
"epoch": 0.18679343832718065,
"grad_norm": 7.625,
"learning_rate": 9.757860837813167e-06,
"loss": 0.8610476684570313,
"step": 10550
},
{
"epoch": 0.18767871528607724,
"grad_norm": 7.71875,
"learning_rate": 9.756592960746526e-06,
"loss": 0.9277496337890625,
"step": 10600
},
{
"epoch": 0.18856399224497383,
"grad_norm": 8.5,
"learning_rate": 9.755325083679887e-06,
"loss": 0.8972523498535157,
"step": 10650
},
{
"epoch": 0.18944926920387042,
"grad_norm": 8.0,
"learning_rate": 9.754057206613248e-06,
"loss": 0.854305648803711,
"step": 10700
},
{
"epoch": 0.19033454616276702,
"grad_norm": 5.65625,
"learning_rate": 9.752789329546609e-06,
"loss": 0.8421508026123047,
"step": 10750
},
{
"epoch": 0.1912198231216636,
"grad_norm": 6.625,
"learning_rate": 9.751521452479968e-06,
"loss": 0.8855830383300781,
"step": 10800
},
{
"epoch": 0.1921051000805602,
"grad_norm": 11.0625,
"learning_rate": 9.750253575413329e-06,
"loss": 0.8550155639648438,
"step": 10850
},
{
"epoch": 0.1929903770394568,
"grad_norm": 8.5,
"learning_rate": 9.74898569834669e-06,
"loss": 0.8865677642822266,
"step": 10900
},
{
"epoch": 0.19387565399835338,
"grad_norm": 6.9375,
"learning_rate": 9.747717821280049e-06,
"loss": 0.8427695465087891,
"step": 10950
},
{
"epoch": 0.19476093095724997,
"grad_norm": 7.9375,
"learning_rate": 9.74644994421341e-06,
"loss": 0.8303961181640624,
"step": 11000
},
{
"epoch": 0.19564620791614656,
"grad_norm": 9.25,
"learning_rate": 9.74518206714677e-06,
"loss": 0.8993209838867188,
"step": 11050
},
{
"epoch": 0.19653148487504316,
"grad_norm": 8.8125,
"learning_rate": 9.743914190080131e-06,
"loss": 0.8488899993896485,
"step": 11100
},
{
"epoch": 0.19741676183393975,
"grad_norm": 6.03125,
"learning_rate": 9.74264631301349e-06,
"loss": 0.852935791015625,
"step": 11150
},
{
"epoch": 0.19830203879283634,
"grad_norm": 7.84375,
"learning_rate": 9.741378435946851e-06,
"loss": 0.8230840301513672,
"step": 11200
},
{
"epoch": 0.19918731575173293,
"grad_norm": 6.8125,
"learning_rate": 9.740110558880212e-06,
"loss": 0.8860896301269531,
"step": 11250
},
{
"epoch": 0.20007259271062952,
"grad_norm": 7.5,
"learning_rate": 9.738842681813573e-06,
"loss": 0.8350762939453125,
"step": 11300
},
{
"epoch": 0.2009578696695261,
"grad_norm": 7.75,
"learning_rate": 9.737574804746932e-06,
"loss": 0.835966796875,
"step": 11350
},
{
"epoch": 0.2018431466284227,
"grad_norm": 12.3125,
"learning_rate": 9.736306927680292e-06,
"loss": 0.85518798828125,
"step": 11400
},
{
"epoch": 0.2027284235873193,
"grad_norm": 6.84375,
"learning_rate": 9.735039050613653e-06,
"loss": 0.8386080169677734,
"step": 11450
},
{
"epoch": 0.2036137005462159,
"grad_norm": 6.1875,
"learning_rate": 9.733771173547014e-06,
"loss": 0.8899297332763672,
"step": 11500
},
{
"epoch": 0.20449897750511248,
"grad_norm": 6.8125,
"learning_rate": 9.732503296480375e-06,
"loss": 0.8508304595947266,
"step": 11550
},
{
"epoch": 0.20538425446400907,
"grad_norm": 8.1875,
"learning_rate": 9.731235419413734e-06,
"loss": 0.8747320556640625,
"step": 11600
},
{
"epoch": 0.20626953142290566,
"grad_norm": 9.5,
"learning_rate": 9.729967542347095e-06,
"loss": 0.8832579803466797,
"step": 11650
},
{
"epoch": 0.20715480838180225,
"grad_norm": 7.0,
"learning_rate": 9.728699665280456e-06,
"loss": 0.8430067443847656,
"step": 11700
},
{
"epoch": 0.20804008534069884,
"grad_norm": 8.625,
"learning_rate": 9.727431788213816e-06,
"loss": 0.9135202026367187,
"step": 11750
},
{
"epoch": 0.20892536229959544,
"grad_norm": 11.5,
"learning_rate": 9.726163911147176e-06,
"loss": 0.8933136749267578,
"step": 11800
},
{
"epoch": 0.20981063925849203,
"grad_norm": 9.25,
"learning_rate": 9.724896034080536e-06,
"loss": 0.8763120269775391,
"step": 11850
},
{
"epoch": 0.21069591621738862,
"grad_norm": 9.875,
"learning_rate": 9.723628157013897e-06,
"loss": 0.9074213409423828,
"step": 11900
},
{
"epoch": 0.2115811931762852,
"grad_norm": 8.0,
"learning_rate": 9.722360279947258e-06,
"loss": 0.8514747619628906,
"step": 11950
},
{
"epoch": 0.2124664701351818,
"grad_norm": 8.5,
"learning_rate": 9.721092402880617e-06,
"loss": 0.8526225280761719,
"step": 12000
},
{
"epoch": 0.2133517470940784,
"grad_norm": 8.5,
"learning_rate": 9.719824525813978e-06,
"loss": 0.8410261535644531,
"step": 12050
},
{
"epoch": 0.21423702405297498,
"grad_norm": 9.1875,
"learning_rate": 9.718556648747339e-06,
"loss": 0.8436747741699219,
"step": 12100
},
{
"epoch": 0.21512230101187158,
"grad_norm": 7.21875,
"learning_rate": 9.717288771680698e-06,
"loss": 0.8132114410400391,
"step": 12150
},
{
"epoch": 0.21600757797076817,
"grad_norm": 10.0625,
"learning_rate": 9.716020894614059e-06,
"loss": 0.8950498962402343,
"step": 12200
},
{
"epoch": 0.21689285492966476,
"grad_norm": 9.375,
"learning_rate": 9.71475301754742e-06,
"loss": 0.8579686737060547,
"step": 12250
},
{
"epoch": 0.21777813188856135,
"grad_norm": 7.5625,
"learning_rate": 9.71348514048078e-06,
"loss": 0.9161724853515625,
"step": 12300
},
{
"epoch": 0.21866340884745794,
"grad_norm": 6.65625,
"learning_rate": 9.71221726341414e-06,
"loss": 0.7839602661132813,
"step": 12350
},
{
"epoch": 0.2195486858063545,
"grad_norm": 7.78125,
"learning_rate": 9.7109493863475e-06,
"loss": 0.8397283935546875,
"step": 12400
},
{
"epoch": 0.2204339627652511,
"grad_norm": 9.0625,
"learning_rate": 9.709681509280861e-06,
"loss": 0.8791749572753906,
"step": 12450
},
{
"epoch": 0.2213192397241477,
"grad_norm": 8.1875,
"learning_rate": 9.70841363221422e-06,
"loss": 0.8308121490478516,
"step": 12500
},
{
"epoch": 0.22220451668304428,
"grad_norm": 6.34375,
"learning_rate": 9.707145755147581e-06,
"loss": 0.8770150756835937,
"step": 12550
},
{
"epoch": 0.22308979364194087,
"grad_norm": 9.4375,
"learning_rate": 9.705877878080943e-06,
"loss": 0.8016796875,
"step": 12600
},
{
"epoch": 0.22397507060083746,
"grad_norm": 8.25,
"learning_rate": 9.704610001014303e-06,
"loss": 0.880452880859375,
"step": 12650
},
{
"epoch": 0.22486034755973405,
"grad_norm": 8.875,
"learning_rate": 9.703342123947663e-06,
"loss": 0.8713301849365235,
"step": 12700
},
{
"epoch": 0.22574562451863064,
"grad_norm": 8.4375,
"learning_rate": 9.702074246881024e-06,
"loss": 0.8404985046386719,
"step": 12750
},
{
"epoch": 0.22663090147752724,
"grad_norm": 9.5,
"learning_rate": 9.700806369814383e-06,
"loss": 0.8488478088378906,
"step": 12800
},
{
"epoch": 0.22751617843642383,
"grad_norm": 6.15625,
"learning_rate": 9.699538492747744e-06,
"loss": 0.8541165161132812,
"step": 12850
},
{
"epoch": 0.22840145539532042,
"grad_norm": 7.9375,
"learning_rate": 9.698270615681105e-06,
"loss": 0.8624703216552735,
"step": 12900
},
{
"epoch": 0.229286732354217,
"grad_norm": 7.6875,
"learning_rate": 9.697002738614466e-06,
"loss": 0.8818684387207031,
"step": 12950
},
{
"epoch": 0.2301720093131136,
"grad_norm": 9.125,
"learning_rate": 9.695734861547825e-06,
"loss": 0.8864445495605469,
"step": 13000
},
{
"epoch": 0.2310572862720102,
"grad_norm": 8.125,
"learning_rate": 9.694466984481186e-06,
"loss": 0.8283074951171875,
"step": 13050
},
{
"epoch": 0.23194256323090678,
"grad_norm": 7.6875,
"learning_rate": 9.693199107414546e-06,
"loss": 0.8552584075927734,
"step": 13100
},
{
"epoch": 0.23282784018980338,
"grad_norm": 9.5,
"learning_rate": 9.691931230347906e-06,
"loss": 0.8421883392333984,
"step": 13150
},
{
"epoch": 0.23371311714869997,
"grad_norm": 7.0625,
"learning_rate": 9.690663353281266e-06,
"loss": 0.8718794250488281,
"step": 13200
},
{
"epoch": 0.23459839410759656,
"grad_norm": 6.8125,
"learning_rate": 9.689395476214627e-06,
"loss": 0.8517426300048828,
"step": 13250
},
{
"epoch": 0.23548367106649315,
"grad_norm": 9.4375,
"learning_rate": 9.688127599147988e-06,
"loss": 0.8756562042236328,
"step": 13300
},
{
"epoch": 0.23636894802538974,
"grad_norm": 9.625,
"learning_rate": 9.686859722081347e-06,
"loss": 0.8349308776855469,
"step": 13350
},
{
"epoch": 0.23725422498428633,
"grad_norm": 7.75,
"learning_rate": 9.685591845014708e-06,
"loss": 0.8565451049804688,
"step": 13400
},
{
"epoch": 0.23813950194318292,
"grad_norm": 9.1875,
"learning_rate": 9.684323967948069e-06,
"loss": 0.8808267974853515,
"step": 13450
},
{
"epoch": 0.23902477890207952,
"grad_norm": 13.9375,
"learning_rate": 9.683056090881428e-06,
"loss": 0.7940772247314453,
"step": 13500
},
{
"epoch": 0.2399100558609761,
"grad_norm": 7.0625,
"learning_rate": 9.681788213814789e-06,
"loss": 0.8729141998291016,
"step": 13550
},
{
"epoch": 0.2407953328198727,
"grad_norm": 9.5625,
"learning_rate": 9.68052033674815e-06,
"loss": 0.8781705474853516,
"step": 13600
},
{
"epoch": 0.2416806097787693,
"grad_norm": 6.40625,
"learning_rate": 9.67925245968151e-06,
"loss": 0.895041732788086,
"step": 13650
},
{
"epoch": 0.24256588673766588,
"grad_norm": 8.1875,
"learning_rate": 9.67798458261487e-06,
"loss": 0.8775433349609375,
"step": 13700
},
{
"epoch": 0.24345116369656247,
"grad_norm": 8.125,
"learning_rate": 9.67671670554823e-06,
"loss": 0.79046630859375,
"step": 13750
},
{
"epoch": 0.24433644065545906,
"grad_norm": 6.21875,
"learning_rate": 9.675448828481591e-06,
"loss": 0.8892935180664062,
"step": 13800
},
{
"epoch": 0.24522171761435566,
"grad_norm": 7.6875,
"learning_rate": 9.674180951414952e-06,
"loss": 0.8445626068115234,
"step": 13850
},
{
"epoch": 0.24610699457325225,
"grad_norm": 7.84375,
"learning_rate": 9.672913074348313e-06,
"loss": 0.8559996795654297,
"step": 13900
},
{
"epoch": 0.24699227153214884,
"grad_norm": 10.25,
"learning_rate": 9.671645197281673e-06,
"loss": 0.889148941040039,
"step": 13950
},
{
"epoch": 0.24787754849104543,
"grad_norm": 9.9375,
"learning_rate": 9.670377320215033e-06,
"loss": 0.81448486328125,
"step": 14000
},
{
"epoch": 0.24876282544994202,
"grad_norm": 7.5625,
"learning_rate": 9.669109443148393e-06,
"loss": 0.8375322723388672,
"step": 14050
},
{
"epoch": 0.2496481024088386,
"grad_norm": 9.1875,
"learning_rate": 9.667841566081754e-06,
"loss": 0.7957274627685547,
"step": 14100
},
{
"epoch": 0.2505333793677352,
"grad_norm": 8.4375,
"learning_rate": 9.666573689015113e-06,
"loss": 0.8567101287841797,
"step": 14150
},
{
"epoch": 0.2514186563266318,
"grad_norm": 9.5625,
"learning_rate": 9.665305811948474e-06,
"loss": 0.8208657073974609,
"step": 14200
},
{
"epoch": 0.25230393328552836,
"grad_norm": 6.9375,
"learning_rate": 9.664037934881835e-06,
"loss": 0.8073037719726562,
"step": 14250
},
{
"epoch": 0.253189210244425,
"grad_norm": 8.125,
"learning_rate": 9.662770057815196e-06,
"loss": 0.8132960510253906,
"step": 14300
},
{
"epoch": 0.25407448720332154,
"grad_norm": 8.125,
"learning_rate": 9.661502180748555e-06,
"loss": 0.839927749633789,
"step": 14350
},
{
"epoch": 0.25495976416221816,
"grad_norm": 8.0,
"learning_rate": 9.660234303681916e-06,
"loss": 0.8778898620605469,
"step": 14400
},
{
"epoch": 0.2558450411211147,
"grad_norm": 8.3125,
"learning_rate": 9.658966426615276e-06,
"loss": 0.833895492553711,
"step": 14450
},
{
"epoch": 0.25673031808001134,
"grad_norm": 7.9375,
"learning_rate": 9.657698549548637e-06,
"loss": 0.8810472869873047,
"step": 14500
},
{
"epoch": 0.2576155950389079,
"grad_norm": 7.46875,
"learning_rate": 9.656430672481996e-06,
"loss": 0.7839117431640625,
"step": 14550
},
{
"epoch": 0.2585008719978045,
"grad_norm": 8.5,
"learning_rate": 9.655162795415357e-06,
"loss": 0.9003073120117188,
"step": 14600
},
{
"epoch": 0.2593861489567011,
"grad_norm": 8.375,
"learning_rate": 9.653894918348718e-06,
"loss": 0.8191262054443359,
"step": 14650
},
{
"epoch": 0.2602714259155977,
"grad_norm": 9.0625,
"learning_rate": 9.652627041282077e-06,
"loss": 0.8235029602050781,
"step": 14700
},
{
"epoch": 0.2611567028744943,
"grad_norm": 7.75,
"learning_rate": 9.651359164215438e-06,
"loss": 0.8247006225585938,
"step": 14750
},
{
"epoch": 0.2620419798333909,
"grad_norm": 6.875,
"learning_rate": 9.650091287148799e-06,
"loss": 0.8273910522460938,
"step": 14800
},
{
"epoch": 0.26292725679228746,
"grad_norm": 8.25,
"learning_rate": 9.64882341008216e-06,
"loss": 0.7925537109375,
"step": 14850
},
{
"epoch": 0.2638125337511841,
"grad_norm": 6.78125,
"learning_rate": 9.647555533015519e-06,
"loss": 0.8129417419433593,
"step": 14900
},
{
"epoch": 0.26469781071008064,
"grad_norm": 8.4375,
"learning_rate": 9.646287655948881e-06,
"loss": 0.7914369964599609,
"step": 14950
},
{
"epoch": 0.26558308766897726,
"grad_norm": 7.53125,
"learning_rate": 9.64501977888224e-06,
"loss": 0.8484162139892578,
"step": 15000
},
{
"epoch": 0.26558308766897726,
"eval_cer": 16.190625965955,
"eval_loss": 0.3607212007045746,
"eval_runtime": 393.1507,
"eval_samples_per_second": 12.718,
"eval_steps_per_second": 1.59,
"eval_wer": 32.338651938222355,
"step": 15000
},
{
"epoch": 0.2664683646278738,
"grad_norm": 7.75,
"learning_rate": 9.643751901815601e-06,
"loss": 0.9018843078613281,
"step": 15050
},
{
"epoch": 0.26735364158677044,
"grad_norm": 9.5625,
"learning_rate": 9.642484024748962e-06,
"loss": 0.8514089965820313,
"step": 15100
},
{
"epoch": 0.268238918545667,
"grad_norm": 8.0,
"learning_rate": 9.641216147682323e-06,
"loss": 0.832979507446289,
"step": 15150
},
{
"epoch": 0.2691241955045636,
"grad_norm": 10.0,
"learning_rate": 9.639948270615682e-06,
"loss": 0.843365707397461,
"step": 15200
},
{
"epoch": 0.2700094724634602,
"grad_norm": 7.78125,
"learning_rate": 9.638680393549043e-06,
"loss": 0.8681787109375,
"step": 15250
},
{
"epoch": 0.2708947494223568,
"grad_norm": 9.25,
"learning_rate": 9.637412516482403e-06,
"loss": 0.8352089691162109,
"step": 15300
},
{
"epoch": 0.27178002638125337,
"grad_norm": 6.59375,
"learning_rate": 9.636144639415763e-06,
"loss": 0.890997543334961,
"step": 15350
},
{
"epoch": 0.27266530334015,
"grad_norm": 10.1875,
"learning_rate": 9.634876762349123e-06,
"loss": 0.8376169586181641,
"step": 15400
},
{
"epoch": 0.27355058029904655,
"grad_norm": 9.75,
"learning_rate": 9.633608885282484e-06,
"loss": 0.8699169921875,
"step": 15450
},
{
"epoch": 0.27443585725794317,
"grad_norm": 8.625,
"learning_rate": 9.632341008215845e-06,
"loss": 0.8817887115478515,
"step": 15500
},
{
"epoch": 0.27532113421683974,
"grad_norm": 8.625,
"learning_rate": 9.631073131149204e-06,
"loss": 0.8531747436523438,
"step": 15550
},
{
"epoch": 0.27620641117573635,
"grad_norm": 7.09375,
"learning_rate": 9.629805254082565e-06,
"loss": 0.78208740234375,
"step": 15600
},
{
"epoch": 0.2770916881346329,
"grad_norm": 7.28125,
"learning_rate": 9.628537377015926e-06,
"loss": 0.7966637420654297,
"step": 15650
},
{
"epoch": 0.27797696509352954,
"grad_norm": 6.0,
"learning_rate": 9.627269499949285e-06,
"loss": 0.8817159271240235,
"step": 15700
},
{
"epoch": 0.2788622420524261,
"grad_norm": 9.25,
"learning_rate": 9.626001622882646e-06,
"loss": 0.8878803253173828,
"step": 15750
},
{
"epoch": 0.27974751901132267,
"grad_norm": 8.0,
"learning_rate": 9.624733745816006e-06,
"loss": 0.803402328491211,
"step": 15800
},
{
"epoch": 0.2806327959702193,
"grad_norm": 11.5625,
"learning_rate": 9.623465868749367e-06,
"loss": 0.8275116729736328,
"step": 15850
},
{
"epoch": 0.28151807292911585,
"grad_norm": 10.5625,
"learning_rate": 9.622197991682726e-06,
"loss": 0.887125244140625,
"step": 15900
},
{
"epoch": 0.28240334988801247,
"grad_norm": 7.84375,
"learning_rate": 9.620930114616087e-06,
"loss": 0.9281369781494141,
"step": 15950
},
{
"epoch": 0.28328862684690903,
"grad_norm": 8.125,
"learning_rate": 9.619662237549448e-06,
"loss": 0.7792628479003906,
"step": 16000
},
{
"epoch": 0.28417390380580565,
"grad_norm": 7.84375,
"learning_rate": 9.618394360482807e-06,
"loss": 0.797691650390625,
"step": 16050
},
{
"epoch": 0.2850591807647022,
"grad_norm": 7.34375,
"learning_rate": 9.61712648341617e-06,
"loss": 0.8892618560791016,
"step": 16100
},
{
"epoch": 0.28594445772359883,
"grad_norm": 10.875,
"learning_rate": 9.61585860634953e-06,
"loss": 0.8775372314453125,
"step": 16150
},
{
"epoch": 0.2868297346824954,
"grad_norm": 7.21875,
"learning_rate": 9.61459072928289e-06,
"loss": 0.8037387084960937,
"step": 16200
},
{
"epoch": 0.287715011641392,
"grad_norm": 11.25,
"learning_rate": 9.61332285221625e-06,
"loss": 0.8707780456542968,
"step": 16250
},
{
"epoch": 0.2886002886002886,
"grad_norm": 8.25,
"learning_rate": 9.612054975149611e-06,
"loss": 0.8491946411132812,
"step": 16300
},
{
"epoch": 0.2894855655591852,
"grad_norm": 8.4375,
"learning_rate": 9.61078709808297e-06,
"loss": 0.8137165069580078,
"step": 16350
},
{
"epoch": 0.29037084251808176,
"grad_norm": 13.0625,
"learning_rate": 9.609519221016331e-06,
"loss": 0.8208762359619141,
"step": 16400
},
{
"epoch": 0.2912561194769784,
"grad_norm": 8.75,
"learning_rate": 9.608251343949692e-06,
"loss": 0.8479267120361328,
"step": 16450
},
{
"epoch": 0.29214139643587494,
"grad_norm": 9.25,
"learning_rate": 9.606983466883053e-06,
"loss": 0.8337993621826172,
"step": 16500
},
{
"epoch": 0.29302667339477156,
"grad_norm": 8.75,
"learning_rate": 9.605715589816412e-06,
"loss": 0.8054754638671875,
"step": 16550
},
{
"epoch": 0.2939119503536681,
"grad_norm": 6.53125,
"learning_rate": 9.604447712749773e-06,
"loss": 0.7765091705322266,
"step": 16600
},
{
"epoch": 0.29479722731256475,
"grad_norm": 7.625,
"learning_rate": 9.603179835683133e-06,
"loss": 0.8374893188476562,
"step": 16650
},
{
"epoch": 0.2956825042714613,
"grad_norm": 9.625,
"learning_rate": 9.601911958616492e-06,
"loss": 0.8807014465332031,
"step": 16700
},
{
"epoch": 0.29656778123035793,
"grad_norm": 7.375,
"learning_rate": 9.600644081549853e-06,
"loss": 0.7978987884521485,
"step": 16750
},
{
"epoch": 0.2974530581892545,
"grad_norm": 9.75,
"learning_rate": 9.599376204483214e-06,
"loss": 0.8720764923095703,
"step": 16800
},
{
"epoch": 0.2983383351481511,
"grad_norm": 9.5625,
"learning_rate": 9.598108327416575e-06,
"loss": 0.8416900634765625,
"step": 16850
},
{
"epoch": 0.2992236121070477,
"grad_norm": 9.0625,
"learning_rate": 9.596840450349934e-06,
"loss": 0.8841259002685546,
"step": 16900
},
{
"epoch": 0.3001088890659443,
"grad_norm": 8.4375,
"learning_rate": 9.595572573283295e-06,
"loss": 0.8410957336425782,
"step": 16950
},
{
"epoch": 0.30099416602484086,
"grad_norm": 10.75,
"learning_rate": 9.594304696216656e-06,
"loss": 0.8281800079345704,
"step": 17000
},
{
"epoch": 0.3018794429837375,
"grad_norm": 9.1875,
"learning_rate": 9.593036819150016e-06,
"loss": 0.8017253875732422,
"step": 17050
},
{
"epoch": 0.30276471994263404,
"grad_norm": 10.75,
"learning_rate": 9.591768942083376e-06,
"loss": 0.7658233642578125,
"step": 17100
},
{
"epoch": 0.30364999690153066,
"grad_norm": 9.8125,
"learning_rate": 9.590501065016736e-06,
"loss": 0.8435968780517578,
"step": 17150
},
{
"epoch": 0.3045352738604272,
"grad_norm": 10.9375,
"learning_rate": 9.589233187950097e-06,
"loss": 0.8984470367431641,
"step": 17200
},
{
"epoch": 0.30542055081932384,
"grad_norm": 7.75,
"learning_rate": 9.587965310883458e-06,
"loss": 0.8200258636474609,
"step": 17250
},
{
"epoch": 0.3063058277782204,
"grad_norm": 7.09375,
"learning_rate": 9.586697433816819e-06,
"loss": 0.8967515563964844,
"step": 17300
},
{
"epoch": 0.307191104737117,
"grad_norm": 7.40625,
"learning_rate": 9.585429556750178e-06,
"loss": 0.81177734375,
"step": 17350
},
{
"epoch": 0.3080763816960136,
"grad_norm": 9.3125,
"learning_rate": 9.584161679683539e-06,
"loss": 0.8331631469726563,
"step": 17400
},
{
"epoch": 0.3089616586549102,
"grad_norm": 7.6875,
"learning_rate": 9.5828938026169e-06,
"loss": 0.834053726196289,
"step": 17450
},
{
"epoch": 0.3098469356138068,
"grad_norm": 7.625,
"learning_rate": 9.58162592555026e-06,
"loss": 0.7964566040039063,
"step": 17500
},
{
"epoch": 0.3107322125727034,
"grad_norm": 9.0,
"learning_rate": 9.58035804848362e-06,
"loss": 0.8436365509033203,
"step": 17550
},
{
"epoch": 0.31161748953159996,
"grad_norm": 8.625,
"learning_rate": 9.57909017141698e-06,
"loss": 0.8416962432861328,
"step": 17600
},
{
"epoch": 0.3125027664904966,
"grad_norm": 9.3125,
"learning_rate": 9.577822294350341e-06,
"loss": 0.7992512512207032,
"step": 17650
},
{
"epoch": 0.31338804344939314,
"grad_norm": 7.25,
"learning_rate": 9.576554417283702e-06,
"loss": 0.7787315368652343,
"step": 17700
},
{
"epoch": 0.31427332040828976,
"grad_norm": 10.25,
"learning_rate": 9.575286540217061e-06,
"loss": 0.8611086273193359,
"step": 17750
},
{
"epoch": 0.3151585973671863,
"grad_norm": 7.625,
"learning_rate": 9.574018663150422e-06,
"loss": 0.8016007995605469,
"step": 17800
},
{
"epoch": 0.31604387432608294,
"grad_norm": 7.0625,
"learning_rate": 9.572750786083783e-06,
"loss": 0.8051242828369141,
"step": 17850
},
{
"epoch": 0.3169291512849795,
"grad_norm": 7.96875,
"learning_rate": 9.571482909017142e-06,
"loss": 0.8546369171142578,
"step": 17900
},
{
"epoch": 0.3178144282438761,
"grad_norm": 9.75,
"learning_rate": 9.570215031950503e-06,
"loss": 0.8925285339355469,
"step": 17950
},
{
"epoch": 0.3186997052027727,
"grad_norm": 7.4375,
"learning_rate": 9.568947154883863e-06,
"loss": 0.852982177734375,
"step": 18000
},
{
"epoch": 0.31958498216166925,
"grad_norm": 7.5625,
"learning_rate": 9.567679277817224e-06,
"loss": 0.8864115142822265,
"step": 18050
},
{
"epoch": 0.32047025912056587,
"grad_norm": 8.625,
"learning_rate": 9.566411400750583e-06,
"loss": 0.7933511352539062,
"step": 18100
},
{
"epoch": 0.32135553607946243,
"grad_norm": 6.71875,
"learning_rate": 9.565143523683944e-06,
"loss": 0.816483154296875,
"step": 18150
},
{
"epoch": 0.32224081303835905,
"grad_norm": 8.9375,
"learning_rate": 9.563875646617305e-06,
"loss": 0.8137828063964844,
"step": 18200
},
{
"epoch": 0.3231260899972556,
"grad_norm": 7.5,
"learning_rate": 9.562607769550664e-06,
"loss": 0.8930496978759765,
"step": 18250
},
{
"epoch": 0.32401136695615224,
"grad_norm": 7.875,
"learning_rate": 9.561339892484025e-06,
"loss": 0.8350536346435546,
"step": 18300
},
{
"epoch": 0.3248966439150488,
"grad_norm": 7.5625,
"learning_rate": 9.560072015417386e-06,
"loss": 0.7733663940429687,
"step": 18350
},
{
"epoch": 0.3257819208739454,
"grad_norm": 9.9375,
"learning_rate": 9.558804138350746e-06,
"loss": 0.8673963165283203,
"step": 18400
},
{
"epoch": 0.326667197832842,
"grad_norm": 8.5,
"learning_rate": 9.557536261284107e-06,
"loss": 0.8465771484375,
"step": 18450
},
{
"epoch": 0.3275524747917386,
"grad_norm": 8.9375,
"learning_rate": 9.556268384217468e-06,
"loss": 0.8279172515869141,
"step": 18500
},
{
"epoch": 0.32843775175063517,
"grad_norm": 9.4375,
"learning_rate": 9.555000507150827e-06,
"loss": 0.8542655181884765,
"step": 18550
},
{
"epoch": 0.3293230287095318,
"grad_norm": 7.09375,
"learning_rate": 9.553732630084188e-06,
"loss": 0.8693686676025391,
"step": 18600
},
{
"epoch": 0.33020830566842835,
"grad_norm": 9.625,
"learning_rate": 9.552464753017549e-06,
"loss": 0.7420355224609375,
"step": 18650
},
{
"epoch": 0.33109358262732497,
"grad_norm": 9.75,
"learning_rate": 9.55119687595091e-06,
"loss": 0.8566489410400391,
"step": 18700
},
{
"epoch": 0.33197885958622153,
"grad_norm": 6.65625,
"learning_rate": 9.549928998884269e-06,
"loss": 0.8141315460205079,
"step": 18750
},
{
"epoch": 0.33286413654511815,
"grad_norm": 9.3125,
"learning_rate": 9.54866112181763e-06,
"loss": 0.8337672424316406,
"step": 18800
},
{
"epoch": 0.3337494135040147,
"grad_norm": 9.0625,
"learning_rate": 9.54739324475099e-06,
"loss": 0.8334447479248047,
"step": 18850
},
{
"epoch": 0.33463469046291133,
"grad_norm": 8.4375,
"learning_rate": 9.54612536768435e-06,
"loss": 0.8280233764648437,
"step": 18900
},
{
"epoch": 0.3355199674218079,
"grad_norm": 8.875,
"learning_rate": 9.54485749061771e-06,
"loss": 0.8619183349609375,
"step": 18950
},
{
"epoch": 0.3364052443807045,
"grad_norm": 7.28125,
"learning_rate": 9.543589613551071e-06,
"loss": 0.792462158203125,
"step": 19000
},
{
"epoch": 0.3372905213396011,
"grad_norm": 10.0,
"learning_rate": 9.542321736484432e-06,
"loss": 0.8707679748535156,
"step": 19050
},
{
"epoch": 0.3381757982984977,
"grad_norm": 9.4375,
"learning_rate": 9.541053859417791e-06,
"loss": 0.8130400085449219,
"step": 19100
},
{
"epoch": 0.33906107525739426,
"grad_norm": 8.5,
"learning_rate": 9.539785982351152e-06,
"loss": 0.7939989471435547,
"step": 19150
},
{
"epoch": 0.3399463522162909,
"grad_norm": 7.46875,
"learning_rate": 9.538518105284513e-06,
"loss": 0.8232540893554687,
"step": 19200
},
{
"epoch": 0.34083162917518744,
"grad_norm": 9.0,
"learning_rate": 9.537250228217872e-06,
"loss": 0.8494704437255859,
"step": 19250
},
{
"epoch": 0.34171690613408406,
"grad_norm": 7.125,
"learning_rate": 9.535982351151233e-06,
"loss": 0.8644766235351562,
"step": 19300
},
{
"epoch": 0.34260218309298063,
"grad_norm": 8.75,
"learning_rate": 9.534714474084593e-06,
"loss": 0.8599738311767579,
"step": 19350
},
{
"epoch": 0.34348746005187725,
"grad_norm": 7.75,
"learning_rate": 9.533446597017954e-06,
"loss": 0.8332124328613282,
"step": 19400
},
{
"epoch": 0.3443727370107738,
"grad_norm": 8.6875,
"learning_rate": 9.532178719951313e-06,
"loss": 0.8581776428222656,
"step": 19450
},
{
"epoch": 0.34525801396967043,
"grad_norm": 6.875,
"learning_rate": 9.530910842884674e-06,
"loss": 0.8054019927978515,
"step": 19500
},
{
"epoch": 0.346143290928567,
"grad_norm": 8.9375,
"learning_rate": 9.529642965818035e-06,
"loss": 0.833067398071289,
"step": 19550
},
{
"epoch": 0.3470285678874636,
"grad_norm": 8.6875,
"learning_rate": 9.528375088751396e-06,
"loss": 0.8120539855957031,
"step": 19600
},
{
"epoch": 0.3479138448463602,
"grad_norm": 8.6875,
"learning_rate": 9.527107211684756e-06,
"loss": 0.7809496307373047,
"step": 19650
},
{
"epoch": 0.3487991218052568,
"grad_norm": 9.4375,
"learning_rate": 9.525839334618117e-06,
"loss": 0.8102002716064454,
"step": 19700
},
{
"epoch": 0.34968439876415336,
"grad_norm": 7.9375,
"learning_rate": 9.524571457551476e-06,
"loss": 0.8498989868164063,
"step": 19750
},
{
"epoch": 0.35056967572305,
"grad_norm": 10.1875,
"learning_rate": 9.523303580484837e-06,
"loss": 0.8017991638183594,
"step": 19800
},
{
"epoch": 0.35145495268194654,
"grad_norm": 10.9375,
"learning_rate": 9.522035703418198e-06,
"loss": 0.7946707153320313,
"step": 19850
},
{
"epoch": 0.35234022964084316,
"grad_norm": 7.03125,
"learning_rate": 9.520767826351557e-06,
"loss": 0.8487144470214844,
"step": 19900
},
{
"epoch": 0.3532255065997397,
"grad_norm": 9.375,
"learning_rate": 9.519499949284918e-06,
"loss": 0.7888392639160157,
"step": 19950
},
{
"epoch": 0.35411078355863634,
"grad_norm": 7.84375,
"learning_rate": 9.518232072218279e-06,
"loss": 0.8027859497070312,
"step": 20000
},
{
"epoch": 0.35411078355863634,
"eval_cer": 16.348974799759223,
"eval_loss": 0.35486724972724915,
"eval_runtime": 393.2747,
"eval_samples_per_second": 12.714,
"eval_steps_per_second": 1.589,
"eval_wer": 32.70174900276158,
"step": 20000
},
{
"epoch": 0.3549960605175329,
"grad_norm": 9.6875,
"learning_rate": 9.51696419515164e-06,
"loss": 0.8532256317138672,
"step": 20050
},
{
"epoch": 0.3558813374764295,
"grad_norm": 10.0625,
"learning_rate": 9.515696318084999e-06,
"loss": 0.884788589477539,
"step": 20100
},
{
"epoch": 0.3567666144353261,
"grad_norm": 8.875,
"learning_rate": 9.51442844101836e-06,
"loss": 0.7734761047363281,
"step": 20150
},
{
"epoch": 0.3576518913942227,
"grad_norm": 8.125,
"learning_rate": 9.51316056395172e-06,
"loss": 0.8991136169433593,
"step": 20200
},
{
"epoch": 0.3585371683531193,
"grad_norm": 8.0625,
"learning_rate": 9.511892686885081e-06,
"loss": 0.8061054229736329,
"step": 20250
},
{
"epoch": 0.35942244531201584,
"grad_norm": 8.0,
"learning_rate": 9.51062480981844e-06,
"loss": 0.8342051696777344,
"step": 20300
},
{
"epoch": 0.36030772227091246,
"grad_norm": 6.46875,
"learning_rate": 9.509356932751801e-06,
"loss": 0.8063926696777344,
"step": 20350
},
{
"epoch": 0.361192999229809,
"grad_norm": 8.875,
"learning_rate": 9.508089055685162e-06,
"loss": 0.9006285095214843,
"step": 20400
},
{
"epoch": 0.36207827618870564,
"grad_norm": 8.0625,
"learning_rate": 9.506821178618521e-06,
"loss": 0.7908558654785156,
"step": 20450
},
{
"epoch": 0.3629635531476022,
"grad_norm": 7.0625,
"learning_rate": 9.505553301551882e-06,
"loss": 0.8398319244384765,
"step": 20500
},
{
"epoch": 0.3638488301064988,
"grad_norm": 5.8125,
"learning_rate": 9.504285424485243e-06,
"loss": 0.8094285583496094,
"step": 20550
},
{
"epoch": 0.3647341070653954,
"grad_norm": 9.1875,
"learning_rate": 9.503017547418603e-06,
"loss": 0.8495778656005859,
"step": 20600
},
{
"epoch": 0.365619384024292,
"grad_norm": 7.5,
"learning_rate": 9.501749670351963e-06,
"loss": 0.8105506134033204,
"step": 20650
},
{
"epoch": 0.36650466098318857,
"grad_norm": 8.5625,
"learning_rate": 9.500481793285325e-06,
"loss": 0.7994151306152344,
"step": 20700
},
{
"epoch": 0.3673899379420852,
"grad_norm": 8.375,
"learning_rate": 9.499213916218684e-06,
"loss": 0.8393498992919922,
"step": 20750
},
{
"epoch": 0.36827521490098175,
"grad_norm": 10.6875,
"learning_rate": 9.497946039152045e-06,
"loss": 0.82013427734375,
"step": 20800
},
{
"epoch": 0.36916049185987837,
"grad_norm": 9.5,
"learning_rate": 9.496678162085406e-06,
"loss": 0.8179158782958984,
"step": 20850
},
{
"epoch": 0.37004576881877493,
"grad_norm": 12.875,
"learning_rate": 9.495410285018767e-06,
"loss": 0.7779678344726563,
"step": 20900
},
{
"epoch": 0.37093104577767155,
"grad_norm": 7.46875,
"learning_rate": 9.494142407952126e-06,
"loss": 0.8509443664550781,
"step": 20950
},
{
"epoch": 0.3718163227365681,
"grad_norm": 8.375,
"learning_rate": 9.492874530885486e-06,
"loss": 0.7994340515136719,
"step": 21000
},
{
"epoch": 0.37270159969546474,
"grad_norm": 8.875,
"learning_rate": 9.491606653818847e-06,
"loss": 0.8334093475341797,
"step": 21050
},
{
"epoch": 0.3735868766543613,
"grad_norm": 10.3125,
"learning_rate": 9.490338776752206e-06,
"loss": 0.8529891967773438,
"step": 21100
},
{
"epoch": 0.3744721536132579,
"grad_norm": 9.125,
"learning_rate": 9.489070899685567e-06,
"loss": 0.7969075012207031,
"step": 21150
},
{
"epoch": 0.3753574305721545,
"grad_norm": 11.3125,
"learning_rate": 9.487803022618928e-06,
"loss": 0.8441764068603516,
"step": 21200
},
{
"epoch": 0.3762427075310511,
"grad_norm": 7.71875,
"learning_rate": 9.486535145552289e-06,
"loss": 0.8383098602294922,
"step": 21250
},
{
"epoch": 0.37712798448994767,
"grad_norm": 7.8125,
"learning_rate": 9.485267268485648e-06,
"loss": 0.8604541778564453,
"step": 21300
},
{
"epoch": 0.3780132614488443,
"grad_norm": 9.25,
"learning_rate": 9.483999391419009e-06,
"loss": 0.8413866424560547,
"step": 21350
},
{
"epoch": 0.37889853840774085,
"grad_norm": 9.625,
"learning_rate": 9.48273151435237e-06,
"loss": 0.7846895599365235,
"step": 21400
},
{
"epoch": 0.37978381536663747,
"grad_norm": 7.375,
"learning_rate": 9.481463637285729e-06,
"loss": 0.8002585601806641,
"step": 21450
},
{
"epoch": 0.38066909232553403,
"grad_norm": 8.0,
"learning_rate": 9.48019576021909e-06,
"loss": 0.8195709991455078,
"step": 21500
},
{
"epoch": 0.38155436928443065,
"grad_norm": 9.0625,
"learning_rate": 9.47892788315245e-06,
"loss": 0.8092010498046875,
"step": 21550
},
{
"epoch": 0.3824396462433272,
"grad_norm": 8.8125,
"learning_rate": 9.477660006085811e-06,
"loss": 0.8117552947998047,
"step": 21600
},
{
"epoch": 0.38332492320222383,
"grad_norm": 6.21875,
"learning_rate": 9.47639212901917e-06,
"loss": 0.8895623779296875,
"step": 21650
},
{
"epoch": 0.3842102001611204,
"grad_norm": 8.75,
"learning_rate": 9.475124251952531e-06,
"loss": 0.7886461639404296,
"step": 21700
},
{
"epoch": 0.385095477120017,
"grad_norm": 8.3125,
"learning_rate": 9.473856374885892e-06,
"loss": 0.802349853515625,
"step": 21750
},
{
"epoch": 0.3859807540789136,
"grad_norm": 12.0625,
"learning_rate": 9.472588497819251e-06,
"loss": 0.8550609588623047,
"step": 21800
},
{
"epoch": 0.3868660310378102,
"grad_norm": 8.125,
"learning_rate": 9.471320620752612e-06,
"loss": 0.7605663299560547,
"step": 21850
},
{
"epoch": 0.38775130799670676,
"grad_norm": 7.53125,
"learning_rate": 9.470052743685974e-06,
"loss": 0.8628280639648438,
"step": 21900
},
{
"epoch": 0.3886365849556034,
"grad_norm": 8.625,
"learning_rate": 9.468784866619333e-06,
"loss": 0.8964498138427734,
"step": 21950
},
{
"epoch": 0.38952186191449995,
"grad_norm": 10.75,
"learning_rate": 9.467516989552694e-06,
"loss": 0.7898972320556641,
"step": 22000
},
{
"epoch": 0.39040713887339656,
"grad_norm": 6.375,
"learning_rate": 9.466249112486055e-06,
"loss": 0.8010289001464844,
"step": 22050
},
{
"epoch": 0.39129241583229313,
"grad_norm": 8.5,
"learning_rate": 9.464981235419414e-06,
"loss": 0.8129051208496094,
"step": 22100
},
{
"epoch": 0.39217769279118975,
"grad_norm": 10.4375,
"learning_rate": 9.463713358352775e-06,
"loss": 0.8285366058349609,
"step": 22150
},
{
"epoch": 0.3930629697500863,
"grad_norm": 10.9375,
"learning_rate": 9.462445481286136e-06,
"loss": 0.8392569732666015,
"step": 22200
},
{
"epoch": 0.39394824670898293,
"grad_norm": 8.6875,
"learning_rate": 9.461177604219497e-06,
"loss": 0.8171870422363281,
"step": 22250
},
{
"epoch": 0.3948335236678795,
"grad_norm": 8.9375,
"learning_rate": 9.459909727152856e-06,
"loss": 0.8369279479980469,
"step": 22300
},
{
"epoch": 0.3957188006267761,
"grad_norm": 9.9375,
"learning_rate": 9.458641850086216e-06,
"loss": 0.7750814819335937,
"step": 22350
},
{
"epoch": 0.3966040775856727,
"grad_norm": 11.0625,
"learning_rate": 9.457373973019577e-06,
"loss": 0.8476492309570313,
"step": 22400
},
{
"epoch": 0.3974893545445693,
"grad_norm": 8.375,
"learning_rate": 9.456106095952936e-06,
"loss": 0.8161160278320313,
"step": 22450
},
{
"epoch": 0.39837463150346586,
"grad_norm": 8.9375,
"learning_rate": 9.454838218886297e-06,
"loss": 0.861170654296875,
"step": 22500
},
{
"epoch": 0.3992599084623624,
"grad_norm": 8.125,
"learning_rate": 9.453570341819658e-06,
"loss": 0.7938341522216796,
"step": 22550
},
{
"epoch": 0.40014518542125904,
"grad_norm": 6.8125,
"learning_rate": 9.452302464753019e-06,
"loss": 0.7918325805664063,
"step": 22600
},
{
"epoch": 0.4010304623801556,
"grad_norm": 8.5,
"learning_rate": 9.451034587686378e-06,
"loss": 0.8244574737548828,
"step": 22650
},
{
"epoch": 0.4019157393390522,
"grad_norm": 7.75,
"learning_rate": 9.449766710619739e-06,
"loss": 0.759022216796875,
"step": 22700
},
{
"epoch": 0.4028010162979488,
"grad_norm": 9.0625,
"learning_rate": 9.4484988335531e-06,
"loss": 0.7742694854736328,
"step": 22750
},
{
"epoch": 0.4036862932568454,
"grad_norm": 7.84375,
"learning_rate": 9.447230956486459e-06,
"loss": 0.8123777008056641,
"step": 22800
},
{
"epoch": 0.40457157021574197,
"grad_norm": 8.6875,
"learning_rate": 9.44596307941982e-06,
"loss": 0.7599580383300781,
"step": 22850
},
{
"epoch": 0.4054568471746386,
"grad_norm": 7.625,
"learning_rate": 9.44469520235318e-06,
"loss": 0.7971720123291015,
"step": 22900
},
{
"epoch": 0.40634212413353515,
"grad_norm": 9.625,
"learning_rate": 9.443427325286541e-06,
"loss": 0.7970821380615234,
"step": 22950
},
{
"epoch": 0.4072274010924318,
"grad_norm": 8.6875,
"learning_rate": 9.4421594482199e-06,
"loss": 0.8227862548828125,
"step": 23000
},
{
"epoch": 0.40811267805132834,
"grad_norm": 8.25,
"learning_rate": 9.440891571153263e-06,
"loss": 0.8271920013427735,
"step": 23050
},
{
"epoch": 0.40899795501022496,
"grad_norm": 8.5,
"learning_rate": 9.439623694086622e-06,
"loss": 0.83154541015625,
"step": 23100
},
{
"epoch": 0.4098832319691215,
"grad_norm": 8.6875,
"learning_rate": 9.438355817019983e-06,
"loss": 0.8107095336914063,
"step": 23150
},
{
"epoch": 0.41076850892801814,
"grad_norm": 8.9375,
"learning_rate": 9.437087939953343e-06,
"loss": 0.8437236785888672,
"step": 23200
},
{
"epoch": 0.4116537858869147,
"grad_norm": 8.5625,
"learning_rate": 9.435820062886704e-06,
"loss": 0.8471569061279297,
"step": 23250
},
{
"epoch": 0.4125390628458113,
"grad_norm": 7.875,
"learning_rate": 9.434552185820063e-06,
"loss": 0.885667724609375,
"step": 23300
},
{
"epoch": 0.4134243398047079,
"grad_norm": 8.3125,
"learning_rate": 9.433284308753424e-06,
"loss": 0.8514400482177734,
"step": 23350
},
{
"epoch": 0.4143096167636045,
"grad_norm": 10.1875,
"learning_rate": 9.432016431686785e-06,
"loss": 0.7581684112548828,
"step": 23400
},
{
"epoch": 0.41519489372250107,
"grad_norm": 10.0,
"learning_rate": 9.430748554620146e-06,
"loss": 0.824618911743164,
"step": 23450
},
{
"epoch": 0.4160801706813977,
"grad_norm": 7.59375,
"learning_rate": 9.429480677553505e-06,
"loss": 0.8305864715576172,
"step": 23500
},
{
"epoch": 0.41696544764029425,
"grad_norm": 9.1875,
"learning_rate": 9.428212800486866e-06,
"loss": 0.8161003875732422,
"step": 23550
},
{
"epoch": 0.41785072459919087,
"grad_norm": 12.0,
"learning_rate": 9.426944923420227e-06,
"loss": 0.8694761657714843,
"step": 23600
},
{
"epoch": 0.41873600155808743,
"grad_norm": 9.4375,
"learning_rate": 9.425677046353586e-06,
"loss": 0.8649164581298828,
"step": 23650
},
{
"epoch": 0.41962127851698405,
"grad_norm": 7.9375,
"learning_rate": 9.424409169286946e-06,
"loss": 0.8016709899902343,
"step": 23700
},
{
"epoch": 0.4205065554758806,
"grad_norm": 6.625,
"learning_rate": 9.423141292220307e-06,
"loss": 0.8073011016845704,
"step": 23750
},
{
"epoch": 0.42139183243477724,
"grad_norm": 8.5,
"learning_rate": 9.421873415153668e-06,
"loss": 0.8519166564941406,
"step": 23800
},
{
"epoch": 0.4222771093936738,
"grad_norm": 8.6875,
"learning_rate": 9.420605538087027e-06,
"loss": 0.7963951110839844,
"step": 23850
},
{
"epoch": 0.4231623863525704,
"grad_norm": 9.4375,
"learning_rate": 9.419337661020388e-06,
"loss": 0.8319783020019531,
"step": 23900
},
{
"epoch": 0.424047663311467,
"grad_norm": 8.5,
"learning_rate": 9.418069783953749e-06,
"loss": 0.8024713134765625,
"step": 23950
},
{
"epoch": 0.4249329402703636,
"grad_norm": 6.5,
"learning_rate": 9.416801906887108e-06,
"loss": 0.8116042327880859,
"step": 24000
},
{
"epoch": 0.42581821722926017,
"grad_norm": 5.96875,
"learning_rate": 9.415534029820469e-06,
"loss": 0.8232134246826172,
"step": 24050
},
{
"epoch": 0.4267034941881568,
"grad_norm": 9.5625,
"learning_rate": 9.41426615275383e-06,
"loss": 0.8045470428466797,
"step": 24100
},
{
"epoch": 0.42758877114705335,
"grad_norm": 8.5625,
"learning_rate": 9.41299827568719e-06,
"loss": 0.8654727935791016,
"step": 24150
},
{
"epoch": 0.42847404810594997,
"grad_norm": 7.5625,
"learning_rate": 9.411730398620551e-06,
"loss": 0.8151998138427734,
"step": 24200
},
{
"epoch": 0.42935932506484653,
"grad_norm": 7.1875,
"learning_rate": 9.410462521553912e-06,
"loss": 0.8104602813720703,
"step": 24250
},
{
"epoch": 0.43024460202374315,
"grad_norm": 8.625,
"learning_rate": 9.409194644487271e-06,
"loss": 0.8054197692871093,
"step": 24300
},
{
"epoch": 0.4311298789826397,
"grad_norm": 7.40625,
"learning_rate": 9.407926767420632e-06,
"loss": 0.8097126770019532,
"step": 24350
},
{
"epoch": 0.43201515594153633,
"grad_norm": 8.3125,
"learning_rate": 9.406658890353993e-06,
"loss": 0.7878807067871094,
"step": 24400
},
{
"epoch": 0.4329004329004329,
"grad_norm": 9.4375,
"learning_rate": 9.405391013287353e-06,
"loss": 0.8757616424560547,
"step": 24450
},
{
"epoch": 0.4337857098593295,
"grad_norm": 7.53125,
"learning_rate": 9.404123136220713e-06,
"loss": 0.7997799682617187,
"step": 24500
},
{
"epoch": 0.4346709868182261,
"grad_norm": 11.0625,
"learning_rate": 9.402855259154073e-06,
"loss": 0.7962652587890625,
"step": 24550
},
{
"epoch": 0.4355562637771227,
"grad_norm": 8.25,
"learning_rate": 9.401587382087434e-06,
"loss": 0.7983963775634766,
"step": 24600
},
{
"epoch": 0.43644154073601926,
"grad_norm": 6.34375,
"learning_rate": 9.400319505020793e-06,
"loss": 0.8001494598388672,
"step": 24650
},
{
"epoch": 0.4373268176949159,
"grad_norm": 9.5,
"learning_rate": 9.399051627954154e-06,
"loss": 0.7819596099853515,
"step": 24700
},
{
"epoch": 0.43821209465381245,
"grad_norm": 8.0625,
"learning_rate": 9.397783750887515e-06,
"loss": 0.8707284545898437,
"step": 24750
},
{
"epoch": 0.439097371612709,
"grad_norm": 10.3125,
"learning_rate": 9.396515873820876e-06,
"loss": 0.805533447265625,
"step": 24800
},
{
"epoch": 0.43998264857160563,
"grad_norm": 6.46875,
"learning_rate": 9.395247996754235e-06,
"loss": 0.8091240692138671,
"step": 24850
},
{
"epoch": 0.4408679255305022,
"grad_norm": 6.5625,
"learning_rate": 9.393980119687596e-06,
"loss": 0.7720470428466797,
"step": 24900
},
{
"epoch": 0.4417532024893988,
"grad_norm": 7.71875,
"learning_rate": 9.392712242620956e-06,
"loss": 0.7984862518310547,
"step": 24950
},
{
"epoch": 0.4426384794482954,
"grad_norm": 10.1875,
"learning_rate": 9.391444365554316e-06,
"loss": 0.85141845703125,
"step": 25000
},
{
"epoch": 0.4426384794482954,
"eval_cer": 15.512762807546515,
"eval_loss": 0.3506615161895752,
"eval_runtime": 388.5865,
"eval_samples_per_second": 12.867,
"eval_steps_per_second": 1.608,
"eval_wer": 31.382325866830318,
"step": 25000
},
{
"epoch": 0.443523756407192,
"grad_norm": 8.1875,
"learning_rate": 9.390176488487676e-06,
"loss": 0.799303207397461,
"step": 25050
},
{
"epoch": 0.44440903336608856,
"grad_norm": 7.5625,
"learning_rate": 9.388908611421037e-06,
"loss": 0.7852619934082031,
"step": 25100
},
{
"epoch": 0.4452943103249852,
"grad_norm": 9.125,
"learning_rate": 9.387640734354398e-06,
"loss": 0.7992433166503906,
"step": 25150
},
{
"epoch": 0.44617958728388174,
"grad_norm": 7.5625,
"learning_rate": 9.386372857287757e-06,
"loss": 0.8536985015869141,
"step": 25200
},
{
"epoch": 0.44706486424277836,
"grad_norm": 8.375,
"learning_rate": 9.385104980221118e-06,
"loss": 0.7951334381103515,
"step": 25250
},
{
"epoch": 0.4479501412016749,
"grad_norm": 6.15625,
"learning_rate": 9.383837103154479e-06,
"loss": 0.8157552337646484,
"step": 25300
},
{
"epoch": 0.44883541816057154,
"grad_norm": 7.875,
"learning_rate": 9.38256922608784e-06,
"loss": 0.8191168212890625,
"step": 25350
},
{
"epoch": 0.4497206951194681,
"grad_norm": 10.5625,
"learning_rate": 9.3813013490212e-06,
"loss": 0.7932091522216796,
"step": 25400
},
{
"epoch": 0.4506059720783647,
"grad_norm": 7.0625,
"learning_rate": 9.380033471954561e-06,
"loss": 0.8196167755126953,
"step": 25450
},
{
"epoch": 0.4514912490372613,
"grad_norm": 7.96875,
"learning_rate": 9.37876559488792e-06,
"loss": 0.80684326171875,
"step": 25500
},
{
"epoch": 0.4523765259961579,
"grad_norm": 8.375,
"learning_rate": 9.377497717821281e-06,
"loss": 0.8326478576660157,
"step": 25550
},
{
"epoch": 0.45326180295505447,
"grad_norm": 10.5625,
"learning_rate": 9.376229840754642e-06,
"loss": 0.8006377410888672,
"step": 25600
},
{
"epoch": 0.4541470799139511,
"grad_norm": 8.8125,
"learning_rate": 9.374961963688001e-06,
"loss": 0.8118048858642578,
"step": 25650
},
{
"epoch": 0.45503235687284765,
"grad_norm": 6.75,
"learning_rate": 9.373694086621362e-06,
"loss": 0.8060718536376953,
"step": 25700
},
{
"epoch": 0.4559176338317443,
"grad_norm": 10.0625,
"learning_rate": 9.372426209554723e-06,
"loss": 0.7967828369140625,
"step": 25750
},
{
"epoch": 0.45680291079064084,
"grad_norm": 9.3125,
"learning_rate": 9.371158332488083e-06,
"loss": 0.8676046752929687,
"step": 25800
},
{
"epoch": 0.45768818774953746,
"grad_norm": 6.5625,
"learning_rate": 9.369890455421443e-06,
"loss": 0.83003662109375,
"step": 25850
},
{
"epoch": 0.458573464708434,
"grad_norm": 7.84375,
"learning_rate": 9.368622578354803e-06,
"loss": 0.7804559326171875,
"step": 25900
},
{
"epoch": 0.45945874166733064,
"grad_norm": 10.0625,
"learning_rate": 9.367354701288164e-06,
"loss": 0.8414221954345703,
"step": 25950
},
{
"epoch": 0.4603440186262272,
"grad_norm": 10.0625,
"learning_rate": 9.366086824221523e-06,
"loss": 0.8436249542236328,
"step": 26000
},
{
"epoch": 0.4612292955851238,
"grad_norm": 9.75,
"learning_rate": 9.364818947154884e-06,
"loss": 0.8198944091796875,
"step": 26050
},
{
"epoch": 0.4621145725440204,
"grad_norm": 7.21875,
"learning_rate": 9.363551070088245e-06,
"loss": 0.822802505493164,
"step": 26100
},
{
"epoch": 0.462999849502917,
"grad_norm": 9.5,
"learning_rate": 9.362283193021606e-06,
"loss": 0.8616585540771484,
"step": 26150
},
{
"epoch": 0.46388512646181357,
"grad_norm": 10.5625,
"learning_rate": 9.361015315954965e-06,
"loss": 0.874574203491211,
"step": 26200
},
{
"epoch": 0.4647704034207102,
"grad_norm": 8.25,
"learning_rate": 9.359747438888326e-06,
"loss": 0.8157247161865234,
"step": 26250
},
{
"epoch": 0.46565568037960675,
"grad_norm": 9.5,
"learning_rate": 9.358479561821686e-06,
"loss": 0.8261668395996093,
"step": 26300
},
{
"epoch": 0.46654095733850337,
"grad_norm": 9.25,
"learning_rate": 9.357211684755047e-06,
"loss": 0.7921039581298828,
"step": 26350
},
{
"epoch": 0.46742623429739993,
"grad_norm": 8.125,
"learning_rate": 9.355943807688406e-06,
"loss": 0.8063948059082031,
"step": 26400
},
{
"epoch": 0.46831151125629655,
"grad_norm": 8.4375,
"learning_rate": 9.354675930621767e-06,
"loss": 0.8948915100097656,
"step": 26450
},
{
"epoch": 0.4691967882151931,
"grad_norm": 8.375,
"learning_rate": 9.353408053555128e-06,
"loss": 0.8333416748046875,
"step": 26500
},
{
"epoch": 0.47008206517408974,
"grad_norm": 8.6875,
"learning_rate": 9.352140176488489e-06,
"loss": 0.8180615997314453,
"step": 26550
},
{
"epoch": 0.4709673421329863,
"grad_norm": 8.3125,
"learning_rate": 9.35087229942185e-06,
"loss": 0.8063065338134766,
"step": 26600
},
{
"epoch": 0.4718526190918829,
"grad_norm": 8.5625,
"learning_rate": 9.34960442235521e-06,
"loss": 0.7838368225097656,
"step": 26650
},
{
"epoch": 0.4727378960507795,
"grad_norm": 9.5,
"learning_rate": 9.34833654528857e-06,
"loss": 0.7889875030517578,
"step": 26700
},
{
"epoch": 0.4736231730096761,
"grad_norm": 8.875,
"learning_rate": 9.34706866822193e-06,
"loss": 0.8149432373046875,
"step": 26750
},
{
"epoch": 0.47450844996857267,
"grad_norm": 7.59375,
"learning_rate": 9.345800791155291e-06,
"loss": 0.8343911743164063,
"step": 26800
},
{
"epoch": 0.4753937269274693,
"grad_norm": 6.09375,
"learning_rate": 9.34453291408865e-06,
"loss": 0.8231613159179687,
"step": 26850
},
{
"epoch": 0.47627900388636585,
"grad_norm": 8.875,
"learning_rate": 9.343265037022011e-06,
"loss": 0.8221437835693359,
"step": 26900
},
{
"epoch": 0.47716428084526247,
"grad_norm": 7.90625,
"learning_rate": 9.341997159955372e-06,
"loss": 0.8173371124267578,
"step": 26950
},
{
"epoch": 0.47804955780415903,
"grad_norm": 8.25,
"learning_rate": 9.340729282888733e-06,
"loss": 0.8692239379882812,
"step": 27000
},
{
"epoch": 0.4789348347630556,
"grad_norm": 6.40625,
"learning_rate": 9.339461405822092e-06,
"loss": 0.7678931427001953,
"step": 27050
},
{
"epoch": 0.4798201117219522,
"grad_norm": 9.0625,
"learning_rate": 9.338193528755453e-06,
"loss": 0.8847496032714843,
"step": 27100
},
{
"epoch": 0.4807053886808488,
"grad_norm": 7.96875,
"learning_rate": 9.336925651688813e-06,
"loss": 0.8440608978271484,
"step": 27150
},
{
"epoch": 0.4815906656397454,
"grad_norm": 7.34375,
"learning_rate": 9.335657774622173e-06,
"loss": 0.7591238403320313,
"step": 27200
},
{
"epoch": 0.48247594259864196,
"grad_norm": 7.4375,
"learning_rate": 9.334389897555533e-06,
"loss": 0.8198709106445312,
"step": 27250
},
{
"epoch": 0.4833612195575386,
"grad_norm": 8.9375,
"learning_rate": 9.333122020488894e-06,
"loss": 0.7931713104248047,
"step": 27300
},
{
"epoch": 0.48424649651643514,
"grad_norm": 9.25,
"learning_rate": 9.331854143422255e-06,
"loss": 0.8829562377929687,
"step": 27350
},
{
"epoch": 0.48513177347533176,
"grad_norm": 8.3125,
"learning_rate": 9.330586266355614e-06,
"loss": 0.857093734741211,
"step": 27400
},
{
"epoch": 0.4860170504342283,
"grad_norm": 8.25,
"learning_rate": 9.329318389288975e-06,
"loss": 0.8017523193359375,
"step": 27450
},
{
"epoch": 0.48690232739312495,
"grad_norm": 9.9375,
"learning_rate": 9.328050512222336e-06,
"loss": 0.8276552581787109,
"step": 27500
},
{
"epoch": 0.4877876043520215,
"grad_norm": 8.125,
"learning_rate": 9.326782635155695e-06,
"loss": 0.7625294494628906,
"step": 27550
},
{
"epoch": 0.48867288131091813,
"grad_norm": 6.4375,
"learning_rate": 9.325514758089056e-06,
"loss": 0.8343047332763672,
"step": 27600
},
{
"epoch": 0.4895581582698147,
"grad_norm": 6.59375,
"learning_rate": 9.324246881022418e-06,
"loss": 0.8407593536376953,
"step": 27650
},
{
"epoch": 0.4904434352287113,
"grad_norm": 9.75,
"learning_rate": 9.322979003955777e-06,
"loss": 0.7998301696777343,
"step": 27700
},
{
"epoch": 0.4913287121876079,
"grad_norm": 9.125,
"learning_rate": 9.321711126889138e-06,
"loss": 0.8573383331298828,
"step": 27750
},
{
"epoch": 0.4922139891465045,
"grad_norm": 10.875,
"learning_rate": 9.320443249822499e-06,
"loss": 0.8192384338378906,
"step": 27800
},
{
"epoch": 0.49309926610540106,
"grad_norm": 8.8125,
"learning_rate": 9.319175372755858e-06,
"loss": 0.7779326629638672,
"step": 27850
},
{
"epoch": 0.4939845430642977,
"grad_norm": 7.90625,
"learning_rate": 9.317907495689219e-06,
"loss": 0.8377117156982422,
"step": 27900
},
{
"epoch": 0.49486982002319424,
"grad_norm": 6.9375,
"learning_rate": 9.31663961862258e-06,
"loss": 0.8032022094726563,
"step": 27950
},
{
"epoch": 0.49575509698209086,
"grad_norm": 9.4375,
"learning_rate": 9.31537174155594e-06,
"loss": 0.8107691955566406,
"step": 28000
},
{
"epoch": 0.4966403739409874,
"grad_norm": 7.75,
"learning_rate": 9.3141038644893e-06,
"loss": 0.8325393676757813,
"step": 28050
},
{
"epoch": 0.49752565089988404,
"grad_norm": 9.625,
"learning_rate": 9.31283598742266e-06,
"loss": 0.80835693359375,
"step": 28100
},
{
"epoch": 0.4984109278587806,
"grad_norm": 9.0625,
"learning_rate": 9.311568110356021e-06,
"loss": 0.8398920440673828,
"step": 28150
},
{
"epoch": 0.4992962048176772,
"grad_norm": 9.5,
"learning_rate": 9.31030023328938e-06,
"loss": 0.8618205261230468,
"step": 28200
},
{
"epoch": 0.5001814817765738,
"grad_norm": 7.3125,
"learning_rate": 9.309032356222741e-06,
"loss": 0.7820648956298828,
"step": 28250
},
{
"epoch": 0.5010667587354704,
"grad_norm": 5.96875,
"learning_rate": 9.307764479156102e-06,
"loss": 0.8232012176513672,
"step": 28300
},
{
"epoch": 0.501952035694367,
"grad_norm": 7.15625,
"learning_rate": 9.306496602089463e-06,
"loss": 0.7935179138183593,
"step": 28350
},
{
"epoch": 0.5028373126532636,
"grad_norm": 7.75,
"learning_rate": 9.305228725022822e-06,
"loss": 0.812857666015625,
"step": 28400
},
{
"epoch": 0.5037225896121602,
"grad_norm": 6.75,
"learning_rate": 9.303960847956183e-06,
"loss": 0.7825227355957032,
"step": 28450
},
{
"epoch": 0.5046078665710567,
"grad_norm": 6.875,
"learning_rate": 9.302692970889543e-06,
"loss": 0.8190470886230469,
"step": 28500
},
{
"epoch": 0.5054931435299533,
"grad_norm": 6.28125,
"learning_rate": 9.301425093822903e-06,
"loss": 0.855802993774414,
"step": 28550
},
{
"epoch": 0.50637842048885,
"grad_norm": 10.5625,
"learning_rate": 9.300157216756263e-06,
"loss": 0.848614501953125,
"step": 28600
},
{
"epoch": 0.5072636974477466,
"grad_norm": 8.125,
"learning_rate": 9.298889339689624e-06,
"loss": 0.7645280456542969,
"step": 28650
},
{
"epoch": 0.5081489744066431,
"grad_norm": 9.1875,
"learning_rate": 9.297621462622985e-06,
"loss": 0.8297785949707032,
"step": 28700
},
{
"epoch": 0.5090342513655397,
"grad_norm": 6.40625,
"learning_rate": 9.296353585556344e-06,
"loss": 0.8629362487792969,
"step": 28750
},
{
"epoch": 0.5099195283244363,
"grad_norm": 9.375,
"learning_rate": 9.295085708489707e-06,
"loss": 0.8396317291259766,
"step": 28800
},
{
"epoch": 0.5108048052833329,
"grad_norm": 9.9375,
"learning_rate": 9.293817831423066e-06,
"loss": 0.8256559753417969,
"step": 28850
},
{
"epoch": 0.5116900822422294,
"grad_norm": 8.9375,
"learning_rate": 9.292549954356427e-06,
"loss": 0.8319975280761719,
"step": 28900
},
{
"epoch": 0.5125753592011261,
"grad_norm": 8.0,
"learning_rate": 9.291282077289787e-06,
"loss": 0.7743982696533203,
"step": 28950
},
{
"epoch": 0.5134606361600227,
"grad_norm": 7.21875,
"learning_rate": 9.290014200223148e-06,
"loss": 0.8560353088378906,
"step": 29000
},
{
"epoch": 0.5143459131189193,
"grad_norm": 9.25,
"learning_rate": 9.288746323156507e-06,
"loss": 0.872972640991211,
"step": 29050
},
{
"epoch": 0.5152311900778158,
"grad_norm": 9.0625,
"learning_rate": 9.287478446089868e-06,
"loss": 0.8381455230712891,
"step": 29100
},
{
"epoch": 0.5161164670367124,
"grad_norm": 9.3125,
"learning_rate": 9.286210569023229e-06,
"loss": 0.7603321838378906,
"step": 29150
},
{
"epoch": 0.517001743995609,
"grad_norm": 7.28125,
"learning_rate": 9.284942691956588e-06,
"loss": 0.8029882049560547,
"step": 29200
},
{
"epoch": 0.5178870209545057,
"grad_norm": 8.5625,
"learning_rate": 9.283674814889949e-06,
"loss": 0.8445874786376953,
"step": 29250
},
{
"epoch": 0.5187722979134022,
"grad_norm": 7.21875,
"learning_rate": 9.28240693782331e-06,
"loss": 0.806130142211914,
"step": 29300
},
{
"epoch": 0.5196575748722988,
"grad_norm": 8.625,
"learning_rate": 9.28113906075667e-06,
"loss": 0.8128162384033203,
"step": 29350
},
{
"epoch": 0.5205428518311954,
"grad_norm": 9.3125,
"learning_rate": 9.27987118369003e-06,
"loss": 0.8138497161865235,
"step": 29400
},
{
"epoch": 0.5214281287900919,
"grad_norm": 8.875,
"learning_rate": 9.27860330662339e-06,
"loss": 0.7841197967529296,
"step": 29450
},
{
"epoch": 0.5223134057489885,
"grad_norm": 7.46875,
"learning_rate": 9.277335429556751e-06,
"loss": 0.8085577392578125,
"step": 29500
},
{
"epoch": 0.5231986827078852,
"grad_norm": 10.1875,
"learning_rate": 9.276067552490112e-06,
"loss": 0.8392274475097656,
"step": 29550
},
{
"epoch": 0.5240839596667818,
"grad_norm": 8.1875,
"learning_rate": 9.274799675423471e-06,
"loss": 0.8423346710205079,
"step": 29600
},
{
"epoch": 0.5249692366256783,
"grad_norm": 8.9375,
"learning_rate": 9.273531798356832e-06,
"loss": 0.7886857604980468,
"step": 29650
},
{
"epoch": 0.5258545135845749,
"grad_norm": 9.5,
"learning_rate": 9.272263921290193e-06,
"loss": 0.8614559936523437,
"step": 29700
},
{
"epoch": 0.5267397905434715,
"grad_norm": 8.6875,
"learning_rate": 9.270996044223552e-06,
"loss": 0.7886000061035157,
"step": 29750
},
{
"epoch": 0.5276250675023682,
"grad_norm": 9.0,
"learning_rate": 9.269728167156913e-06,
"loss": 0.8309735870361328,
"step": 29800
},
{
"epoch": 0.5285103444612647,
"grad_norm": 10.25,
"learning_rate": 9.268460290090273e-06,
"loss": 0.8716236877441407,
"step": 29850
},
{
"epoch": 0.5293956214201613,
"grad_norm": 8.625,
"learning_rate": 9.267192413023634e-06,
"loss": 0.8325408935546875,
"step": 29900
},
{
"epoch": 0.5302808983790579,
"grad_norm": 6.96875,
"learning_rate": 9.265924535956993e-06,
"loss": 0.7999062347412109,
"step": 29950
},
{
"epoch": 0.5311661753379545,
"grad_norm": 8.3125,
"learning_rate": 9.264656658890356e-06,
"loss": 0.8242444610595703,
"step": 30000
},
{
"epoch": 0.5311661753379545,
"eval_cer": 14.722645510105584,
"eval_loss": 0.3480595052242279,
"eval_runtime": 379.8738,
"eval_samples_per_second": 13.162,
"eval_steps_per_second": 1.645,
"eval_wer": 30.56407896082643,
"step": 30000
},
{
"epoch": 0.532051452296851,
"grad_norm": 8.6875,
"learning_rate": 9.263388781823715e-06,
"loss": 0.9031015777587891,
"step": 30050
},
{
"epoch": 0.5329367292557476,
"grad_norm": 13.3125,
"learning_rate": 9.262120904757076e-06,
"loss": 0.8271858978271485,
"step": 30100
},
{
"epoch": 0.5338220062146443,
"grad_norm": 7.3125,
"learning_rate": 9.260853027690437e-06,
"loss": 0.7925968170166016,
"step": 30150
},
{
"epoch": 0.5347072831735409,
"grad_norm": 10.25,
"learning_rate": 9.259585150623797e-06,
"loss": 0.8345429229736329,
"step": 30200
},
{
"epoch": 0.5355925601324374,
"grad_norm": 9.625,
"learning_rate": 9.258317273557156e-06,
"loss": 0.8198999786376953,
"step": 30250
},
{
"epoch": 0.536477837091334,
"grad_norm": 8.0,
"learning_rate": 9.257049396490517e-06,
"loss": 0.8118746185302734,
"step": 30300
},
{
"epoch": 0.5373631140502306,
"grad_norm": 9.125,
"learning_rate": 9.255781519423878e-06,
"loss": 0.7819632720947266,
"step": 30350
},
{
"epoch": 0.5382483910091272,
"grad_norm": 8.125,
"learning_rate": 9.254513642357237e-06,
"loss": 0.84641845703125,
"step": 30400
},
{
"epoch": 0.5391336679680238,
"grad_norm": 8.5625,
"learning_rate": 9.253245765290598e-06,
"loss": 0.8433683776855468,
"step": 30450
},
{
"epoch": 0.5400189449269204,
"grad_norm": 9.375,
"learning_rate": 9.251977888223959e-06,
"loss": 0.7750820922851562,
"step": 30500
},
{
"epoch": 0.540904221885817,
"grad_norm": 8.75,
"learning_rate": 9.25071001115732e-06,
"loss": 0.8537428283691406,
"step": 30550
},
{
"epoch": 0.5417894988447136,
"grad_norm": 8.6875,
"learning_rate": 9.249442134090679e-06,
"loss": 0.8174424743652344,
"step": 30600
},
{
"epoch": 0.5426747758036101,
"grad_norm": 7.28125,
"learning_rate": 9.24817425702404e-06,
"loss": 0.7803053283691406,
"step": 30650
},
{
"epoch": 0.5435600527625067,
"grad_norm": 8.875,
"learning_rate": 9.2469063799574e-06,
"loss": 0.8024676513671875,
"step": 30700
},
{
"epoch": 0.5444453297214034,
"grad_norm": 8.125,
"learning_rate": 9.24563850289076e-06,
"loss": 0.830534439086914,
"step": 30750
},
{
"epoch": 0.5453306066803,
"grad_norm": 8.25,
"learning_rate": 9.24437062582412e-06,
"loss": 0.8581230926513672,
"step": 30800
},
{
"epoch": 0.5462158836391965,
"grad_norm": 9.25,
"learning_rate": 9.243102748757481e-06,
"loss": 0.8423960876464843,
"step": 30850
},
{
"epoch": 0.5471011605980931,
"grad_norm": 6.78125,
"learning_rate": 9.241834871690842e-06,
"loss": 0.8226587677001953,
"step": 30900
},
{
"epoch": 0.5479864375569897,
"grad_norm": 9.6875,
"learning_rate": 9.240566994624201e-06,
"loss": 0.7333302307128906,
"step": 30950
},
{
"epoch": 0.5488717145158863,
"grad_norm": 9.4375,
"learning_rate": 9.239299117557562e-06,
"loss": 0.8279609680175781,
"step": 31000
},
{
"epoch": 0.5497569914747829,
"grad_norm": 10.3125,
"learning_rate": 9.238031240490923e-06,
"loss": 0.8205570983886719,
"step": 31050
},
{
"epoch": 0.5506422684336795,
"grad_norm": 10.5625,
"learning_rate": 9.236763363424282e-06,
"loss": 0.7977956390380859,
"step": 31100
},
{
"epoch": 0.5515275453925761,
"grad_norm": 9.4375,
"learning_rate": 9.235495486357644e-06,
"loss": 0.7922000885009766,
"step": 31150
},
{
"epoch": 0.5524128223514727,
"grad_norm": 8.1875,
"learning_rate": 9.234227609291005e-06,
"loss": 0.8065330505371093,
"step": 31200
},
{
"epoch": 0.5532980993103692,
"grad_norm": 6.5625,
"learning_rate": 9.232959732224364e-06,
"loss": 0.766522445678711,
"step": 31250
},
{
"epoch": 0.5541833762692658,
"grad_norm": 8.4375,
"learning_rate": 9.231691855157725e-06,
"loss": 0.7584080505371094,
"step": 31300
},
{
"epoch": 0.5550686532281625,
"grad_norm": 7.8125,
"learning_rate": 9.230423978091086e-06,
"loss": 0.835227279663086,
"step": 31350
},
{
"epoch": 0.5559539301870591,
"grad_norm": 7.71875,
"learning_rate": 9.229156101024445e-06,
"loss": 0.8134999084472656,
"step": 31400
},
{
"epoch": 0.5568392071459556,
"grad_norm": 8.625,
"learning_rate": 9.227888223957806e-06,
"loss": 0.8568966674804688,
"step": 31450
},
{
"epoch": 0.5577244841048522,
"grad_norm": 9.1875,
"learning_rate": 9.226620346891167e-06,
"loss": 0.8286125946044922,
"step": 31500
},
{
"epoch": 0.5586097610637488,
"grad_norm": 8.1875,
"learning_rate": 9.225352469824527e-06,
"loss": 0.806230697631836,
"step": 31550
},
{
"epoch": 0.5594950380226453,
"grad_norm": 7.0625,
"learning_rate": 9.224084592757886e-06,
"loss": 0.8486292266845703,
"step": 31600
},
{
"epoch": 0.560380314981542,
"grad_norm": 9.875,
"learning_rate": 9.222816715691247e-06,
"loss": 0.8534250640869141,
"step": 31650
},
{
"epoch": 0.5612655919404386,
"grad_norm": 9.0,
"learning_rate": 9.221548838624608e-06,
"loss": 0.7652369689941406,
"step": 31700
},
{
"epoch": 0.5621508688993352,
"grad_norm": 9.75,
"learning_rate": 9.220280961557967e-06,
"loss": 0.8141221618652343,
"step": 31750
},
{
"epoch": 0.5630361458582317,
"grad_norm": 7.75,
"learning_rate": 9.219013084491328e-06,
"loss": 0.7593769073486328,
"step": 31800
},
{
"epoch": 0.5639214228171283,
"grad_norm": 9.5625,
"learning_rate": 9.217745207424689e-06,
"loss": 0.89138671875,
"step": 31850
},
{
"epoch": 0.5648066997760249,
"grad_norm": 8.1875,
"learning_rate": 9.21647733035805e-06,
"loss": 0.7844825744628906,
"step": 31900
},
{
"epoch": 0.5656919767349216,
"grad_norm": 7.9375,
"learning_rate": 9.215209453291409e-06,
"loss": 0.7966393280029297,
"step": 31950
},
{
"epoch": 0.5665772536938181,
"grad_norm": 7.71875,
"learning_rate": 9.21394157622477e-06,
"loss": 0.7827631378173828,
"step": 32000
},
{
"epoch": 0.5674625306527147,
"grad_norm": 9.0,
"learning_rate": 9.21267369915813e-06,
"loss": 0.8383135986328125,
"step": 32050
},
{
"epoch": 0.5683478076116113,
"grad_norm": 8.3125,
"learning_rate": 9.211405822091491e-06,
"loss": 0.8304233551025391,
"step": 32100
},
{
"epoch": 0.5692330845705079,
"grad_norm": 9.5,
"learning_rate": 9.21013794502485e-06,
"loss": 0.8089006042480469,
"step": 32150
},
{
"epoch": 0.5701183615294044,
"grad_norm": 6.40625,
"learning_rate": 9.208870067958211e-06,
"loss": 0.8643374633789063,
"step": 32200
},
{
"epoch": 0.571003638488301,
"grad_norm": 8.5,
"learning_rate": 9.207602190891572e-06,
"loss": 0.87065185546875,
"step": 32250
},
{
"epoch": 0.5718889154471977,
"grad_norm": 8.375,
"learning_rate": 9.206334313824933e-06,
"loss": 0.8011983489990234,
"step": 32300
},
{
"epoch": 0.5727741924060943,
"grad_norm": 9.5,
"learning_rate": 9.205066436758294e-06,
"loss": 0.8257938385009765,
"step": 32350
},
{
"epoch": 0.5736594693649908,
"grad_norm": 9.25,
"learning_rate": 9.203798559691654e-06,
"loss": 0.7791096496582032,
"step": 32400
},
{
"epoch": 0.5745447463238874,
"grad_norm": 9.6875,
"learning_rate": 9.202530682625013e-06,
"loss": 0.8195112609863281,
"step": 32450
},
{
"epoch": 0.575430023282784,
"grad_norm": 8.25,
"learning_rate": 9.201262805558374e-06,
"loss": 0.8126992797851562,
"step": 32500
},
{
"epoch": 0.5763153002416807,
"grad_norm": 6.9375,
"learning_rate": 9.199994928491735e-06,
"loss": 0.8560022735595703,
"step": 32550
},
{
"epoch": 0.5772005772005772,
"grad_norm": 8.6875,
"learning_rate": 9.198727051425094e-06,
"loss": 0.8273929595947266,
"step": 32600
},
{
"epoch": 0.5780858541594738,
"grad_norm": 9.25,
"learning_rate": 9.197459174358455e-06,
"loss": 0.8422256469726562,
"step": 32650
},
{
"epoch": 0.5789711311183704,
"grad_norm": 7.625,
"learning_rate": 9.196191297291816e-06,
"loss": 0.7996526336669922,
"step": 32700
},
{
"epoch": 0.579856408077267,
"grad_norm": 9.125,
"learning_rate": 9.194923420225177e-06,
"loss": 0.8200563812255859,
"step": 32750
},
{
"epoch": 0.5807416850361635,
"grad_norm": 8.4375,
"learning_rate": 9.193655543158536e-06,
"loss": 0.7688130187988281,
"step": 32800
},
{
"epoch": 0.5816269619950601,
"grad_norm": 7.625,
"learning_rate": 9.192387666091897e-06,
"loss": 0.8218675231933594,
"step": 32850
},
{
"epoch": 0.5825122389539568,
"grad_norm": 10.5625,
"learning_rate": 9.191119789025257e-06,
"loss": 0.8125396728515625,
"step": 32900
},
{
"epoch": 0.5833975159128534,
"grad_norm": 9.0,
"learning_rate": 9.189851911958616e-06,
"loss": 0.7877046966552734,
"step": 32950
},
{
"epoch": 0.5842827928717499,
"grad_norm": 10.5,
"learning_rate": 9.188584034891977e-06,
"loss": 0.8003798675537109,
"step": 33000
},
{
"epoch": 0.5851680698306465,
"grad_norm": 10.1875,
"learning_rate": 9.187316157825338e-06,
"loss": 0.768984146118164,
"step": 33050
},
{
"epoch": 0.5860533467895431,
"grad_norm": 9.75,
"learning_rate": 9.186048280758699e-06,
"loss": 0.8237137603759765,
"step": 33100
},
{
"epoch": 0.5869386237484397,
"grad_norm": 7.75,
"learning_rate": 9.184780403692058e-06,
"loss": 0.7769164276123047,
"step": 33150
},
{
"epoch": 0.5878239007073363,
"grad_norm": 9.375,
"learning_rate": 9.183512526625419e-06,
"loss": 0.8206394958496094,
"step": 33200
},
{
"epoch": 0.5887091776662329,
"grad_norm": 8.125,
"learning_rate": 9.18224464955878e-06,
"loss": 0.811323013305664,
"step": 33250
},
{
"epoch": 0.5895944546251295,
"grad_norm": 7.3125,
"learning_rate": 9.180976772492139e-06,
"loss": 0.8037760925292968,
"step": 33300
},
{
"epoch": 0.5904797315840261,
"grad_norm": 9.4375,
"learning_rate": 9.1797088954255e-06,
"loss": 0.8595541381835937,
"step": 33350
},
{
"epoch": 0.5913650085429226,
"grad_norm": 9.5625,
"learning_rate": 9.17844101835886e-06,
"loss": 0.7697556304931641,
"step": 33400
},
{
"epoch": 0.5922502855018192,
"grad_norm": 9.25,
"learning_rate": 9.177173141292221e-06,
"loss": 0.7896424865722657,
"step": 33450
},
{
"epoch": 0.5931355624607159,
"grad_norm": 8.3125,
"learning_rate": 9.175905264225582e-06,
"loss": 0.8332701873779297,
"step": 33500
},
{
"epoch": 0.5940208394196125,
"grad_norm": 7.96875,
"learning_rate": 9.174637387158943e-06,
"loss": 0.7919515228271484,
"step": 33550
},
{
"epoch": 0.594906116378509,
"grad_norm": 8.1875,
"learning_rate": 9.173369510092302e-06,
"loss": 0.800345687866211,
"step": 33600
},
{
"epoch": 0.5957913933374056,
"grad_norm": 9.3125,
"learning_rate": 9.172101633025663e-06,
"loss": 0.7806795501708984,
"step": 33650
},
{
"epoch": 0.5966766702963022,
"grad_norm": 7.96875,
"learning_rate": 9.170833755959023e-06,
"loss": 0.8437120056152344,
"step": 33700
},
{
"epoch": 0.5975619472551988,
"grad_norm": 10.4375,
"learning_rate": 9.169565878892384e-06,
"loss": 0.854691162109375,
"step": 33750
},
{
"epoch": 0.5984472242140954,
"grad_norm": 9.125,
"learning_rate": 9.168298001825743e-06,
"loss": 0.7852024078369141,
"step": 33800
},
{
"epoch": 0.599332501172992,
"grad_norm": 10.25,
"learning_rate": 9.167030124759104e-06,
"loss": 0.7816255187988281,
"step": 33850
},
{
"epoch": 0.6002177781318886,
"grad_norm": 9.375,
"learning_rate": 9.165762247692465e-06,
"loss": 0.7625868988037109,
"step": 33900
},
{
"epoch": 0.6011030550907851,
"grad_norm": 7.71875,
"learning_rate": 9.164494370625824e-06,
"loss": 0.8304756164550782,
"step": 33950
},
{
"epoch": 0.6019883320496817,
"grad_norm": 9.6875,
"learning_rate": 9.163226493559185e-06,
"loss": 0.8507473754882813,
"step": 34000
},
{
"epoch": 0.6028736090085783,
"grad_norm": 9.9375,
"learning_rate": 9.161958616492546e-06,
"loss": 0.8916117858886718,
"step": 34050
},
{
"epoch": 0.603758885967475,
"grad_norm": 9.0625,
"learning_rate": 9.160690739425907e-06,
"loss": 0.7812516021728516,
"step": 34100
},
{
"epoch": 0.6046441629263715,
"grad_norm": 9.9375,
"learning_rate": 9.159422862359266e-06,
"loss": 0.85893310546875,
"step": 34150
},
{
"epoch": 0.6055294398852681,
"grad_norm": 8.75,
"learning_rate": 9.158154985292627e-06,
"loss": 0.8037278747558594,
"step": 34200
},
{
"epoch": 0.6064147168441647,
"grad_norm": 9.5,
"learning_rate": 9.156887108225987e-06,
"loss": 0.8571352386474609,
"step": 34250
},
{
"epoch": 0.6072999938030613,
"grad_norm": 9.25,
"learning_rate": 9.155619231159346e-06,
"loss": 0.8393865203857422,
"step": 34300
},
{
"epoch": 0.6081852707619578,
"grad_norm": 8.8125,
"learning_rate": 9.154351354092707e-06,
"loss": 0.7959491729736328,
"step": 34350
},
{
"epoch": 0.6090705477208544,
"grad_norm": 8.375,
"learning_rate": 9.153083477026068e-06,
"loss": 0.8323846435546876,
"step": 34400
},
{
"epoch": 0.6099558246797511,
"grad_norm": 8.1875,
"learning_rate": 9.151815599959429e-06,
"loss": 0.7896021270751953,
"step": 34450
},
{
"epoch": 0.6108411016386477,
"grad_norm": 10.4375,
"learning_rate": 9.150547722892788e-06,
"loss": 0.8387126922607422,
"step": 34500
},
{
"epoch": 0.6117263785975442,
"grad_norm": 8.75,
"learning_rate": 9.149279845826149e-06,
"loss": 0.8292191314697266,
"step": 34550
},
{
"epoch": 0.6126116555564408,
"grad_norm": 6.75,
"learning_rate": 9.14801196875951e-06,
"loss": 0.8881966400146485,
"step": 34600
},
{
"epoch": 0.6134969325153374,
"grad_norm": 8.875,
"learning_rate": 9.14674409169287e-06,
"loss": 0.7790008544921875,
"step": 34650
},
{
"epoch": 0.614382209474234,
"grad_norm": 9.875,
"learning_rate": 9.145476214626231e-06,
"loss": 0.772136459350586,
"step": 34700
},
{
"epoch": 0.6152674864331306,
"grad_norm": 8.9375,
"learning_rate": 9.144208337559592e-06,
"loss": 0.8583509063720703,
"step": 34750
},
{
"epoch": 0.6161527633920272,
"grad_norm": 9.5,
"learning_rate": 9.142940460492951e-06,
"loss": 0.7817726898193359,
"step": 34800
},
{
"epoch": 0.6170380403509238,
"grad_norm": 7.96875,
"learning_rate": 9.141672583426312e-06,
"loss": 0.7983211517333985,
"step": 34850
},
{
"epoch": 0.6179233173098204,
"grad_norm": 7.75,
"learning_rate": 9.140404706359673e-06,
"loss": 0.8518927001953125,
"step": 34900
},
{
"epoch": 0.6188085942687169,
"grad_norm": 7.15625,
"learning_rate": 9.139136829293032e-06,
"loss": 0.7762453460693359,
"step": 34950
},
{
"epoch": 0.6196938712276135,
"grad_norm": 8.6875,
"learning_rate": 9.137868952226393e-06,
"loss": 0.7606196594238281,
"step": 35000
},
{
"epoch": 0.6196938712276135,
"eval_cer": 14.630998411088756,
"eval_loss": 0.34649306535720825,
"eval_runtime": 381.1753,
"eval_samples_per_second": 13.117,
"eval_steps_per_second": 1.64,
"eval_wer": 30.37997340697555,
"step": 35000
}
],
"logging_steps": 50,
"max_steps": 395360,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.8051981033472e+21,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}