{ "best_global_step": 35000, "best_metric": 30.37997340697555, "best_model_checkpoint": "phase5_output/checkpoints/stage1/checkpoint-35000", "epoch": 0.6196938712276135, "eval_steps": 5000, "global_step": 35000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008852769588965908, "grad_norm": 20.375, "learning_rate": 4.900000000000001e-07, "loss": 2.6002566528320314, "step": 50 }, { "epoch": 0.0017705539177931815, "grad_norm": 16.5, "learning_rate": 9.9e-07, "loss": 2.6074697875976565, "step": 100 }, { "epoch": 0.0026558308766897725, "grad_norm": 12.8125, "learning_rate": 1.4900000000000001e-06, "loss": 2.661256103515625, "step": 150 }, { "epoch": 0.003541107835586363, "grad_norm": 16.125, "learning_rate": 1.9900000000000004e-06, "loss": 2.3539471435546875, "step": 200 }, { "epoch": 0.004426384794482954, "grad_norm": 13.125, "learning_rate": 2.4900000000000003e-06, "loss": 2.1791415405273438, "step": 250 }, { "epoch": 0.005311661753379545, "grad_norm": 14.3125, "learning_rate": 2.99e-06, "loss": 1.7517926025390624, "step": 300 }, { "epoch": 0.006196938712276136, "grad_norm": 8.9375, "learning_rate": 3.49e-06, "loss": 1.5738864135742188, "step": 350 }, { "epoch": 0.007082215671172726, "grad_norm": 10.375, "learning_rate": 3.990000000000001e-06, "loss": 1.4318115234375, "step": 400 }, { "epoch": 0.007967492630069318, "grad_norm": 12.0, "learning_rate": 4.49e-06, "loss": 1.3452346801757813, "step": 450 }, { "epoch": 0.008852769588965907, "grad_norm": 11.5, "learning_rate": 4.9900000000000005e-06, "loss": 1.3893937683105468, "step": 500 }, { "epoch": 0.009738046547862499, "grad_norm": 20.125, "learning_rate": 5.490000000000001e-06, "loss": 1.3567886352539062, "step": 550 }, { "epoch": 0.01062332350675909, "grad_norm": 9.5625, "learning_rate": 5.99e-06, "loss": 1.3436286926269532, "step": 600 }, { "epoch": 0.01150860046565568, "grad_norm": 9.875, "learning_rate": 6.4900000000000005e-06, "loss": 1.2315786743164063, "step": 650 }, { "epoch": 0.012393877424552271, "grad_norm": 8.75, "learning_rate": 6.99e-06, "loss": 1.2174966430664063, "step": 700 }, { "epoch": 0.013279154383448863, "grad_norm": 9.375, "learning_rate": 7.49e-06, "loss": 1.2520484161376952, "step": 750 }, { "epoch": 0.014164431342345452, "grad_norm": 8.4375, "learning_rate": 7.990000000000001e-06, "loss": 1.2200564575195312, "step": 800 }, { "epoch": 0.015049708301242044, "grad_norm": 11.1875, "learning_rate": 8.49e-06, "loss": 1.1677375793457032, "step": 850 }, { "epoch": 0.015934985260138635, "grad_norm": 8.9375, "learning_rate": 8.99e-06, "loss": 1.1773777770996094, "step": 900 }, { "epoch": 0.016820262219035226, "grad_norm": 8.9375, "learning_rate": 9.49e-06, "loss": 1.1839574432373048, "step": 950 }, { "epoch": 0.017705539177931814, "grad_norm": 8.8125, "learning_rate": 9.990000000000001e-06, "loss": 1.1368023681640624, "step": 1000 }, { "epoch": 0.018590816136828406, "grad_norm": 9.625, "learning_rate": 9.998757480474695e-06, "loss": 1.1666729736328125, "step": 1050 }, { "epoch": 0.019476093095724997, "grad_norm": 8.0625, "learning_rate": 9.997489603408056e-06, "loss": 1.1337457275390626, "step": 1100 }, { "epoch": 0.02036137005462159, "grad_norm": 8.375, "learning_rate": 9.996221726341415e-06, "loss": 1.1557207489013672, "step": 1150 }, { "epoch": 0.02124664701351818, "grad_norm": 8.375, "learning_rate": 9.994953849274776e-06, "loss": 1.1596089935302734, "step": 1200 }, { "epoch": 0.02213192397241477, "grad_norm": 10.6875, "learning_rate": 9.993685972208136e-06, "loss": 1.115845718383789, "step": 1250 }, { "epoch": 0.02301720093131136, "grad_norm": 11.5, "learning_rate": 9.992418095141496e-06, "loss": 1.0784302520751954, "step": 1300 }, { "epoch": 0.02390247789020795, "grad_norm": 8.9375, "learning_rate": 9.991150218074856e-06, "loss": 1.0735511779785156, "step": 1350 }, { "epoch": 0.024787754849104542, "grad_norm": 8.625, "learning_rate": 9.989882341008217e-06, "loss": 1.0815206146240235, "step": 1400 }, { "epoch": 0.025673031808001134, "grad_norm": 8.5, "learning_rate": 9.988614463941578e-06, "loss": 1.0743460845947266, "step": 1450 }, { "epoch": 0.026558308766897725, "grad_norm": 7.90625, "learning_rate": 9.987346586874937e-06, "loss": 1.1286388397216798, "step": 1500 }, { "epoch": 0.027443585725794313, "grad_norm": 8.6875, "learning_rate": 9.986078709808298e-06, "loss": 1.0764491271972656, "step": 1550 }, { "epoch": 0.028328862684690904, "grad_norm": 7.4375, "learning_rate": 9.984810832741659e-06, "loss": 1.0406829071044923, "step": 1600 }, { "epoch": 0.029214139643587496, "grad_norm": 7.375, "learning_rate": 9.983542955675018e-06, "loss": 1.075464096069336, "step": 1650 }, { "epoch": 0.030099416602484087, "grad_norm": 10.875, "learning_rate": 9.982275078608379e-06, "loss": 1.0150408935546875, "step": 1700 }, { "epoch": 0.03098469356138068, "grad_norm": 9.3125, "learning_rate": 9.98100720154174e-06, "loss": 1.0287052154541017, "step": 1750 }, { "epoch": 0.03186997052027727, "grad_norm": 9.5625, "learning_rate": 9.9797393244751e-06, "loss": 1.031718978881836, "step": 1800 }, { "epoch": 0.03275524747917386, "grad_norm": 10.0625, "learning_rate": 9.97847144740846e-06, "loss": 1.0560354614257812, "step": 1850 }, { "epoch": 0.03364052443807045, "grad_norm": 9.4375, "learning_rate": 9.97720357034182e-06, "loss": 0.9881591033935547, "step": 1900 }, { "epoch": 0.034525801396967044, "grad_norm": 6.75, "learning_rate": 9.975935693275181e-06, "loss": 0.9941422271728516, "step": 1950 }, { "epoch": 0.03541107835586363, "grad_norm": 7.78125, "learning_rate": 9.97466781620854e-06, "loss": 0.9812654876708984, "step": 2000 }, { "epoch": 0.03629635531476022, "grad_norm": 7.40625, "learning_rate": 9.973399939141901e-06, "loss": 1.0280473327636719, "step": 2050 }, { "epoch": 0.03718163227365681, "grad_norm": 6.53125, "learning_rate": 9.972132062075262e-06, "loss": 1.0059999084472657, "step": 2100 }, { "epoch": 0.0380669092325534, "grad_norm": 9.0, "learning_rate": 9.970864185008622e-06, "loss": 1.0437776947021484, "step": 2150 }, { "epoch": 0.038952186191449995, "grad_norm": 8.625, "learning_rate": 9.969596307941983e-06, "loss": 0.9704521942138672, "step": 2200 }, { "epoch": 0.039837463150346586, "grad_norm": 8.75, "learning_rate": 9.968328430875344e-06, "loss": 1.0186034393310548, "step": 2250 }, { "epoch": 0.04072274010924318, "grad_norm": 6.84375, "learning_rate": 9.967060553808703e-06, "loss": 0.9506314086914063, "step": 2300 }, { "epoch": 0.04160801706813977, "grad_norm": 10.5625, "learning_rate": 9.965792676742064e-06, "loss": 1.0404967498779296, "step": 2350 }, { "epoch": 0.04249329402703636, "grad_norm": 7.375, "learning_rate": 9.964524799675425e-06, "loss": 0.9251933288574219, "step": 2400 }, { "epoch": 0.04337857098593295, "grad_norm": 8.0625, "learning_rate": 9.963256922608786e-06, "loss": 1.0444112396240235, "step": 2450 }, { "epoch": 0.04426384794482954, "grad_norm": 10.5, "learning_rate": 9.961989045542145e-06, "loss": 0.9814193725585938, "step": 2500 }, { "epoch": 0.04514912490372613, "grad_norm": 8.5625, "learning_rate": 9.960721168475506e-06, "loss": 0.9973986053466797, "step": 2550 }, { "epoch": 0.04603440186262272, "grad_norm": 9.0625, "learning_rate": 9.959453291408866e-06, "loss": 1.0070331573486329, "step": 2600 }, { "epoch": 0.04691967882151931, "grad_norm": 7.8125, "learning_rate": 9.958185414342225e-06, "loss": 0.9542020416259765, "step": 2650 }, { "epoch": 0.0478049557804159, "grad_norm": 10.25, "learning_rate": 9.956917537275586e-06, "loss": 0.9571893310546875, "step": 2700 }, { "epoch": 0.04869023273931249, "grad_norm": 9.375, "learning_rate": 9.955649660208947e-06, "loss": 0.9763975524902344, "step": 2750 }, { "epoch": 0.049575509698209085, "grad_norm": 9.625, "learning_rate": 9.954381783142308e-06, "loss": 0.9599230194091797, "step": 2800 }, { "epoch": 0.050460786657105676, "grad_norm": 11.25, "learning_rate": 9.953113906075667e-06, "loss": 1.0347834014892578, "step": 2850 }, { "epoch": 0.05134606361600227, "grad_norm": 10.5625, "learning_rate": 9.951846029009028e-06, "loss": 0.9465586853027343, "step": 2900 }, { "epoch": 0.05223134057489886, "grad_norm": 8.0625, "learning_rate": 9.950578151942389e-06, "loss": 0.9527477264404297, "step": 2950 }, { "epoch": 0.05311661753379545, "grad_norm": 10.4375, "learning_rate": 9.94931027487575e-06, "loss": 0.9980839538574219, "step": 3000 }, { "epoch": 0.05400189449269204, "grad_norm": 9.6875, "learning_rate": 9.948042397809109e-06, "loss": 0.9938941192626953, "step": 3050 }, { "epoch": 0.054887171451588626, "grad_norm": 8.6875, "learning_rate": 9.94677452074247e-06, "loss": 0.9848545074462891, "step": 3100 }, { "epoch": 0.05577244841048522, "grad_norm": 6.875, "learning_rate": 9.94550664367583e-06, "loss": 0.9058123779296875, "step": 3150 }, { "epoch": 0.05665772536938181, "grad_norm": 10.0, "learning_rate": 9.94423876660919e-06, "loss": 0.9505638122558594, "step": 3200 }, { "epoch": 0.0575430023282784, "grad_norm": 5.96875, "learning_rate": 9.94297088954255e-06, "loss": 0.9797083282470703, "step": 3250 }, { "epoch": 0.05842827928717499, "grad_norm": 7.78125, "learning_rate": 9.941703012475911e-06, "loss": 0.9330976867675781, "step": 3300 }, { "epoch": 0.05931355624607158, "grad_norm": 10.875, "learning_rate": 9.940435135409272e-06, "loss": 0.9697081756591797, "step": 3350 }, { "epoch": 0.060198833204968175, "grad_norm": 8.8125, "learning_rate": 9.939167258342633e-06, "loss": 0.9778965759277344, "step": 3400 }, { "epoch": 0.061084110163864766, "grad_norm": 6.46875, "learning_rate": 9.937899381275993e-06, "loss": 0.9776050567626953, "step": 3450 }, { "epoch": 0.06196938712276136, "grad_norm": 8.75, "learning_rate": 9.936631504209352e-06, "loss": 0.9795106506347656, "step": 3500 }, { "epoch": 0.06285466408165795, "grad_norm": 9.5625, "learning_rate": 9.935363627142713e-06, "loss": 0.9063382720947266, "step": 3550 }, { "epoch": 0.06373994104055454, "grad_norm": 8.125, "learning_rate": 9.934095750076074e-06, "loss": 0.9287123107910156, "step": 3600 }, { "epoch": 0.06462521799945113, "grad_norm": 10.1875, "learning_rate": 9.932827873009435e-06, "loss": 0.9231369018554687, "step": 3650 }, { "epoch": 0.06551049495834772, "grad_norm": 7.5, "learning_rate": 9.931559995942794e-06, "loss": 0.9772643280029297, "step": 3700 }, { "epoch": 0.06639577191724431, "grad_norm": 9.75, "learning_rate": 9.930292118876155e-06, "loss": 0.9633369445800781, "step": 3750 }, { "epoch": 0.0672810488761409, "grad_norm": 10.3125, "learning_rate": 9.929024241809516e-06, "loss": 0.9590021514892578, "step": 3800 }, { "epoch": 0.0681663258350375, "grad_norm": 8.6875, "learning_rate": 9.927756364742875e-06, "loss": 0.9859857177734375, "step": 3850 }, { "epoch": 0.06905160279393409, "grad_norm": 7.90625, "learning_rate": 9.926488487676236e-06, "loss": 0.9285535430908203, "step": 3900 }, { "epoch": 0.06993687975283067, "grad_norm": 9.4375, "learning_rate": 9.925220610609596e-06, "loss": 0.9168830871582031, "step": 3950 }, { "epoch": 0.07082215671172726, "grad_norm": 8.6875, "learning_rate": 9.923952733542957e-06, "loss": 0.9035071563720704, "step": 4000 }, { "epoch": 0.07170743367062385, "grad_norm": 7.34375, "learning_rate": 9.922684856476316e-06, "loss": 0.9321700286865234, "step": 4050 }, { "epoch": 0.07259271062952044, "grad_norm": 8.875, "learning_rate": 9.921416979409677e-06, "loss": 0.9381676483154296, "step": 4100 }, { "epoch": 0.07347798758841703, "grad_norm": 9.6875, "learning_rate": 9.920149102343038e-06, "loss": 0.9207463073730469, "step": 4150 }, { "epoch": 0.07436326454731362, "grad_norm": 9.5, "learning_rate": 9.918881225276397e-06, "loss": 0.9887482452392579, "step": 4200 }, { "epoch": 0.07524854150621021, "grad_norm": 7.125, "learning_rate": 9.917613348209758e-06, "loss": 0.8934328460693359, "step": 4250 }, { "epoch": 0.0761338184651068, "grad_norm": 7.25, "learning_rate": 9.916345471143119e-06, "loss": 0.9096377563476562, "step": 4300 }, { "epoch": 0.0770190954240034, "grad_norm": 7.96875, "learning_rate": 9.91507759407648e-06, "loss": 0.914523696899414, "step": 4350 }, { "epoch": 0.07790437238289999, "grad_norm": 7.5625, "learning_rate": 9.913809717009839e-06, "loss": 0.9668045806884765, "step": 4400 }, { "epoch": 0.07878964934179658, "grad_norm": 7.84375, "learning_rate": 9.9125418399432e-06, "loss": 0.8644290924072265, "step": 4450 }, { "epoch": 0.07967492630069317, "grad_norm": 6.65625, "learning_rate": 9.91127396287656e-06, "loss": 0.9925772094726563, "step": 4500 }, { "epoch": 0.08056020325958976, "grad_norm": 8.6875, "learning_rate": 9.910006085809921e-06, "loss": 0.9446270751953125, "step": 4550 }, { "epoch": 0.08144548021848635, "grad_norm": 9.25, "learning_rate": 9.908738208743282e-06, "loss": 0.8774137115478515, "step": 4600 }, { "epoch": 0.08233075717738295, "grad_norm": 9.0, "learning_rate": 9.907470331676643e-06, "loss": 0.9241300201416016, "step": 4650 }, { "epoch": 0.08321603413627954, "grad_norm": 7.90625, "learning_rate": 9.906202454610002e-06, "loss": 0.9137750244140626, "step": 4700 }, { "epoch": 0.08410131109517613, "grad_norm": 8.5, "learning_rate": 9.904934577543363e-06, "loss": 0.9445246887207032, "step": 4750 }, { "epoch": 0.08498658805407272, "grad_norm": 8.8125, "learning_rate": 9.903666700476723e-06, "loss": 0.9241275024414063, "step": 4800 }, { "epoch": 0.08587186501296931, "grad_norm": 11.4375, "learning_rate": 9.902398823410082e-06, "loss": 0.9301995849609375, "step": 4850 }, { "epoch": 0.0867571419718659, "grad_norm": 9.375, "learning_rate": 9.901130946343443e-06, "loss": 0.8602587127685547, "step": 4900 }, { "epoch": 0.0876424189307625, "grad_norm": 8.0625, "learning_rate": 9.899863069276804e-06, "loss": 0.8998529052734375, "step": 4950 }, { "epoch": 0.08852769588965909, "grad_norm": 7.96875, "learning_rate": 9.898595192210165e-06, "loss": 0.9169329833984375, "step": 5000 }, { "epoch": 0.08852769588965909, "eval_cer": 18.002960906275927, "eval_loss": 0.39146754145622253, "eval_runtime": 397.3897, "eval_samples_per_second": 12.582, "eval_steps_per_second": 1.573, "eval_wer": 35.30223995090518, "step": 5000 }, { "epoch": 0.08941297284855568, "grad_norm": 16.625, "learning_rate": 9.897327315143524e-06, "loss": 0.8970352172851562, "step": 5050 }, { "epoch": 0.09029824980745225, "grad_norm": 7.3125, "learning_rate": 9.896059438076885e-06, "loss": 0.9074311065673828, "step": 5100 }, { "epoch": 0.09118352676634885, "grad_norm": 9.3125, "learning_rate": 9.894791561010246e-06, "loss": 0.9284700775146484, "step": 5150 }, { "epoch": 0.09206880372524544, "grad_norm": 7.15625, "learning_rate": 9.893523683943605e-06, "loss": 0.9229075622558593, "step": 5200 }, { "epoch": 0.09295408068414203, "grad_norm": 7.96875, "learning_rate": 9.892255806876966e-06, "loss": 0.9389077758789063, "step": 5250 }, { "epoch": 0.09383935764303862, "grad_norm": 7.46875, "learning_rate": 9.890987929810326e-06, "loss": 0.9642659759521485, "step": 5300 }, { "epoch": 0.09472463460193521, "grad_norm": 9.0625, "learning_rate": 9.889720052743687e-06, "loss": 0.9125606536865234, "step": 5350 }, { "epoch": 0.0956099115608318, "grad_norm": 7.3125, "learning_rate": 9.888452175677046e-06, "loss": 0.8953401947021484, "step": 5400 }, { "epoch": 0.0964951885197284, "grad_norm": 9.8125, "learning_rate": 9.887184298610407e-06, "loss": 0.8888931274414062, "step": 5450 }, { "epoch": 0.09738046547862499, "grad_norm": 8.875, "learning_rate": 9.885916421543768e-06, "loss": 0.9167032623291016, "step": 5500 }, { "epoch": 0.09826574243752158, "grad_norm": 8.9375, "learning_rate": 9.884648544477129e-06, "loss": 0.9441605377197265, "step": 5550 }, { "epoch": 0.09915101939641817, "grad_norm": 8.6875, "learning_rate": 9.883380667410488e-06, "loss": 0.9138188171386719, "step": 5600 }, { "epoch": 0.10003629635531476, "grad_norm": 10.125, "learning_rate": 9.882112790343849e-06, "loss": 0.8812205505371093, "step": 5650 }, { "epoch": 0.10092157331421135, "grad_norm": 7.1875, "learning_rate": 9.88084491327721e-06, "loss": 0.875129623413086, "step": 5700 }, { "epoch": 0.10180685027310794, "grad_norm": 7.40625, "learning_rate": 9.87957703621057e-06, "loss": 0.8896215057373047, "step": 5750 }, { "epoch": 0.10269212723200453, "grad_norm": 10.75, "learning_rate": 9.878309159143931e-06, "loss": 0.870993881225586, "step": 5800 }, { "epoch": 0.10357740419090113, "grad_norm": 5.8125, "learning_rate": 9.87704128207729e-06, "loss": 0.8822001647949219, "step": 5850 }, { "epoch": 0.10446268114979772, "grad_norm": 8.6875, "learning_rate": 9.875773405010651e-06, "loss": 0.8593311309814453, "step": 5900 }, { "epoch": 0.10534795810869431, "grad_norm": 10.1875, "learning_rate": 9.874505527944012e-06, "loss": 0.8941629028320313, "step": 5950 }, { "epoch": 0.1062332350675909, "grad_norm": 8.75, "learning_rate": 9.873237650877373e-06, "loss": 0.9121043395996093, "step": 6000 }, { "epoch": 0.10711851202648749, "grad_norm": 9.1875, "learning_rate": 9.871969773810732e-06, "loss": 0.8677694702148437, "step": 6050 }, { "epoch": 0.10800378898538408, "grad_norm": 7.375, "learning_rate": 9.870701896744092e-06, "loss": 0.8670549011230468, "step": 6100 }, { "epoch": 0.10888906594428067, "grad_norm": 12.875, "learning_rate": 9.869434019677453e-06, "loss": 0.857596435546875, "step": 6150 }, { "epoch": 0.10977434290317725, "grad_norm": 8.3125, "learning_rate": 9.868166142610814e-06, "loss": 0.8889055633544922, "step": 6200 }, { "epoch": 0.11065961986207384, "grad_norm": 9.375, "learning_rate": 9.866898265544173e-06, "loss": 0.8906202697753907, "step": 6250 }, { "epoch": 0.11154489682097044, "grad_norm": 8.9375, "learning_rate": 9.865630388477534e-06, "loss": 0.8508009338378906, "step": 6300 }, { "epoch": 0.11243017377986703, "grad_norm": 9.4375, "learning_rate": 9.864362511410895e-06, "loss": 0.8770820617675781, "step": 6350 }, { "epoch": 0.11331545073876362, "grad_norm": 7.65625, "learning_rate": 9.863094634344254e-06, "loss": 0.9325962829589843, "step": 6400 }, { "epoch": 0.11420072769766021, "grad_norm": 10.9375, "learning_rate": 9.861826757277615e-06, "loss": 0.913088150024414, "step": 6450 }, { "epoch": 0.1150860046565568, "grad_norm": 7.8125, "learning_rate": 9.860558880210976e-06, "loss": 0.8288323211669922, "step": 6500 }, { "epoch": 0.11597128161545339, "grad_norm": 9.1875, "learning_rate": 9.859291003144336e-06, "loss": 0.8909578704833985, "step": 6550 }, { "epoch": 0.11685655857434998, "grad_norm": 6.84375, "learning_rate": 9.858023126077696e-06, "loss": 0.8909761810302734, "step": 6600 }, { "epoch": 0.11774183553324657, "grad_norm": 8.5625, "learning_rate": 9.856755249011056e-06, "loss": 0.913955078125, "step": 6650 }, { "epoch": 0.11862711249214317, "grad_norm": 7.375, "learning_rate": 9.855487371944417e-06, "loss": 0.890057601928711, "step": 6700 }, { "epoch": 0.11951238945103976, "grad_norm": 7.09375, "learning_rate": 9.854219494877776e-06, "loss": 0.8805287170410157, "step": 6750 }, { "epoch": 0.12039766640993635, "grad_norm": 8.0625, "learning_rate": 9.852951617811137e-06, "loss": 0.8432673645019532, "step": 6800 }, { "epoch": 0.12128294336883294, "grad_norm": 8.25, "learning_rate": 9.8516837407445e-06, "loss": 0.936988525390625, "step": 6850 }, { "epoch": 0.12216822032772953, "grad_norm": 8.5, "learning_rate": 9.850415863677859e-06, "loss": 0.8533177185058594, "step": 6900 }, { "epoch": 0.12305349728662612, "grad_norm": 7.5, "learning_rate": 9.84914798661122e-06, "loss": 0.8586708068847656, "step": 6950 }, { "epoch": 0.12393877424552271, "grad_norm": 7.96875, "learning_rate": 9.84788010954458e-06, "loss": 0.8778302001953125, "step": 7000 }, { "epoch": 0.1248240512044193, "grad_norm": 9.25, "learning_rate": 9.84661223247794e-06, "loss": 0.9390164947509766, "step": 7050 }, { "epoch": 0.1257093281633159, "grad_norm": 7.875, "learning_rate": 9.8453443554113e-06, "loss": 0.8395907592773437, "step": 7100 }, { "epoch": 0.1265946051222125, "grad_norm": 8.9375, "learning_rate": 9.844076478344661e-06, "loss": 0.8765547180175781, "step": 7150 }, { "epoch": 0.12747988208110908, "grad_norm": 8.8125, "learning_rate": 9.842808601278022e-06, "loss": 0.9166593170166015, "step": 7200 }, { "epoch": 0.12836515904000567, "grad_norm": 9.6875, "learning_rate": 9.841540724211381e-06, "loss": 0.8632067108154297, "step": 7250 }, { "epoch": 0.12925043599890226, "grad_norm": 10.0, "learning_rate": 9.840272847144742e-06, "loss": 0.8881147003173828, "step": 7300 }, { "epoch": 0.13013571295779885, "grad_norm": 9.125, "learning_rate": 9.839004970078103e-06, "loss": 0.8959363555908203, "step": 7350 }, { "epoch": 0.13102098991669545, "grad_norm": 5.46875, "learning_rate": 9.837737093011462e-06, "loss": 0.8682516479492187, "step": 7400 }, { "epoch": 0.13190626687559204, "grad_norm": 8.875, "learning_rate": 9.836469215944822e-06, "loss": 0.8162654113769531, "step": 7450 }, { "epoch": 0.13279154383448863, "grad_norm": 8.125, "learning_rate": 9.835201338878183e-06, "loss": 0.9132343292236328, "step": 7500 }, { "epoch": 0.13367682079338522, "grad_norm": 8.1875, "learning_rate": 9.833933461811544e-06, "loss": 0.9420564270019531, "step": 7550 }, { "epoch": 0.1345620977522818, "grad_norm": 8.125, "learning_rate": 9.832665584744903e-06, "loss": 0.9325301361083984, "step": 7600 }, { "epoch": 0.1354473747111784, "grad_norm": 9.0625, "learning_rate": 9.831397707678264e-06, "loss": 0.9296858978271484, "step": 7650 }, { "epoch": 0.136332651670075, "grad_norm": 8.25, "learning_rate": 9.830129830611625e-06, "loss": 0.9119468688964844, "step": 7700 }, { "epoch": 0.13721792862897159, "grad_norm": 8.4375, "learning_rate": 9.828861953544984e-06, "loss": 0.8511313629150391, "step": 7750 }, { "epoch": 0.13810320558786818, "grad_norm": 9.375, "learning_rate": 9.827594076478345e-06, "loss": 0.8683940124511719, "step": 7800 }, { "epoch": 0.13898848254676477, "grad_norm": 8.75, "learning_rate": 9.826326199411706e-06, "loss": 0.8960696411132812, "step": 7850 }, { "epoch": 0.13987375950566133, "grad_norm": 7.875, "learning_rate": 9.825058322345066e-06, "loss": 0.9292098999023437, "step": 7900 }, { "epoch": 0.14075903646455792, "grad_norm": 8.375, "learning_rate": 9.823790445278425e-06, "loss": 0.8068239593505859, "step": 7950 }, { "epoch": 0.14164431342345452, "grad_norm": 7.59375, "learning_rate": 9.822522568211788e-06, "loss": 0.8778212738037109, "step": 8000 }, { "epoch": 0.1425295903823511, "grad_norm": 8.25, "learning_rate": 9.821254691145147e-06, "loss": 0.8837771606445313, "step": 8050 }, { "epoch": 0.1434148673412477, "grad_norm": 5.875, "learning_rate": 9.819986814078508e-06, "loss": 0.9285024261474609, "step": 8100 }, { "epoch": 0.1443001443001443, "grad_norm": 7.71875, "learning_rate": 9.818718937011869e-06, "loss": 0.9287461853027343, "step": 8150 }, { "epoch": 0.14518542125904088, "grad_norm": 8.0625, "learning_rate": 9.81745105994523e-06, "loss": 0.8639019775390625, "step": 8200 }, { "epoch": 0.14607069821793747, "grad_norm": 8.6875, "learning_rate": 9.816183182878589e-06, "loss": 0.8503567504882813, "step": 8250 }, { "epoch": 0.14695597517683406, "grad_norm": 7.90625, "learning_rate": 9.81491530581195e-06, "loss": 0.8940105438232422, "step": 8300 }, { "epoch": 0.14784125213573066, "grad_norm": 9.375, "learning_rate": 9.81364742874531e-06, "loss": 0.8853314208984375, "step": 8350 }, { "epoch": 0.14872652909462725, "grad_norm": 9.0625, "learning_rate": 9.81237955167867e-06, "loss": 0.9398179626464844, "step": 8400 }, { "epoch": 0.14961180605352384, "grad_norm": 9.0625, "learning_rate": 9.81111167461203e-06, "loss": 0.9009015655517578, "step": 8450 }, { "epoch": 0.15049708301242043, "grad_norm": 9.5625, "learning_rate": 9.809843797545391e-06, "loss": 0.8552869415283203, "step": 8500 }, { "epoch": 0.15138235997131702, "grad_norm": 8.75, "learning_rate": 9.808575920478752e-06, "loss": 0.8683760070800781, "step": 8550 }, { "epoch": 0.1522676369302136, "grad_norm": 8.875, "learning_rate": 9.807308043412111e-06, "loss": 0.8234300994873047, "step": 8600 }, { "epoch": 0.1531529138891102, "grad_norm": 8.875, "learning_rate": 9.806040166345472e-06, "loss": 0.7992705535888672, "step": 8650 }, { "epoch": 0.1540381908480068, "grad_norm": 7.71875, "learning_rate": 9.804772289278833e-06, "loss": 0.8522439575195313, "step": 8700 }, { "epoch": 0.1549234678069034, "grad_norm": 10.25, "learning_rate": 9.803504412212193e-06, "loss": 0.8569031524658203, "step": 8750 }, { "epoch": 0.15580874476579998, "grad_norm": 8.25, "learning_rate": 9.802236535145552e-06, "loss": 0.9121205139160157, "step": 8800 }, { "epoch": 0.15669402172469657, "grad_norm": 8.9375, "learning_rate": 9.800968658078913e-06, "loss": 0.8695069122314453, "step": 8850 }, { "epoch": 0.15757929868359316, "grad_norm": 7.875, "learning_rate": 9.799700781012274e-06, "loss": 0.8624824523925781, "step": 8900 }, { "epoch": 0.15846457564248975, "grad_norm": 7.21875, "learning_rate": 9.798432903945633e-06, "loss": 0.8402172088623047, "step": 8950 }, { "epoch": 0.15934985260138634, "grad_norm": 7.8125, "learning_rate": 9.797165026878994e-06, "loss": 0.8360052490234375, "step": 9000 }, { "epoch": 0.16023512956028294, "grad_norm": 10.9375, "learning_rate": 9.795897149812355e-06, "loss": 0.9017723083496094, "step": 9050 }, { "epoch": 0.16112040651917953, "grad_norm": 8.4375, "learning_rate": 9.794629272745716e-06, "loss": 0.8305178833007812, "step": 9100 }, { "epoch": 0.16200568347807612, "grad_norm": 8.6875, "learning_rate": 9.793361395679076e-06, "loss": 0.8787906646728516, "step": 9150 }, { "epoch": 0.1628909604369727, "grad_norm": 8.625, "learning_rate": 9.792093518612437e-06, "loss": 0.9024432373046875, "step": 9200 }, { "epoch": 0.1637762373958693, "grad_norm": 10.125, "learning_rate": 9.790825641545796e-06, "loss": 0.8455127716064453, "step": 9250 }, { "epoch": 0.1646615143547659, "grad_norm": 10.9375, "learning_rate": 9.789557764479157e-06, "loss": 0.8886133575439453, "step": 9300 }, { "epoch": 0.16554679131366248, "grad_norm": 8.8125, "learning_rate": 9.788289887412518e-06, "loss": 0.9232273101806641, "step": 9350 }, { "epoch": 0.16643206827255907, "grad_norm": 9.5625, "learning_rate": 9.787022010345879e-06, "loss": 0.7960693359375, "step": 9400 }, { "epoch": 0.16731734523145567, "grad_norm": 9.875, "learning_rate": 9.785754133279238e-06, "loss": 0.9040877532958984, "step": 9450 }, { "epoch": 0.16820262219035226, "grad_norm": 8.375, "learning_rate": 9.784486256212599e-06, "loss": 0.8423170471191406, "step": 9500 }, { "epoch": 0.16908789914924885, "grad_norm": 10.75, "learning_rate": 9.78321837914596e-06, "loss": 0.7995271301269531, "step": 9550 }, { "epoch": 0.16997317610814544, "grad_norm": 6.78125, "learning_rate": 9.781950502079319e-06, "loss": 0.8801698303222656, "step": 9600 }, { "epoch": 0.17085845306704203, "grad_norm": 6.84375, "learning_rate": 9.78068262501268e-06, "loss": 0.902987060546875, "step": 9650 }, { "epoch": 0.17174373002593862, "grad_norm": 8.3125, "learning_rate": 9.77941474794604e-06, "loss": 0.9009125518798828, "step": 9700 }, { "epoch": 0.17262900698483521, "grad_norm": 10.4375, "learning_rate": 9.778146870879401e-06, "loss": 0.8579206085205078, "step": 9750 }, { "epoch": 0.1735142839437318, "grad_norm": 7.4375, "learning_rate": 9.77687899381276e-06, "loss": 0.8464696502685547, "step": 9800 }, { "epoch": 0.1743995609026284, "grad_norm": 11.625, "learning_rate": 9.775611116746121e-06, "loss": 0.85698974609375, "step": 9850 }, { "epoch": 0.175284837861525, "grad_norm": 6.5625, "learning_rate": 9.774343239679482e-06, "loss": 0.8342364501953125, "step": 9900 }, { "epoch": 0.17617011482042158, "grad_norm": 8.4375, "learning_rate": 9.773075362612841e-06, "loss": 0.8839446258544922, "step": 9950 }, { "epoch": 0.17705539177931817, "grad_norm": 8.75, "learning_rate": 9.771807485546202e-06, "loss": 0.8295965576171875, "step": 10000 }, { "epoch": 0.17705539177931817, "eval_cer": 16.53660732200669, "eval_loss": 0.36992114782333374, "eval_runtime": 390.2086, "eval_samples_per_second": 12.814, "eval_steps_per_second": 1.602, "eval_wer": 33.00347754935052, "step": 10000 }, { "epoch": 0.17794066873821476, "grad_norm": 10.0, "learning_rate": 9.770539608479563e-06, "loss": 0.9520655059814453, "step": 10050 }, { "epoch": 0.17882594569711135, "grad_norm": 7.5, "learning_rate": 9.769271731412923e-06, "loss": 0.9026583099365234, "step": 10100 }, { "epoch": 0.17971122265600792, "grad_norm": 6.8125, "learning_rate": 9.768003854346282e-06, "loss": 0.8356916046142578, "step": 10150 }, { "epoch": 0.1805964996149045, "grad_norm": 10.125, "learning_rate": 9.766735977279643e-06, "loss": 0.8295375823974609, "step": 10200 }, { "epoch": 0.1814817765738011, "grad_norm": 8.8125, "learning_rate": 9.765468100213004e-06, "loss": 0.8467240905761719, "step": 10250 }, { "epoch": 0.1823670535326977, "grad_norm": 7.9375, "learning_rate": 9.764200223146365e-06, "loss": 0.8381356811523437, "step": 10300 }, { "epoch": 0.18325233049159428, "grad_norm": 8.6875, "learning_rate": 9.762932346079726e-06, "loss": 0.8656709289550781, "step": 10350 }, { "epoch": 0.18413760745049088, "grad_norm": 8.875, "learning_rate": 9.761664469013086e-06, "loss": 0.874046401977539, "step": 10400 }, { "epoch": 0.18502288440938747, "grad_norm": 7.78125, "learning_rate": 9.760396591946446e-06, "loss": 0.8614305877685546, "step": 10450 }, { "epoch": 0.18590816136828406, "grad_norm": 9.125, "learning_rate": 9.759128714879806e-06, "loss": 0.8775393676757812, "step": 10500 }, { "epoch": 0.18679343832718065, "grad_norm": 7.625, "learning_rate": 9.757860837813167e-06, "loss": 0.8610476684570313, "step": 10550 }, { "epoch": 0.18767871528607724, "grad_norm": 7.71875, "learning_rate": 9.756592960746526e-06, "loss": 0.9277496337890625, "step": 10600 }, { "epoch": 0.18856399224497383, "grad_norm": 8.5, "learning_rate": 9.755325083679887e-06, "loss": 0.8972523498535157, "step": 10650 }, { "epoch": 0.18944926920387042, "grad_norm": 8.0, "learning_rate": 9.754057206613248e-06, "loss": 0.854305648803711, "step": 10700 }, { "epoch": 0.19033454616276702, "grad_norm": 5.65625, "learning_rate": 9.752789329546609e-06, "loss": 0.8421508026123047, "step": 10750 }, { "epoch": 0.1912198231216636, "grad_norm": 6.625, "learning_rate": 9.751521452479968e-06, "loss": 0.8855830383300781, "step": 10800 }, { "epoch": 0.1921051000805602, "grad_norm": 11.0625, "learning_rate": 9.750253575413329e-06, "loss": 0.8550155639648438, "step": 10850 }, { "epoch": 0.1929903770394568, "grad_norm": 8.5, "learning_rate": 9.74898569834669e-06, "loss": 0.8865677642822266, "step": 10900 }, { "epoch": 0.19387565399835338, "grad_norm": 6.9375, "learning_rate": 9.747717821280049e-06, "loss": 0.8427695465087891, "step": 10950 }, { "epoch": 0.19476093095724997, "grad_norm": 7.9375, "learning_rate": 9.74644994421341e-06, "loss": 0.8303961181640624, "step": 11000 }, { "epoch": 0.19564620791614656, "grad_norm": 9.25, "learning_rate": 9.74518206714677e-06, "loss": 0.8993209838867188, "step": 11050 }, { "epoch": 0.19653148487504316, "grad_norm": 8.8125, "learning_rate": 9.743914190080131e-06, "loss": 0.8488899993896485, "step": 11100 }, { "epoch": 0.19741676183393975, "grad_norm": 6.03125, "learning_rate": 9.74264631301349e-06, "loss": 0.852935791015625, "step": 11150 }, { "epoch": 0.19830203879283634, "grad_norm": 7.84375, "learning_rate": 9.741378435946851e-06, "loss": 0.8230840301513672, "step": 11200 }, { "epoch": 0.19918731575173293, "grad_norm": 6.8125, "learning_rate": 9.740110558880212e-06, "loss": 0.8860896301269531, "step": 11250 }, { "epoch": 0.20007259271062952, "grad_norm": 7.5, "learning_rate": 9.738842681813573e-06, "loss": 0.8350762939453125, "step": 11300 }, { "epoch": 0.2009578696695261, "grad_norm": 7.75, "learning_rate": 9.737574804746932e-06, "loss": 0.835966796875, "step": 11350 }, { "epoch": 0.2018431466284227, "grad_norm": 12.3125, "learning_rate": 9.736306927680292e-06, "loss": 0.85518798828125, "step": 11400 }, { "epoch": 0.2027284235873193, "grad_norm": 6.84375, "learning_rate": 9.735039050613653e-06, "loss": 0.8386080169677734, "step": 11450 }, { "epoch": 0.2036137005462159, "grad_norm": 6.1875, "learning_rate": 9.733771173547014e-06, "loss": 0.8899297332763672, "step": 11500 }, { "epoch": 0.20449897750511248, "grad_norm": 6.8125, "learning_rate": 9.732503296480375e-06, "loss": 0.8508304595947266, "step": 11550 }, { "epoch": 0.20538425446400907, "grad_norm": 8.1875, "learning_rate": 9.731235419413734e-06, "loss": 0.8747320556640625, "step": 11600 }, { "epoch": 0.20626953142290566, "grad_norm": 9.5, "learning_rate": 9.729967542347095e-06, "loss": 0.8832579803466797, "step": 11650 }, { "epoch": 0.20715480838180225, "grad_norm": 7.0, "learning_rate": 9.728699665280456e-06, "loss": 0.8430067443847656, "step": 11700 }, { "epoch": 0.20804008534069884, "grad_norm": 8.625, "learning_rate": 9.727431788213816e-06, "loss": 0.9135202026367187, "step": 11750 }, { "epoch": 0.20892536229959544, "grad_norm": 11.5, "learning_rate": 9.726163911147176e-06, "loss": 0.8933136749267578, "step": 11800 }, { "epoch": 0.20981063925849203, "grad_norm": 9.25, "learning_rate": 9.724896034080536e-06, "loss": 0.8763120269775391, "step": 11850 }, { "epoch": 0.21069591621738862, "grad_norm": 9.875, "learning_rate": 9.723628157013897e-06, "loss": 0.9074213409423828, "step": 11900 }, { "epoch": 0.2115811931762852, "grad_norm": 8.0, "learning_rate": 9.722360279947258e-06, "loss": 0.8514747619628906, "step": 11950 }, { "epoch": 0.2124664701351818, "grad_norm": 8.5, "learning_rate": 9.721092402880617e-06, "loss": 0.8526225280761719, "step": 12000 }, { "epoch": 0.2133517470940784, "grad_norm": 8.5, "learning_rate": 9.719824525813978e-06, "loss": 0.8410261535644531, "step": 12050 }, { "epoch": 0.21423702405297498, "grad_norm": 9.1875, "learning_rate": 9.718556648747339e-06, "loss": 0.8436747741699219, "step": 12100 }, { "epoch": 0.21512230101187158, "grad_norm": 7.21875, "learning_rate": 9.717288771680698e-06, "loss": 0.8132114410400391, "step": 12150 }, { "epoch": 0.21600757797076817, "grad_norm": 10.0625, "learning_rate": 9.716020894614059e-06, "loss": 0.8950498962402343, "step": 12200 }, { "epoch": 0.21689285492966476, "grad_norm": 9.375, "learning_rate": 9.71475301754742e-06, "loss": 0.8579686737060547, "step": 12250 }, { "epoch": 0.21777813188856135, "grad_norm": 7.5625, "learning_rate": 9.71348514048078e-06, "loss": 0.9161724853515625, "step": 12300 }, { "epoch": 0.21866340884745794, "grad_norm": 6.65625, "learning_rate": 9.71221726341414e-06, "loss": 0.7839602661132813, "step": 12350 }, { "epoch": 0.2195486858063545, "grad_norm": 7.78125, "learning_rate": 9.7109493863475e-06, "loss": 0.8397283935546875, "step": 12400 }, { "epoch": 0.2204339627652511, "grad_norm": 9.0625, "learning_rate": 9.709681509280861e-06, "loss": 0.8791749572753906, "step": 12450 }, { "epoch": 0.2213192397241477, "grad_norm": 8.1875, "learning_rate": 9.70841363221422e-06, "loss": 0.8308121490478516, "step": 12500 }, { "epoch": 0.22220451668304428, "grad_norm": 6.34375, "learning_rate": 9.707145755147581e-06, "loss": 0.8770150756835937, "step": 12550 }, { "epoch": 0.22308979364194087, "grad_norm": 9.4375, "learning_rate": 9.705877878080943e-06, "loss": 0.8016796875, "step": 12600 }, { "epoch": 0.22397507060083746, "grad_norm": 8.25, "learning_rate": 9.704610001014303e-06, "loss": 0.880452880859375, "step": 12650 }, { "epoch": 0.22486034755973405, "grad_norm": 8.875, "learning_rate": 9.703342123947663e-06, "loss": 0.8713301849365235, "step": 12700 }, { "epoch": 0.22574562451863064, "grad_norm": 8.4375, "learning_rate": 9.702074246881024e-06, "loss": 0.8404985046386719, "step": 12750 }, { "epoch": 0.22663090147752724, "grad_norm": 9.5, "learning_rate": 9.700806369814383e-06, "loss": 0.8488478088378906, "step": 12800 }, { "epoch": 0.22751617843642383, "grad_norm": 6.15625, "learning_rate": 9.699538492747744e-06, "loss": 0.8541165161132812, "step": 12850 }, { "epoch": 0.22840145539532042, "grad_norm": 7.9375, "learning_rate": 9.698270615681105e-06, "loss": 0.8624703216552735, "step": 12900 }, { "epoch": 0.229286732354217, "grad_norm": 7.6875, "learning_rate": 9.697002738614466e-06, "loss": 0.8818684387207031, "step": 12950 }, { "epoch": 0.2301720093131136, "grad_norm": 9.125, "learning_rate": 9.695734861547825e-06, "loss": 0.8864445495605469, "step": 13000 }, { "epoch": 0.2310572862720102, "grad_norm": 8.125, "learning_rate": 9.694466984481186e-06, "loss": 0.8283074951171875, "step": 13050 }, { "epoch": 0.23194256323090678, "grad_norm": 7.6875, "learning_rate": 9.693199107414546e-06, "loss": 0.8552584075927734, "step": 13100 }, { "epoch": 0.23282784018980338, "grad_norm": 9.5, "learning_rate": 9.691931230347906e-06, "loss": 0.8421883392333984, "step": 13150 }, { "epoch": 0.23371311714869997, "grad_norm": 7.0625, "learning_rate": 9.690663353281266e-06, "loss": 0.8718794250488281, "step": 13200 }, { "epoch": 0.23459839410759656, "grad_norm": 6.8125, "learning_rate": 9.689395476214627e-06, "loss": 0.8517426300048828, "step": 13250 }, { "epoch": 0.23548367106649315, "grad_norm": 9.4375, "learning_rate": 9.688127599147988e-06, "loss": 0.8756562042236328, "step": 13300 }, { "epoch": 0.23636894802538974, "grad_norm": 9.625, "learning_rate": 9.686859722081347e-06, "loss": 0.8349308776855469, "step": 13350 }, { "epoch": 0.23725422498428633, "grad_norm": 7.75, "learning_rate": 9.685591845014708e-06, "loss": 0.8565451049804688, "step": 13400 }, { "epoch": 0.23813950194318292, "grad_norm": 9.1875, "learning_rate": 9.684323967948069e-06, "loss": 0.8808267974853515, "step": 13450 }, { "epoch": 0.23902477890207952, "grad_norm": 13.9375, "learning_rate": 9.683056090881428e-06, "loss": 0.7940772247314453, "step": 13500 }, { "epoch": 0.2399100558609761, "grad_norm": 7.0625, "learning_rate": 9.681788213814789e-06, "loss": 0.8729141998291016, "step": 13550 }, { "epoch": 0.2407953328198727, "grad_norm": 9.5625, "learning_rate": 9.68052033674815e-06, "loss": 0.8781705474853516, "step": 13600 }, { "epoch": 0.2416806097787693, "grad_norm": 6.40625, "learning_rate": 9.67925245968151e-06, "loss": 0.895041732788086, "step": 13650 }, { "epoch": 0.24256588673766588, "grad_norm": 8.1875, "learning_rate": 9.67798458261487e-06, "loss": 0.8775433349609375, "step": 13700 }, { "epoch": 0.24345116369656247, "grad_norm": 8.125, "learning_rate": 9.67671670554823e-06, "loss": 0.79046630859375, "step": 13750 }, { "epoch": 0.24433644065545906, "grad_norm": 6.21875, "learning_rate": 9.675448828481591e-06, "loss": 0.8892935180664062, "step": 13800 }, { "epoch": 0.24522171761435566, "grad_norm": 7.6875, "learning_rate": 9.674180951414952e-06, "loss": 0.8445626068115234, "step": 13850 }, { "epoch": 0.24610699457325225, "grad_norm": 7.84375, "learning_rate": 9.672913074348313e-06, "loss": 0.8559996795654297, "step": 13900 }, { "epoch": 0.24699227153214884, "grad_norm": 10.25, "learning_rate": 9.671645197281673e-06, "loss": 0.889148941040039, "step": 13950 }, { "epoch": 0.24787754849104543, "grad_norm": 9.9375, "learning_rate": 9.670377320215033e-06, "loss": 0.81448486328125, "step": 14000 }, { "epoch": 0.24876282544994202, "grad_norm": 7.5625, "learning_rate": 9.669109443148393e-06, "loss": 0.8375322723388672, "step": 14050 }, { "epoch": 0.2496481024088386, "grad_norm": 9.1875, "learning_rate": 9.667841566081754e-06, "loss": 0.7957274627685547, "step": 14100 }, { "epoch": 0.2505333793677352, "grad_norm": 8.4375, "learning_rate": 9.666573689015113e-06, "loss": 0.8567101287841797, "step": 14150 }, { "epoch": 0.2514186563266318, "grad_norm": 9.5625, "learning_rate": 9.665305811948474e-06, "loss": 0.8208657073974609, "step": 14200 }, { "epoch": 0.25230393328552836, "grad_norm": 6.9375, "learning_rate": 9.664037934881835e-06, "loss": 0.8073037719726562, "step": 14250 }, { "epoch": 0.253189210244425, "grad_norm": 8.125, "learning_rate": 9.662770057815196e-06, "loss": 0.8132960510253906, "step": 14300 }, { "epoch": 0.25407448720332154, "grad_norm": 8.125, "learning_rate": 9.661502180748555e-06, "loss": 0.839927749633789, "step": 14350 }, { "epoch": 0.25495976416221816, "grad_norm": 8.0, "learning_rate": 9.660234303681916e-06, "loss": 0.8778898620605469, "step": 14400 }, { "epoch": 0.2558450411211147, "grad_norm": 8.3125, "learning_rate": 9.658966426615276e-06, "loss": 0.833895492553711, "step": 14450 }, { "epoch": 0.25673031808001134, "grad_norm": 7.9375, "learning_rate": 9.657698549548637e-06, "loss": 0.8810472869873047, "step": 14500 }, { "epoch": 0.2576155950389079, "grad_norm": 7.46875, "learning_rate": 9.656430672481996e-06, "loss": 0.7839117431640625, "step": 14550 }, { "epoch": 0.2585008719978045, "grad_norm": 8.5, "learning_rate": 9.655162795415357e-06, "loss": 0.9003073120117188, "step": 14600 }, { "epoch": 0.2593861489567011, "grad_norm": 8.375, "learning_rate": 9.653894918348718e-06, "loss": 0.8191262054443359, "step": 14650 }, { "epoch": 0.2602714259155977, "grad_norm": 9.0625, "learning_rate": 9.652627041282077e-06, "loss": 0.8235029602050781, "step": 14700 }, { "epoch": 0.2611567028744943, "grad_norm": 7.75, "learning_rate": 9.651359164215438e-06, "loss": 0.8247006225585938, "step": 14750 }, { "epoch": 0.2620419798333909, "grad_norm": 6.875, "learning_rate": 9.650091287148799e-06, "loss": 0.8273910522460938, "step": 14800 }, { "epoch": 0.26292725679228746, "grad_norm": 8.25, "learning_rate": 9.64882341008216e-06, "loss": 0.7925537109375, "step": 14850 }, { "epoch": 0.2638125337511841, "grad_norm": 6.78125, "learning_rate": 9.647555533015519e-06, "loss": 0.8129417419433593, "step": 14900 }, { "epoch": 0.26469781071008064, "grad_norm": 8.4375, "learning_rate": 9.646287655948881e-06, "loss": 0.7914369964599609, "step": 14950 }, { "epoch": 0.26558308766897726, "grad_norm": 7.53125, "learning_rate": 9.64501977888224e-06, "loss": 0.8484162139892578, "step": 15000 }, { "epoch": 0.26558308766897726, "eval_cer": 16.190625965955, "eval_loss": 0.3607212007045746, "eval_runtime": 393.1507, "eval_samples_per_second": 12.718, "eval_steps_per_second": 1.59, "eval_wer": 32.338651938222355, "step": 15000 }, { "epoch": 0.2664683646278738, "grad_norm": 7.75, "learning_rate": 9.643751901815601e-06, "loss": 0.9018843078613281, "step": 15050 }, { "epoch": 0.26735364158677044, "grad_norm": 9.5625, "learning_rate": 9.642484024748962e-06, "loss": 0.8514089965820313, "step": 15100 }, { "epoch": 0.268238918545667, "grad_norm": 8.0, "learning_rate": 9.641216147682323e-06, "loss": 0.832979507446289, "step": 15150 }, { "epoch": 0.2691241955045636, "grad_norm": 10.0, "learning_rate": 9.639948270615682e-06, "loss": 0.843365707397461, "step": 15200 }, { "epoch": 0.2700094724634602, "grad_norm": 7.78125, "learning_rate": 9.638680393549043e-06, "loss": 0.8681787109375, "step": 15250 }, { "epoch": 0.2708947494223568, "grad_norm": 9.25, "learning_rate": 9.637412516482403e-06, "loss": 0.8352089691162109, "step": 15300 }, { "epoch": 0.27178002638125337, "grad_norm": 6.59375, "learning_rate": 9.636144639415763e-06, "loss": 0.890997543334961, "step": 15350 }, { "epoch": 0.27266530334015, "grad_norm": 10.1875, "learning_rate": 9.634876762349123e-06, "loss": 0.8376169586181641, "step": 15400 }, { "epoch": 0.27355058029904655, "grad_norm": 9.75, "learning_rate": 9.633608885282484e-06, "loss": 0.8699169921875, "step": 15450 }, { "epoch": 0.27443585725794317, "grad_norm": 8.625, "learning_rate": 9.632341008215845e-06, "loss": 0.8817887115478515, "step": 15500 }, { "epoch": 0.27532113421683974, "grad_norm": 8.625, "learning_rate": 9.631073131149204e-06, "loss": 0.8531747436523438, "step": 15550 }, { "epoch": 0.27620641117573635, "grad_norm": 7.09375, "learning_rate": 9.629805254082565e-06, "loss": 0.78208740234375, "step": 15600 }, { "epoch": 0.2770916881346329, "grad_norm": 7.28125, "learning_rate": 9.628537377015926e-06, "loss": 0.7966637420654297, "step": 15650 }, { "epoch": 0.27797696509352954, "grad_norm": 6.0, "learning_rate": 9.627269499949285e-06, "loss": 0.8817159271240235, "step": 15700 }, { "epoch": 0.2788622420524261, "grad_norm": 9.25, "learning_rate": 9.626001622882646e-06, "loss": 0.8878803253173828, "step": 15750 }, { "epoch": 0.27974751901132267, "grad_norm": 8.0, "learning_rate": 9.624733745816006e-06, "loss": 0.803402328491211, "step": 15800 }, { "epoch": 0.2806327959702193, "grad_norm": 11.5625, "learning_rate": 9.623465868749367e-06, "loss": 0.8275116729736328, "step": 15850 }, { "epoch": 0.28151807292911585, "grad_norm": 10.5625, "learning_rate": 9.622197991682726e-06, "loss": 0.887125244140625, "step": 15900 }, { "epoch": 0.28240334988801247, "grad_norm": 7.84375, "learning_rate": 9.620930114616087e-06, "loss": 0.9281369781494141, "step": 15950 }, { "epoch": 0.28328862684690903, "grad_norm": 8.125, "learning_rate": 9.619662237549448e-06, "loss": 0.7792628479003906, "step": 16000 }, { "epoch": 0.28417390380580565, "grad_norm": 7.84375, "learning_rate": 9.618394360482807e-06, "loss": 0.797691650390625, "step": 16050 }, { "epoch": 0.2850591807647022, "grad_norm": 7.34375, "learning_rate": 9.61712648341617e-06, "loss": 0.8892618560791016, "step": 16100 }, { "epoch": 0.28594445772359883, "grad_norm": 10.875, "learning_rate": 9.61585860634953e-06, "loss": 0.8775372314453125, "step": 16150 }, { "epoch": 0.2868297346824954, "grad_norm": 7.21875, "learning_rate": 9.61459072928289e-06, "loss": 0.8037387084960937, "step": 16200 }, { "epoch": 0.287715011641392, "grad_norm": 11.25, "learning_rate": 9.61332285221625e-06, "loss": 0.8707780456542968, "step": 16250 }, { "epoch": 0.2886002886002886, "grad_norm": 8.25, "learning_rate": 9.612054975149611e-06, "loss": 0.8491946411132812, "step": 16300 }, { "epoch": 0.2894855655591852, "grad_norm": 8.4375, "learning_rate": 9.61078709808297e-06, "loss": 0.8137165069580078, "step": 16350 }, { "epoch": 0.29037084251808176, "grad_norm": 13.0625, "learning_rate": 9.609519221016331e-06, "loss": 0.8208762359619141, "step": 16400 }, { "epoch": 0.2912561194769784, "grad_norm": 8.75, "learning_rate": 9.608251343949692e-06, "loss": 0.8479267120361328, "step": 16450 }, { "epoch": 0.29214139643587494, "grad_norm": 9.25, "learning_rate": 9.606983466883053e-06, "loss": 0.8337993621826172, "step": 16500 }, { "epoch": 0.29302667339477156, "grad_norm": 8.75, "learning_rate": 9.605715589816412e-06, "loss": 0.8054754638671875, "step": 16550 }, { "epoch": 0.2939119503536681, "grad_norm": 6.53125, "learning_rate": 9.604447712749773e-06, "loss": 0.7765091705322266, "step": 16600 }, { "epoch": 0.29479722731256475, "grad_norm": 7.625, "learning_rate": 9.603179835683133e-06, "loss": 0.8374893188476562, "step": 16650 }, { "epoch": 0.2956825042714613, "grad_norm": 9.625, "learning_rate": 9.601911958616492e-06, "loss": 0.8807014465332031, "step": 16700 }, { "epoch": 0.29656778123035793, "grad_norm": 7.375, "learning_rate": 9.600644081549853e-06, "loss": 0.7978987884521485, "step": 16750 }, { "epoch": 0.2974530581892545, "grad_norm": 9.75, "learning_rate": 9.599376204483214e-06, "loss": 0.8720764923095703, "step": 16800 }, { "epoch": 0.2983383351481511, "grad_norm": 9.5625, "learning_rate": 9.598108327416575e-06, "loss": 0.8416900634765625, "step": 16850 }, { "epoch": 0.2992236121070477, "grad_norm": 9.0625, "learning_rate": 9.596840450349934e-06, "loss": 0.8841259002685546, "step": 16900 }, { "epoch": 0.3001088890659443, "grad_norm": 8.4375, "learning_rate": 9.595572573283295e-06, "loss": 0.8410957336425782, "step": 16950 }, { "epoch": 0.30099416602484086, "grad_norm": 10.75, "learning_rate": 9.594304696216656e-06, "loss": 0.8281800079345704, "step": 17000 }, { "epoch": 0.3018794429837375, "grad_norm": 9.1875, "learning_rate": 9.593036819150016e-06, "loss": 0.8017253875732422, "step": 17050 }, { "epoch": 0.30276471994263404, "grad_norm": 10.75, "learning_rate": 9.591768942083376e-06, "loss": 0.7658233642578125, "step": 17100 }, { "epoch": 0.30364999690153066, "grad_norm": 9.8125, "learning_rate": 9.590501065016736e-06, "loss": 0.8435968780517578, "step": 17150 }, { "epoch": 0.3045352738604272, "grad_norm": 10.9375, "learning_rate": 9.589233187950097e-06, "loss": 0.8984470367431641, "step": 17200 }, { "epoch": 0.30542055081932384, "grad_norm": 7.75, "learning_rate": 9.587965310883458e-06, "loss": 0.8200258636474609, "step": 17250 }, { "epoch": 0.3063058277782204, "grad_norm": 7.09375, "learning_rate": 9.586697433816819e-06, "loss": 0.8967515563964844, "step": 17300 }, { "epoch": 0.307191104737117, "grad_norm": 7.40625, "learning_rate": 9.585429556750178e-06, "loss": 0.81177734375, "step": 17350 }, { "epoch": 0.3080763816960136, "grad_norm": 9.3125, "learning_rate": 9.584161679683539e-06, "loss": 0.8331631469726563, "step": 17400 }, { "epoch": 0.3089616586549102, "grad_norm": 7.6875, "learning_rate": 9.5828938026169e-06, "loss": 0.834053726196289, "step": 17450 }, { "epoch": 0.3098469356138068, "grad_norm": 7.625, "learning_rate": 9.58162592555026e-06, "loss": 0.7964566040039063, "step": 17500 }, { "epoch": 0.3107322125727034, "grad_norm": 9.0, "learning_rate": 9.58035804848362e-06, "loss": 0.8436365509033203, "step": 17550 }, { "epoch": 0.31161748953159996, "grad_norm": 8.625, "learning_rate": 9.57909017141698e-06, "loss": 0.8416962432861328, "step": 17600 }, { "epoch": 0.3125027664904966, "grad_norm": 9.3125, "learning_rate": 9.577822294350341e-06, "loss": 0.7992512512207032, "step": 17650 }, { "epoch": 0.31338804344939314, "grad_norm": 7.25, "learning_rate": 9.576554417283702e-06, "loss": 0.7787315368652343, "step": 17700 }, { "epoch": 0.31427332040828976, "grad_norm": 10.25, "learning_rate": 9.575286540217061e-06, "loss": 0.8611086273193359, "step": 17750 }, { "epoch": 0.3151585973671863, "grad_norm": 7.625, "learning_rate": 9.574018663150422e-06, "loss": 0.8016007995605469, "step": 17800 }, { "epoch": 0.31604387432608294, "grad_norm": 7.0625, "learning_rate": 9.572750786083783e-06, "loss": 0.8051242828369141, "step": 17850 }, { "epoch": 0.3169291512849795, "grad_norm": 7.96875, "learning_rate": 9.571482909017142e-06, "loss": 0.8546369171142578, "step": 17900 }, { "epoch": 0.3178144282438761, "grad_norm": 9.75, "learning_rate": 9.570215031950503e-06, "loss": 0.8925285339355469, "step": 17950 }, { "epoch": 0.3186997052027727, "grad_norm": 7.4375, "learning_rate": 9.568947154883863e-06, "loss": 0.852982177734375, "step": 18000 }, { "epoch": 0.31958498216166925, "grad_norm": 7.5625, "learning_rate": 9.567679277817224e-06, "loss": 0.8864115142822265, "step": 18050 }, { "epoch": 0.32047025912056587, "grad_norm": 8.625, "learning_rate": 9.566411400750583e-06, "loss": 0.7933511352539062, "step": 18100 }, { "epoch": 0.32135553607946243, "grad_norm": 6.71875, "learning_rate": 9.565143523683944e-06, "loss": 0.816483154296875, "step": 18150 }, { "epoch": 0.32224081303835905, "grad_norm": 8.9375, "learning_rate": 9.563875646617305e-06, "loss": 0.8137828063964844, "step": 18200 }, { "epoch": 0.3231260899972556, "grad_norm": 7.5, "learning_rate": 9.562607769550664e-06, "loss": 0.8930496978759765, "step": 18250 }, { "epoch": 0.32401136695615224, "grad_norm": 7.875, "learning_rate": 9.561339892484025e-06, "loss": 0.8350536346435546, "step": 18300 }, { "epoch": 0.3248966439150488, "grad_norm": 7.5625, "learning_rate": 9.560072015417386e-06, "loss": 0.7733663940429687, "step": 18350 }, { "epoch": 0.3257819208739454, "grad_norm": 9.9375, "learning_rate": 9.558804138350746e-06, "loss": 0.8673963165283203, "step": 18400 }, { "epoch": 0.326667197832842, "grad_norm": 8.5, "learning_rate": 9.557536261284107e-06, "loss": 0.8465771484375, "step": 18450 }, { "epoch": 0.3275524747917386, "grad_norm": 8.9375, "learning_rate": 9.556268384217468e-06, "loss": 0.8279172515869141, "step": 18500 }, { "epoch": 0.32843775175063517, "grad_norm": 9.4375, "learning_rate": 9.555000507150827e-06, "loss": 0.8542655181884765, "step": 18550 }, { "epoch": 0.3293230287095318, "grad_norm": 7.09375, "learning_rate": 9.553732630084188e-06, "loss": 0.8693686676025391, "step": 18600 }, { "epoch": 0.33020830566842835, "grad_norm": 9.625, "learning_rate": 9.552464753017549e-06, "loss": 0.7420355224609375, "step": 18650 }, { "epoch": 0.33109358262732497, "grad_norm": 9.75, "learning_rate": 9.55119687595091e-06, "loss": 0.8566489410400391, "step": 18700 }, { "epoch": 0.33197885958622153, "grad_norm": 6.65625, "learning_rate": 9.549928998884269e-06, "loss": 0.8141315460205079, "step": 18750 }, { "epoch": 0.33286413654511815, "grad_norm": 9.3125, "learning_rate": 9.54866112181763e-06, "loss": 0.8337672424316406, "step": 18800 }, { "epoch": 0.3337494135040147, "grad_norm": 9.0625, "learning_rate": 9.54739324475099e-06, "loss": 0.8334447479248047, "step": 18850 }, { "epoch": 0.33463469046291133, "grad_norm": 8.4375, "learning_rate": 9.54612536768435e-06, "loss": 0.8280233764648437, "step": 18900 }, { "epoch": 0.3355199674218079, "grad_norm": 8.875, "learning_rate": 9.54485749061771e-06, "loss": 0.8619183349609375, "step": 18950 }, { "epoch": 0.3364052443807045, "grad_norm": 7.28125, "learning_rate": 9.543589613551071e-06, "loss": 0.792462158203125, "step": 19000 }, { "epoch": 0.3372905213396011, "grad_norm": 10.0, "learning_rate": 9.542321736484432e-06, "loss": 0.8707679748535156, "step": 19050 }, { "epoch": 0.3381757982984977, "grad_norm": 9.4375, "learning_rate": 9.541053859417791e-06, "loss": 0.8130400085449219, "step": 19100 }, { "epoch": 0.33906107525739426, "grad_norm": 8.5, "learning_rate": 9.539785982351152e-06, "loss": 0.7939989471435547, "step": 19150 }, { "epoch": 0.3399463522162909, "grad_norm": 7.46875, "learning_rate": 9.538518105284513e-06, "loss": 0.8232540893554687, "step": 19200 }, { "epoch": 0.34083162917518744, "grad_norm": 9.0, "learning_rate": 9.537250228217872e-06, "loss": 0.8494704437255859, "step": 19250 }, { "epoch": 0.34171690613408406, "grad_norm": 7.125, "learning_rate": 9.535982351151233e-06, "loss": 0.8644766235351562, "step": 19300 }, { "epoch": 0.34260218309298063, "grad_norm": 8.75, "learning_rate": 9.534714474084593e-06, "loss": 0.8599738311767579, "step": 19350 }, { "epoch": 0.34348746005187725, "grad_norm": 7.75, "learning_rate": 9.533446597017954e-06, "loss": 0.8332124328613282, "step": 19400 }, { "epoch": 0.3443727370107738, "grad_norm": 8.6875, "learning_rate": 9.532178719951313e-06, "loss": 0.8581776428222656, "step": 19450 }, { "epoch": 0.34525801396967043, "grad_norm": 6.875, "learning_rate": 9.530910842884674e-06, "loss": 0.8054019927978515, "step": 19500 }, { "epoch": 0.346143290928567, "grad_norm": 8.9375, "learning_rate": 9.529642965818035e-06, "loss": 0.833067398071289, "step": 19550 }, { "epoch": 0.3470285678874636, "grad_norm": 8.6875, "learning_rate": 9.528375088751396e-06, "loss": 0.8120539855957031, "step": 19600 }, { "epoch": 0.3479138448463602, "grad_norm": 8.6875, "learning_rate": 9.527107211684756e-06, "loss": 0.7809496307373047, "step": 19650 }, { "epoch": 0.3487991218052568, "grad_norm": 9.4375, "learning_rate": 9.525839334618117e-06, "loss": 0.8102002716064454, "step": 19700 }, { "epoch": 0.34968439876415336, "grad_norm": 7.9375, "learning_rate": 9.524571457551476e-06, "loss": 0.8498989868164063, "step": 19750 }, { "epoch": 0.35056967572305, "grad_norm": 10.1875, "learning_rate": 9.523303580484837e-06, "loss": 0.8017991638183594, "step": 19800 }, { "epoch": 0.35145495268194654, "grad_norm": 10.9375, "learning_rate": 9.522035703418198e-06, "loss": 0.7946707153320313, "step": 19850 }, { "epoch": 0.35234022964084316, "grad_norm": 7.03125, "learning_rate": 9.520767826351557e-06, "loss": 0.8487144470214844, "step": 19900 }, { "epoch": 0.3532255065997397, "grad_norm": 9.375, "learning_rate": 9.519499949284918e-06, "loss": 0.7888392639160157, "step": 19950 }, { "epoch": 0.35411078355863634, "grad_norm": 7.84375, "learning_rate": 9.518232072218279e-06, "loss": 0.8027859497070312, "step": 20000 }, { "epoch": 0.35411078355863634, "eval_cer": 16.348974799759223, "eval_loss": 0.35486724972724915, "eval_runtime": 393.2747, "eval_samples_per_second": 12.714, "eval_steps_per_second": 1.589, "eval_wer": 32.70174900276158, "step": 20000 }, { "epoch": 0.3549960605175329, "grad_norm": 9.6875, "learning_rate": 9.51696419515164e-06, "loss": 0.8532256317138672, "step": 20050 }, { "epoch": 0.3558813374764295, "grad_norm": 10.0625, "learning_rate": 9.515696318084999e-06, "loss": 0.884788589477539, "step": 20100 }, { "epoch": 0.3567666144353261, "grad_norm": 8.875, "learning_rate": 9.51442844101836e-06, "loss": 0.7734761047363281, "step": 20150 }, { "epoch": 0.3576518913942227, "grad_norm": 8.125, "learning_rate": 9.51316056395172e-06, "loss": 0.8991136169433593, "step": 20200 }, { "epoch": 0.3585371683531193, "grad_norm": 8.0625, "learning_rate": 9.511892686885081e-06, "loss": 0.8061054229736329, "step": 20250 }, { "epoch": 0.35942244531201584, "grad_norm": 8.0, "learning_rate": 9.51062480981844e-06, "loss": 0.8342051696777344, "step": 20300 }, { "epoch": 0.36030772227091246, "grad_norm": 6.46875, "learning_rate": 9.509356932751801e-06, "loss": 0.8063926696777344, "step": 20350 }, { "epoch": 0.361192999229809, "grad_norm": 8.875, "learning_rate": 9.508089055685162e-06, "loss": 0.9006285095214843, "step": 20400 }, { "epoch": 0.36207827618870564, "grad_norm": 8.0625, "learning_rate": 9.506821178618521e-06, "loss": 0.7908558654785156, "step": 20450 }, { "epoch": 0.3629635531476022, "grad_norm": 7.0625, "learning_rate": 9.505553301551882e-06, "loss": 0.8398319244384765, "step": 20500 }, { "epoch": 0.3638488301064988, "grad_norm": 5.8125, "learning_rate": 9.504285424485243e-06, "loss": 0.8094285583496094, "step": 20550 }, { "epoch": 0.3647341070653954, "grad_norm": 9.1875, "learning_rate": 9.503017547418603e-06, "loss": 0.8495778656005859, "step": 20600 }, { "epoch": 0.365619384024292, "grad_norm": 7.5, "learning_rate": 9.501749670351963e-06, "loss": 0.8105506134033204, "step": 20650 }, { "epoch": 0.36650466098318857, "grad_norm": 8.5625, "learning_rate": 9.500481793285325e-06, "loss": 0.7994151306152344, "step": 20700 }, { "epoch": 0.3673899379420852, "grad_norm": 8.375, "learning_rate": 9.499213916218684e-06, "loss": 0.8393498992919922, "step": 20750 }, { "epoch": 0.36827521490098175, "grad_norm": 10.6875, "learning_rate": 9.497946039152045e-06, "loss": 0.82013427734375, "step": 20800 }, { "epoch": 0.36916049185987837, "grad_norm": 9.5, "learning_rate": 9.496678162085406e-06, "loss": 0.8179158782958984, "step": 20850 }, { "epoch": 0.37004576881877493, "grad_norm": 12.875, "learning_rate": 9.495410285018767e-06, "loss": 0.7779678344726563, "step": 20900 }, { "epoch": 0.37093104577767155, "grad_norm": 7.46875, "learning_rate": 9.494142407952126e-06, "loss": 0.8509443664550781, "step": 20950 }, { "epoch": 0.3718163227365681, "grad_norm": 8.375, "learning_rate": 9.492874530885486e-06, "loss": 0.7994340515136719, "step": 21000 }, { "epoch": 0.37270159969546474, "grad_norm": 8.875, "learning_rate": 9.491606653818847e-06, "loss": 0.8334093475341797, "step": 21050 }, { "epoch": 0.3735868766543613, "grad_norm": 10.3125, "learning_rate": 9.490338776752206e-06, "loss": 0.8529891967773438, "step": 21100 }, { "epoch": 0.3744721536132579, "grad_norm": 9.125, "learning_rate": 9.489070899685567e-06, "loss": 0.7969075012207031, "step": 21150 }, { "epoch": 0.3753574305721545, "grad_norm": 11.3125, "learning_rate": 9.487803022618928e-06, "loss": 0.8441764068603516, "step": 21200 }, { "epoch": 0.3762427075310511, "grad_norm": 7.71875, "learning_rate": 9.486535145552289e-06, "loss": 0.8383098602294922, "step": 21250 }, { "epoch": 0.37712798448994767, "grad_norm": 7.8125, "learning_rate": 9.485267268485648e-06, "loss": 0.8604541778564453, "step": 21300 }, { "epoch": 0.3780132614488443, "grad_norm": 9.25, "learning_rate": 9.483999391419009e-06, "loss": 0.8413866424560547, "step": 21350 }, { "epoch": 0.37889853840774085, "grad_norm": 9.625, "learning_rate": 9.48273151435237e-06, "loss": 0.7846895599365235, "step": 21400 }, { "epoch": 0.37978381536663747, "grad_norm": 7.375, "learning_rate": 9.481463637285729e-06, "loss": 0.8002585601806641, "step": 21450 }, { "epoch": 0.38066909232553403, "grad_norm": 8.0, "learning_rate": 9.48019576021909e-06, "loss": 0.8195709991455078, "step": 21500 }, { "epoch": 0.38155436928443065, "grad_norm": 9.0625, "learning_rate": 9.47892788315245e-06, "loss": 0.8092010498046875, "step": 21550 }, { "epoch": 0.3824396462433272, "grad_norm": 8.8125, "learning_rate": 9.477660006085811e-06, "loss": 0.8117552947998047, "step": 21600 }, { "epoch": 0.38332492320222383, "grad_norm": 6.21875, "learning_rate": 9.47639212901917e-06, "loss": 0.8895623779296875, "step": 21650 }, { "epoch": 0.3842102001611204, "grad_norm": 8.75, "learning_rate": 9.475124251952531e-06, "loss": 0.7886461639404296, "step": 21700 }, { "epoch": 0.385095477120017, "grad_norm": 8.3125, "learning_rate": 9.473856374885892e-06, "loss": 0.802349853515625, "step": 21750 }, { "epoch": 0.3859807540789136, "grad_norm": 12.0625, "learning_rate": 9.472588497819251e-06, "loss": 0.8550609588623047, "step": 21800 }, { "epoch": 0.3868660310378102, "grad_norm": 8.125, "learning_rate": 9.471320620752612e-06, "loss": 0.7605663299560547, "step": 21850 }, { "epoch": 0.38775130799670676, "grad_norm": 7.53125, "learning_rate": 9.470052743685974e-06, "loss": 0.8628280639648438, "step": 21900 }, { "epoch": 0.3886365849556034, "grad_norm": 8.625, "learning_rate": 9.468784866619333e-06, "loss": 0.8964498138427734, "step": 21950 }, { "epoch": 0.38952186191449995, "grad_norm": 10.75, "learning_rate": 9.467516989552694e-06, "loss": 0.7898972320556641, "step": 22000 }, { "epoch": 0.39040713887339656, "grad_norm": 6.375, "learning_rate": 9.466249112486055e-06, "loss": 0.8010289001464844, "step": 22050 }, { "epoch": 0.39129241583229313, "grad_norm": 8.5, "learning_rate": 9.464981235419414e-06, "loss": 0.8129051208496094, "step": 22100 }, { "epoch": 0.39217769279118975, "grad_norm": 10.4375, "learning_rate": 9.463713358352775e-06, "loss": 0.8285366058349609, "step": 22150 }, { "epoch": 0.3930629697500863, "grad_norm": 10.9375, "learning_rate": 9.462445481286136e-06, "loss": 0.8392569732666015, "step": 22200 }, { "epoch": 0.39394824670898293, "grad_norm": 8.6875, "learning_rate": 9.461177604219497e-06, "loss": 0.8171870422363281, "step": 22250 }, { "epoch": 0.3948335236678795, "grad_norm": 8.9375, "learning_rate": 9.459909727152856e-06, "loss": 0.8369279479980469, "step": 22300 }, { "epoch": 0.3957188006267761, "grad_norm": 9.9375, "learning_rate": 9.458641850086216e-06, "loss": 0.7750814819335937, "step": 22350 }, { "epoch": 0.3966040775856727, "grad_norm": 11.0625, "learning_rate": 9.457373973019577e-06, "loss": 0.8476492309570313, "step": 22400 }, { "epoch": 0.3974893545445693, "grad_norm": 8.375, "learning_rate": 9.456106095952936e-06, "loss": 0.8161160278320313, "step": 22450 }, { "epoch": 0.39837463150346586, "grad_norm": 8.9375, "learning_rate": 9.454838218886297e-06, "loss": 0.861170654296875, "step": 22500 }, { "epoch": 0.3992599084623624, "grad_norm": 8.125, "learning_rate": 9.453570341819658e-06, "loss": 0.7938341522216796, "step": 22550 }, { "epoch": 0.40014518542125904, "grad_norm": 6.8125, "learning_rate": 9.452302464753019e-06, "loss": 0.7918325805664063, "step": 22600 }, { "epoch": 0.4010304623801556, "grad_norm": 8.5, "learning_rate": 9.451034587686378e-06, "loss": 0.8244574737548828, "step": 22650 }, { "epoch": 0.4019157393390522, "grad_norm": 7.75, "learning_rate": 9.449766710619739e-06, "loss": 0.759022216796875, "step": 22700 }, { "epoch": 0.4028010162979488, "grad_norm": 9.0625, "learning_rate": 9.4484988335531e-06, "loss": 0.7742694854736328, "step": 22750 }, { "epoch": 0.4036862932568454, "grad_norm": 7.84375, "learning_rate": 9.447230956486459e-06, "loss": 0.8123777008056641, "step": 22800 }, { "epoch": 0.40457157021574197, "grad_norm": 8.6875, "learning_rate": 9.44596307941982e-06, "loss": 0.7599580383300781, "step": 22850 }, { "epoch": 0.4054568471746386, "grad_norm": 7.625, "learning_rate": 9.44469520235318e-06, "loss": 0.7971720123291015, "step": 22900 }, { "epoch": 0.40634212413353515, "grad_norm": 9.625, "learning_rate": 9.443427325286541e-06, "loss": 0.7970821380615234, "step": 22950 }, { "epoch": 0.4072274010924318, "grad_norm": 8.6875, "learning_rate": 9.4421594482199e-06, "loss": 0.8227862548828125, "step": 23000 }, { "epoch": 0.40811267805132834, "grad_norm": 8.25, "learning_rate": 9.440891571153263e-06, "loss": 0.8271920013427735, "step": 23050 }, { "epoch": 0.40899795501022496, "grad_norm": 8.5, "learning_rate": 9.439623694086622e-06, "loss": 0.83154541015625, "step": 23100 }, { "epoch": 0.4098832319691215, "grad_norm": 8.6875, "learning_rate": 9.438355817019983e-06, "loss": 0.8107095336914063, "step": 23150 }, { "epoch": 0.41076850892801814, "grad_norm": 8.9375, "learning_rate": 9.437087939953343e-06, "loss": 0.8437236785888672, "step": 23200 }, { "epoch": 0.4116537858869147, "grad_norm": 8.5625, "learning_rate": 9.435820062886704e-06, "loss": 0.8471569061279297, "step": 23250 }, { "epoch": 0.4125390628458113, "grad_norm": 7.875, "learning_rate": 9.434552185820063e-06, "loss": 0.885667724609375, "step": 23300 }, { "epoch": 0.4134243398047079, "grad_norm": 8.3125, "learning_rate": 9.433284308753424e-06, "loss": 0.8514400482177734, "step": 23350 }, { "epoch": 0.4143096167636045, "grad_norm": 10.1875, "learning_rate": 9.432016431686785e-06, "loss": 0.7581684112548828, "step": 23400 }, { "epoch": 0.41519489372250107, "grad_norm": 10.0, "learning_rate": 9.430748554620146e-06, "loss": 0.824618911743164, "step": 23450 }, { "epoch": 0.4160801706813977, "grad_norm": 7.59375, "learning_rate": 9.429480677553505e-06, "loss": 0.8305864715576172, "step": 23500 }, { "epoch": 0.41696544764029425, "grad_norm": 9.1875, "learning_rate": 9.428212800486866e-06, "loss": 0.8161003875732422, "step": 23550 }, { "epoch": 0.41785072459919087, "grad_norm": 12.0, "learning_rate": 9.426944923420227e-06, "loss": 0.8694761657714843, "step": 23600 }, { "epoch": 0.41873600155808743, "grad_norm": 9.4375, "learning_rate": 9.425677046353586e-06, "loss": 0.8649164581298828, "step": 23650 }, { "epoch": 0.41962127851698405, "grad_norm": 7.9375, "learning_rate": 9.424409169286946e-06, "loss": 0.8016709899902343, "step": 23700 }, { "epoch": 0.4205065554758806, "grad_norm": 6.625, "learning_rate": 9.423141292220307e-06, "loss": 0.8073011016845704, "step": 23750 }, { "epoch": 0.42139183243477724, "grad_norm": 8.5, "learning_rate": 9.421873415153668e-06, "loss": 0.8519166564941406, "step": 23800 }, { "epoch": 0.4222771093936738, "grad_norm": 8.6875, "learning_rate": 9.420605538087027e-06, "loss": 0.7963951110839844, "step": 23850 }, { "epoch": 0.4231623863525704, "grad_norm": 9.4375, "learning_rate": 9.419337661020388e-06, "loss": 0.8319783020019531, "step": 23900 }, { "epoch": 0.424047663311467, "grad_norm": 8.5, "learning_rate": 9.418069783953749e-06, "loss": 0.8024713134765625, "step": 23950 }, { "epoch": 0.4249329402703636, "grad_norm": 6.5, "learning_rate": 9.416801906887108e-06, "loss": 0.8116042327880859, "step": 24000 }, { "epoch": 0.42581821722926017, "grad_norm": 5.96875, "learning_rate": 9.415534029820469e-06, "loss": 0.8232134246826172, "step": 24050 }, { "epoch": 0.4267034941881568, "grad_norm": 9.5625, "learning_rate": 9.41426615275383e-06, "loss": 0.8045470428466797, "step": 24100 }, { "epoch": 0.42758877114705335, "grad_norm": 8.5625, "learning_rate": 9.41299827568719e-06, "loss": 0.8654727935791016, "step": 24150 }, { "epoch": 0.42847404810594997, "grad_norm": 7.5625, "learning_rate": 9.411730398620551e-06, "loss": 0.8151998138427734, "step": 24200 }, { "epoch": 0.42935932506484653, "grad_norm": 7.1875, "learning_rate": 9.410462521553912e-06, "loss": 0.8104602813720703, "step": 24250 }, { "epoch": 0.43024460202374315, "grad_norm": 8.625, "learning_rate": 9.409194644487271e-06, "loss": 0.8054197692871093, "step": 24300 }, { "epoch": 0.4311298789826397, "grad_norm": 7.40625, "learning_rate": 9.407926767420632e-06, "loss": 0.8097126770019532, "step": 24350 }, { "epoch": 0.43201515594153633, "grad_norm": 8.3125, "learning_rate": 9.406658890353993e-06, "loss": 0.7878807067871094, "step": 24400 }, { "epoch": 0.4329004329004329, "grad_norm": 9.4375, "learning_rate": 9.405391013287353e-06, "loss": 0.8757616424560547, "step": 24450 }, { "epoch": 0.4337857098593295, "grad_norm": 7.53125, "learning_rate": 9.404123136220713e-06, "loss": 0.7997799682617187, "step": 24500 }, { "epoch": 0.4346709868182261, "grad_norm": 11.0625, "learning_rate": 9.402855259154073e-06, "loss": 0.7962652587890625, "step": 24550 }, { "epoch": 0.4355562637771227, "grad_norm": 8.25, "learning_rate": 9.401587382087434e-06, "loss": 0.7983963775634766, "step": 24600 }, { "epoch": 0.43644154073601926, "grad_norm": 6.34375, "learning_rate": 9.400319505020793e-06, "loss": 0.8001494598388672, "step": 24650 }, { "epoch": 0.4373268176949159, "grad_norm": 9.5, "learning_rate": 9.399051627954154e-06, "loss": 0.7819596099853515, "step": 24700 }, { "epoch": 0.43821209465381245, "grad_norm": 8.0625, "learning_rate": 9.397783750887515e-06, "loss": 0.8707284545898437, "step": 24750 }, { "epoch": 0.439097371612709, "grad_norm": 10.3125, "learning_rate": 9.396515873820876e-06, "loss": 0.805533447265625, "step": 24800 }, { "epoch": 0.43998264857160563, "grad_norm": 6.46875, "learning_rate": 9.395247996754235e-06, "loss": 0.8091240692138671, "step": 24850 }, { "epoch": 0.4408679255305022, "grad_norm": 6.5625, "learning_rate": 9.393980119687596e-06, "loss": 0.7720470428466797, "step": 24900 }, { "epoch": 0.4417532024893988, "grad_norm": 7.71875, "learning_rate": 9.392712242620956e-06, "loss": 0.7984862518310547, "step": 24950 }, { "epoch": 0.4426384794482954, "grad_norm": 10.1875, "learning_rate": 9.391444365554316e-06, "loss": 0.85141845703125, "step": 25000 }, { "epoch": 0.4426384794482954, "eval_cer": 15.512762807546515, "eval_loss": 0.3506615161895752, "eval_runtime": 388.5865, "eval_samples_per_second": 12.867, "eval_steps_per_second": 1.608, "eval_wer": 31.382325866830318, "step": 25000 }, { "epoch": 0.443523756407192, "grad_norm": 8.1875, "learning_rate": 9.390176488487676e-06, "loss": 0.799303207397461, "step": 25050 }, { "epoch": 0.44440903336608856, "grad_norm": 7.5625, "learning_rate": 9.388908611421037e-06, "loss": 0.7852619934082031, "step": 25100 }, { "epoch": 0.4452943103249852, "grad_norm": 9.125, "learning_rate": 9.387640734354398e-06, "loss": 0.7992433166503906, "step": 25150 }, { "epoch": 0.44617958728388174, "grad_norm": 7.5625, "learning_rate": 9.386372857287757e-06, "loss": 0.8536985015869141, "step": 25200 }, { "epoch": 0.44706486424277836, "grad_norm": 8.375, "learning_rate": 9.385104980221118e-06, "loss": 0.7951334381103515, "step": 25250 }, { "epoch": 0.4479501412016749, "grad_norm": 6.15625, "learning_rate": 9.383837103154479e-06, "loss": 0.8157552337646484, "step": 25300 }, { "epoch": 0.44883541816057154, "grad_norm": 7.875, "learning_rate": 9.38256922608784e-06, "loss": 0.8191168212890625, "step": 25350 }, { "epoch": 0.4497206951194681, "grad_norm": 10.5625, "learning_rate": 9.3813013490212e-06, "loss": 0.7932091522216796, "step": 25400 }, { "epoch": 0.4506059720783647, "grad_norm": 7.0625, "learning_rate": 9.380033471954561e-06, "loss": 0.8196167755126953, "step": 25450 }, { "epoch": 0.4514912490372613, "grad_norm": 7.96875, "learning_rate": 9.37876559488792e-06, "loss": 0.80684326171875, "step": 25500 }, { "epoch": 0.4523765259961579, "grad_norm": 8.375, "learning_rate": 9.377497717821281e-06, "loss": 0.8326478576660157, "step": 25550 }, { "epoch": 0.45326180295505447, "grad_norm": 10.5625, "learning_rate": 9.376229840754642e-06, "loss": 0.8006377410888672, "step": 25600 }, { "epoch": 0.4541470799139511, "grad_norm": 8.8125, "learning_rate": 9.374961963688001e-06, "loss": 0.8118048858642578, "step": 25650 }, { "epoch": 0.45503235687284765, "grad_norm": 6.75, "learning_rate": 9.373694086621362e-06, "loss": 0.8060718536376953, "step": 25700 }, { "epoch": 0.4559176338317443, "grad_norm": 10.0625, "learning_rate": 9.372426209554723e-06, "loss": 0.7967828369140625, "step": 25750 }, { "epoch": 0.45680291079064084, "grad_norm": 9.3125, "learning_rate": 9.371158332488083e-06, "loss": 0.8676046752929687, "step": 25800 }, { "epoch": 0.45768818774953746, "grad_norm": 6.5625, "learning_rate": 9.369890455421443e-06, "loss": 0.83003662109375, "step": 25850 }, { "epoch": 0.458573464708434, "grad_norm": 7.84375, "learning_rate": 9.368622578354803e-06, "loss": 0.7804559326171875, "step": 25900 }, { "epoch": 0.45945874166733064, "grad_norm": 10.0625, "learning_rate": 9.367354701288164e-06, "loss": 0.8414221954345703, "step": 25950 }, { "epoch": 0.4603440186262272, "grad_norm": 10.0625, "learning_rate": 9.366086824221523e-06, "loss": 0.8436249542236328, "step": 26000 }, { "epoch": 0.4612292955851238, "grad_norm": 9.75, "learning_rate": 9.364818947154884e-06, "loss": 0.8198944091796875, "step": 26050 }, { "epoch": 0.4621145725440204, "grad_norm": 7.21875, "learning_rate": 9.363551070088245e-06, "loss": 0.822802505493164, "step": 26100 }, { "epoch": 0.462999849502917, "grad_norm": 9.5, "learning_rate": 9.362283193021606e-06, "loss": 0.8616585540771484, "step": 26150 }, { "epoch": 0.46388512646181357, "grad_norm": 10.5625, "learning_rate": 9.361015315954965e-06, "loss": 0.874574203491211, "step": 26200 }, { "epoch": 0.4647704034207102, "grad_norm": 8.25, "learning_rate": 9.359747438888326e-06, "loss": 0.8157247161865234, "step": 26250 }, { "epoch": 0.46565568037960675, "grad_norm": 9.5, "learning_rate": 9.358479561821686e-06, "loss": 0.8261668395996093, "step": 26300 }, { "epoch": 0.46654095733850337, "grad_norm": 9.25, "learning_rate": 9.357211684755047e-06, "loss": 0.7921039581298828, "step": 26350 }, { "epoch": 0.46742623429739993, "grad_norm": 8.125, "learning_rate": 9.355943807688406e-06, "loss": 0.8063948059082031, "step": 26400 }, { "epoch": 0.46831151125629655, "grad_norm": 8.4375, "learning_rate": 9.354675930621767e-06, "loss": 0.8948915100097656, "step": 26450 }, { "epoch": 0.4691967882151931, "grad_norm": 8.375, "learning_rate": 9.353408053555128e-06, "loss": 0.8333416748046875, "step": 26500 }, { "epoch": 0.47008206517408974, "grad_norm": 8.6875, "learning_rate": 9.352140176488489e-06, "loss": 0.8180615997314453, "step": 26550 }, { "epoch": 0.4709673421329863, "grad_norm": 8.3125, "learning_rate": 9.35087229942185e-06, "loss": 0.8063065338134766, "step": 26600 }, { "epoch": 0.4718526190918829, "grad_norm": 8.5625, "learning_rate": 9.34960442235521e-06, "loss": 0.7838368225097656, "step": 26650 }, { "epoch": 0.4727378960507795, "grad_norm": 9.5, "learning_rate": 9.34833654528857e-06, "loss": 0.7889875030517578, "step": 26700 }, { "epoch": 0.4736231730096761, "grad_norm": 8.875, "learning_rate": 9.34706866822193e-06, "loss": 0.8149432373046875, "step": 26750 }, { "epoch": 0.47450844996857267, "grad_norm": 7.59375, "learning_rate": 9.345800791155291e-06, "loss": 0.8343911743164063, "step": 26800 }, { "epoch": 0.4753937269274693, "grad_norm": 6.09375, "learning_rate": 9.34453291408865e-06, "loss": 0.8231613159179687, "step": 26850 }, { "epoch": 0.47627900388636585, "grad_norm": 8.875, "learning_rate": 9.343265037022011e-06, "loss": 0.8221437835693359, "step": 26900 }, { "epoch": 0.47716428084526247, "grad_norm": 7.90625, "learning_rate": 9.341997159955372e-06, "loss": 0.8173371124267578, "step": 26950 }, { "epoch": 0.47804955780415903, "grad_norm": 8.25, "learning_rate": 9.340729282888733e-06, "loss": 0.8692239379882812, "step": 27000 }, { "epoch": 0.4789348347630556, "grad_norm": 6.40625, "learning_rate": 9.339461405822092e-06, "loss": 0.7678931427001953, "step": 27050 }, { "epoch": 0.4798201117219522, "grad_norm": 9.0625, "learning_rate": 9.338193528755453e-06, "loss": 0.8847496032714843, "step": 27100 }, { "epoch": 0.4807053886808488, "grad_norm": 7.96875, "learning_rate": 9.336925651688813e-06, "loss": 0.8440608978271484, "step": 27150 }, { "epoch": 0.4815906656397454, "grad_norm": 7.34375, "learning_rate": 9.335657774622173e-06, "loss": 0.7591238403320313, "step": 27200 }, { "epoch": 0.48247594259864196, "grad_norm": 7.4375, "learning_rate": 9.334389897555533e-06, "loss": 0.8198709106445312, "step": 27250 }, { "epoch": 0.4833612195575386, "grad_norm": 8.9375, "learning_rate": 9.333122020488894e-06, "loss": 0.7931713104248047, "step": 27300 }, { "epoch": 0.48424649651643514, "grad_norm": 9.25, "learning_rate": 9.331854143422255e-06, "loss": 0.8829562377929687, "step": 27350 }, { "epoch": 0.48513177347533176, "grad_norm": 8.3125, "learning_rate": 9.330586266355614e-06, "loss": 0.857093734741211, "step": 27400 }, { "epoch": 0.4860170504342283, "grad_norm": 8.25, "learning_rate": 9.329318389288975e-06, "loss": 0.8017523193359375, "step": 27450 }, { "epoch": 0.48690232739312495, "grad_norm": 9.9375, "learning_rate": 9.328050512222336e-06, "loss": 0.8276552581787109, "step": 27500 }, { "epoch": 0.4877876043520215, "grad_norm": 8.125, "learning_rate": 9.326782635155695e-06, "loss": 0.7625294494628906, "step": 27550 }, { "epoch": 0.48867288131091813, "grad_norm": 6.4375, "learning_rate": 9.325514758089056e-06, "loss": 0.8343047332763672, "step": 27600 }, { "epoch": 0.4895581582698147, "grad_norm": 6.59375, "learning_rate": 9.324246881022418e-06, "loss": 0.8407593536376953, "step": 27650 }, { "epoch": 0.4904434352287113, "grad_norm": 9.75, "learning_rate": 9.322979003955777e-06, "loss": 0.7998301696777343, "step": 27700 }, { "epoch": 0.4913287121876079, "grad_norm": 9.125, "learning_rate": 9.321711126889138e-06, "loss": 0.8573383331298828, "step": 27750 }, { "epoch": 0.4922139891465045, "grad_norm": 10.875, "learning_rate": 9.320443249822499e-06, "loss": 0.8192384338378906, "step": 27800 }, { "epoch": 0.49309926610540106, "grad_norm": 8.8125, "learning_rate": 9.319175372755858e-06, "loss": 0.7779326629638672, "step": 27850 }, { "epoch": 0.4939845430642977, "grad_norm": 7.90625, "learning_rate": 9.317907495689219e-06, "loss": 0.8377117156982422, "step": 27900 }, { "epoch": 0.49486982002319424, "grad_norm": 6.9375, "learning_rate": 9.31663961862258e-06, "loss": 0.8032022094726563, "step": 27950 }, { "epoch": 0.49575509698209086, "grad_norm": 9.4375, "learning_rate": 9.31537174155594e-06, "loss": 0.8107691955566406, "step": 28000 }, { "epoch": 0.4966403739409874, "grad_norm": 7.75, "learning_rate": 9.3141038644893e-06, "loss": 0.8325393676757813, "step": 28050 }, { "epoch": 0.49752565089988404, "grad_norm": 9.625, "learning_rate": 9.31283598742266e-06, "loss": 0.80835693359375, "step": 28100 }, { "epoch": 0.4984109278587806, "grad_norm": 9.0625, "learning_rate": 9.311568110356021e-06, "loss": 0.8398920440673828, "step": 28150 }, { "epoch": 0.4992962048176772, "grad_norm": 9.5, "learning_rate": 9.31030023328938e-06, "loss": 0.8618205261230468, "step": 28200 }, { "epoch": 0.5001814817765738, "grad_norm": 7.3125, "learning_rate": 9.309032356222741e-06, "loss": 0.7820648956298828, "step": 28250 }, { "epoch": 0.5010667587354704, "grad_norm": 5.96875, "learning_rate": 9.307764479156102e-06, "loss": 0.8232012176513672, "step": 28300 }, { "epoch": 0.501952035694367, "grad_norm": 7.15625, "learning_rate": 9.306496602089463e-06, "loss": 0.7935179138183593, "step": 28350 }, { "epoch": 0.5028373126532636, "grad_norm": 7.75, "learning_rate": 9.305228725022822e-06, "loss": 0.812857666015625, "step": 28400 }, { "epoch": 0.5037225896121602, "grad_norm": 6.75, "learning_rate": 9.303960847956183e-06, "loss": 0.7825227355957032, "step": 28450 }, { "epoch": 0.5046078665710567, "grad_norm": 6.875, "learning_rate": 9.302692970889543e-06, "loss": 0.8190470886230469, "step": 28500 }, { "epoch": 0.5054931435299533, "grad_norm": 6.28125, "learning_rate": 9.301425093822903e-06, "loss": 0.855802993774414, "step": 28550 }, { "epoch": 0.50637842048885, "grad_norm": 10.5625, "learning_rate": 9.300157216756263e-06, "loss": 0.848614501953125, "step": 28600 }, { "epoch": 0.5072636974477466, "grad_norm": 8.125, "learning_rate": 9.298889339689624e-06, "loss": 0.7645280456542969, "step": 28650 }, { "epoch": 0.5081489744066431, "grad_norm": 9.1875, "learning_rate": 9.297621462622985e-06, "loss": 0.8297785949707032, "step": 28700 }, { "epoch": 0.5090342513655397, "grad_norm": 6.40625, "learning_rate": 9.296353585556344e-06, "loss": 0.8629362487792969, "step": 28750 }, { "epoch": 0.5099195283244363, "grad_norm": 9.375, "learning_rate": 9.295085708489707e-06, "loss": 0.8396317291259766, "step": 28800 }, { "epoch": 0.5108048052833329, "grad_norm": 9.9375, "learning_rate": 9.293817831423066e-06, "loss": 0.8256559753417969, "step": 28850 }, { "epoch": 0.5116900822422294, "grad_norm": 8.9375, "learning_rate": 9.292549954356427e-06, "loss": 0.8319975280761719, "step": 28900 }, { "epoch": 0.5125753592011261, "grad_norm": 8.0, "learning_rate": 9.291282077289787e-06, "loss": 0.7743982696533203, "step": 28950 }, { "epoch": 0.5134606361600227, "grad_norm": 7.21875, "learning_rate": 9.290014200223148e-06, "loss": 0.8560353088378906, "step": 29000 }, { "epoch": 0.5143459131189193, "grad_norm": 9.25, "learning_rate": 9.288746323156507e-06, "loss": 0.872972640991211, "step": 29050 }, { "epoch": 0.5152311900778158, "grad_norm": 9.0625, "learning_rate": 9.287478446089868e-06, "loss": 0.8381455230712891, "step": 29100 }, { "epoch": 0.5161164670367124, "grad_norm": 9.3125, "learning_rate": 9.286210569023229e-06, "loss": 0.7603321838378906, "step": 29150 }, { "epoch": 0.517001743995609, "grad_norm": 7.28125, "learning_rate": 9.284942691956588e-06, "loss": 0.8029882049560547, "step": 29200 }, { "epoch": 0.5178870209545057, "grad_norm": 8.5625, "learning_rate": 9.283674814889949e-06, "loss": 0.8445874786376953, "step": 29250 }, { "epoch": 0.5187722979134022, "grad_norm": 7.21875, "learning_rate": 9.28240693782331e-06, "loss": 0.806130142211914, "step": 29300 }, { "epoch": 0.5196575748722988, "grad_norm": 8.625, "learning_rate": 9.28113906075667e-06, "loss": 0.8128162384033203, "step": 29350 }, { "epoch": 0.5205428518311954, "grad_norm": 9.3125, "learning_rate": 9.27987118369003e-06, "loss": 0.8138497161865235, "step": 29400 }, { "epoch": 0.5214281287900919, "grad_norm": 8.875, "learning_rate": 9.27860330662339e-06, "loss": 0.7841197967529296, "step": 29450 }, { "epoch": 0.5223134057489885, "grad_norm": 7.46875, "learning_rate": 9.277335429556751e-06, "loss": 0.8085577392578125, "step": 29500 }, { "epoch": 0.5231986827078852, "grad_norm": 10.1875, "learning_rate": 9.276067552490112e-06, "loss": 0.8392274475097656, "step": 29550 }, { "epoch": 0.5240839596667818, "grad_norm": 8.1875, "learning_rate": 9.274799675423471e-06, "loss": 0.8423346710205079, "step": 29600 }, { "epoch": 0.5249692366256783, "grad_norm": 8.9375, "learning_rate": 9.273531798356832e-06, "loss": 0.7886857604980468, "step": 29650 }, { "epoch": 0.5258545135845749, "grad_norm": 9.5, "learning_rate": 9.272263921290193e-06, "loss": 0.8614559936523437, "step": 29700 }, { "epoch": 0.5267397905434715, "grad_norm": 8.6875, "learning_rate": 9.270996044223552e-06, "loss": 0.7886000061035157, "step": 29750 }, { "epoch": 0.5276250675023682, "grad_norm": 9.0, "learning_rate": 9.269728167156913e-06, "loss": 0.8309735870361328, "step": 29800 }, { "epoch": 0.5285103444612647, "grad_norm": 10.25, "learning_rate": 9.268460290090273e-06, "loss": 0.8716236877441407, "step": 29850 }, { "epoch": 0.5293956214201613, "grad_norm": 8.625, "learning_rate": 9.267192413023634e-06, "loss": 0.8325408935546875, "step": 29900 }, { "epoch": 0.5302808983790579, "grad_norm": 6.96875, "learning_rate": 9.265924535956993e-06, "loss": 0.7999062347412109, "step": 29950 }, { "epoch": 0.5311661753379545, "grad_norm": 8.3125, "learning_rate": 9.264656658890356e-06, "loss": 0.8242444610595703, "step": 30000 }, { "epoch": 0.5311661753379545, "eval_cer": 14.722645510105584, "eval_loss": 0.3480595052242279, "eval_runtime": 379.8738, "eval_samples_per_second": 13.162, "eval_steps_per_second": 1.645, "eval_wer": 30.56407896082643, "step": 30000 }, { "epoch": 0.532051452296851, "grad_norm": 8.6875, "learning_rate": 9.263388781823715e-06, "loss": 0.9031015777587891, "step": 30050 }, { "epoch": 0.5329367292557476, "grad_norm": 13.3125, "learning_rate": 9.262120904757076e-06, "loss": 0.8271858978271485, "step": 30100 }, { "epoch": 0.5338220062146443, "grad_norm": 7.3125, "learning_rate": 9.260853027690437e-06, "loss": 0.7925968170166016, "step": 30150 }, { "epoch": 0.5347072831735409, "grad_norm": 10.25, "learning_rate": 9.259585150623797e-06, "loss": 0.8345429229736329, "step": 30200 }, { "epoch": 0.5355925601324374, "grad_norm": 9.625, "learning_rate": 9.258317273557156e-06, "loss": 0.8198999786376953, "step": 30250 }, { "epoch": 0.536477837091334, "grad_norm": 8.0, "learning_rate": 9.257049396490517e-06, "loss": 0.8118746185302734, "step": 30300 }, { "epoch": 0.5373631140502306, "grad_norm": 9.125, "learning_rate": 9.255781519423878e-06, "loss": 0.7819632720947266, "step": 30350 }, { "epoch": 0.5382483910091272, "grad_norm": 8.125, "learning_rate": 9.254513642357237e-06, "loss": 0.84641845703125, "step": 30400 }, { "epoch": 0.5391336679680238, "grad_norm": 8.5625, "learning_rate": 9.253245765290598e-06, "loss": 0.8433683776855468, "step": 30450 }, { "epoch": 0.5400189449269204, "grad_norm": 9.375, "learning_rate": 9.251977888223959e-06, "loss": 0.7750820922851562, "step": 30500 }, { "epoch": 0.540904221885817, "grad_norm": 8.75, "learning_rate": 9.25071001115732e-06, "loss": 0.8537428283691406, "step": 30550 }, { "epoch": 0.5417894988447136, "grad_norm": 8.6875, "learning_rate": 9.249442134090679e-06, "loss": 0.8174424743652344, "step": 30600 }, { "epoch": 0.5426747758036101, "grad_norm": 7.28125, "learning_rate": 9.24817425702404e-06, "loss": 0.7803053283691406, "step": 30650 }, { "epoch": 0.5435600527625067, "grad_norm": 8.875, "learning_rate": 9.2469063799574e-06, "loss": 0.8024676513671875, "step": 30700 }, { "epoch": 0.5444453297214034, "grad_norm": 8.125, "learning_rate": 9.24563850289076e-06, "loss": 0.830534439086914, "step": 30750 }, { "epoch": 0.5453306066803, "grad_norm": 8.25, "learning_rate": 9.24437062582412e-06, "loss": 0.8581230926513672, "step": 30800 }, { "epoch": 0.5462158836391965, "grad_norm": 9.25, "learning_rate": 9.243102748757481e-06, "loss": 0.8423960876464843, "step": 30850 }, { "epoch": 0.5471011605980931, "grad_norm": 6.78125, "learning_rate": 9.241834871690842e-06, "loss": 0.8226587677001953, "step": 30900 }, { "epoch": 0.5479864375569897, "grad_norm": 9.6875, "learning_rate": 9.240566994624201e-06, "loss": 0.7333302307128906, "step": 30950 }, { "epoch": 0.5488717145158863, "grad_norm": 9.4375, "learning_rate": 9.239299117557562e-06, "loss": 0.8279609680175781, "step": 31000 }, { "epoch": 0.5497569914747829, "grad_norm": 10.3125, "learning_rate": 9.238031240490923e-06, "loss": 0.8205570983886719, "step": 31050 }, { "epoch": 0.5506422684336795, "grad_norm": 10.5625, "learning_rate": 9.236763363424282e-06, "loss": 0.7977956390380859, "step": 31100 }, { "epoch": 0.5515275453925761, "grad_norm": 9.4375, "learning_rate": 9.235495486357644e-06, "loss": 0.7922000885009766, "step": 31150 }, { "epoch": 0.5524128223514727, "grad_norm": 8.1875, "learning_rate": 9.234227609291005e-06, "loss": 0.8065330505371093, "step": 31200 }, { "epoch": 0.5532980993103692, "grad_norm": 6.5625, "learning_rate": 9.232959732224364e-06, "loss": 0.766522445678711, "step": 31250 }, { "epoch": 0.5541833762692658, "grad_norm": 8.4375, "learning_rate": 9.231691855157725e-06, "loss": 0.7584080505371094, "step": 31300 }, { "epoch": 0.5550686532281625, "grad_norm": 7.8125, "learning_rate": 9.230423978091086e-06, "loss": 0.835227279663086, "step": 31350 }, { "epoch": 0.5559539301870591, "grad_norm": 7.71875, "learning_rate": 9.229156101024445e-06, "loss": 0.8134999084472656, "step": 31400 }, { "epoch": 0.5568392071459556, "grad_norm": 8.625, "learning_rate": 9.227888223957806e-06, "loss": 0.8568966674804688, "step": 31450 }, { "epoch": 0.5577244841048522, "grad_norm": 9.1875, "learning_rate": 9.226620346891167e-06, "loss": 0.8286125946044922, "step": 31500 }, { "epoch": 0.5586097610637488, "grad_norm": 8.1875, "learning_rate": 9.225352469824527e-06, "loss": 0.806230697631836, "step": 31550 }, { "epoch": 0.5594950380226453, "grad_norm": 7.0625, "learning_rate": 9.224084592757886e-06, "loss": 0.8486292266845703, "step": 31600 }, { "epoch": 0.560380314981542, "grad_norm": 9.875, "learning_rate": 9.222816715691247e-06, "loss": 0.8534250640869141, "step": 31650 }, { "epoch": 0.5612655919404386, "grad_norm": 9.0, "learning_rate": 9.221548838624608e-06, "loss": 0.7652369689941406, "step": 31700 }, { "epoch": 0.5621508688993352, "grad_norm": 9.75, "learning_rate": 9.220280961557967e-06, "loss": 0.8141221618652343, "step": 31750 }, { "epoch": 0.5630361458582317, "grad_norm": 7.75, "learning_rate": 9.219013084491328e-06, "loss": 0.7593769073486328, "step": 31800 }, { "epoch": 0.5639214228171283, "grad_norm": 9.5625, "learning_rate": 9.217745207424689e-06, "loss": 0.89138671875, "step": 31850 }, { "epoch": 0.5648066997760249, "grad_norm": 8.1875, "learning_rate": 9.21647733035805e-06, "loss": 0.7844825744628906, "step": 31900 }, { "epoch": 0.5656919767349216, "grad_norm": 7.9375, "learning_rate": 9.215209453291409e-06, "loss": 0.7966393280029297, "step": 31950 }, { "epoch": 0.5665772536938181, "grad_norm": 7.71875, "learning_rate": 9.21394157622477e-06, "loss": 0.7827631378173828, "step": 32000 }, { "epoch": 0.5674625306527147, "grad_norm": 9.0, "learning_rate": 9.21267369915813e-06, "loss": 0.8383135986328125, "step": 32050 }, { "epoch": 0.5683478076116113, "grad_norm": 8.3125, "learning_rate": 9.211405822091491e-06, "loss": 0.8304233551025391, "step": 32100 }, { "epoch": 0.5692330845705079, "grad_norm": 9.5, "learning_rate": 9.21013794502485e-06, "loss": 0.8089006042480469, "step": 32150 }, { "epoch": 0.5701183615294044, "grad_norm": 6.40625, "learning_rate": 9.208870067958211e-06, "loss": 0.8643374633789063, "step": 32200 }, { "epoch": 0.571003638488301, "grad_norm": 8.5, "learning_rate": 9.207602190891572e-06, "loss": 0.87065185546875, "step": 32250 }, { "epoch": 0.5718889154471977, "grad_norm": 8.375, "learning_rate": 9.206334313824933e-06, "loss": 0.8011983489990234, "step": 32300 }, { "epoch": 0.5727741924060943, "grad_norm": 9.5, "learning_rate": 9.205066436758294e-06, "loss": 0.8257938385009765, "step": 32350 }, { "epoch": 0.5736594693649908, "grad_norm": 9.25, "learning_rate": 9.203798559691654e-06, "loss": 0.7791096496582032, "step": 32400 }, { "epoch": 0.5745447463238874, "grad_norm": 9.6875, "learning_rate": 9.202530682625013e-06, "loss": 0.8195112609863281, "step": 32450 }, { "epoch": 0.575430023282784, "grad_norm": 8.25, "learning_rate": 9.201262805558374e-06, "loss": 0.8126992797851562, "step": 32500 }, { "epoch": 0.5763153002416807, "grad_norm": 6.9375, "learning_rate": 9.199994928491735e-06, "loss": 0.8560022735595703, "step": 32550 }, { "epoch": 0.5772005772005772, "grad_norm": 8.6875, "learning_rate": 9.198727051425094e-06, "loss": 0.8273929595947266, "step": 32600 }, { "epoch": 0.5780858541594738, "grad_norm": 9.25, "learning_rate": 9.197459174358455e-06, "loss": 0.8422256469726562, "step": 32650 }, { "epoch": 0.5789711311183704, "grad_norm": 7.625, "learning_rate": 9.196191297291816e-06, "loss": 0.7996526336669922, "step": 32700 }, { "epoch": 0.579856408077267, "grad_norm": 9.125, "learning_rate": 9.194923420225177e-06, "loss": 0.8200563812255859, "step": 32750 }, { "epoch": 0.5807416850361635, "grad_norm": 8.4375, "learning_rate": 9.193655543158536e-06, "loss": 0.7688130187988281, "step": 32800 }, { "epoch": 0.5816269619950601, "grad_norm": 7.625, "learning_rate": 9.192387666091897e-06, "loss": 0.8218675231933594, "step": 32850 }, { "epoch": 0.5825122389539568, "grad_norm": 10.5625, "learning_rate": 9.191119789025257e-06, "loss": 0.8125396728515625, "step": 32900 }, { "epoch": 0.5833975159128534, "grad_norm": 9.0, "learning_rate": 9.189851911958616e-06, "loss": 0.7877046966552734, "step": 32950 }, { "epoch": 0.5842827928717499, "grad_norm": 10.5, "learning_rate": 9.188584034891977e-06, "loss": 0.8003798675537109, "step": 33000 }, { "epoch": 0.5851680698306465, "grad_norm": 10.1875, "learning_rate": 9.187316157825338e-06, "loss": 0.768984146118164, "step": 33050 }, { "epoch": 0.5860533467895431, "grad_norm": 9.75, "learning_rate": 9.186048280758699e-06, "loss": 0.8237137603759765, "step": 33100 }, { "epoch": 0.5869386237484397, "grad_norm": 7.75, "learning_rate": 9.184780403692058e-06, "loss": 0.7769164276123047, "step": 33150 }, { "epoch": 0.5878239007073363, "grad_norm": 9.375, "learning_rate": 9.183512526625419e-06, "loss": 0.8206394958496094, "step": 33200 }, { "epoch": 0.5887091776662329, "grad_norm": 8.125, "learning_rate": 9.18224464955878e-06, "loss": 0.811323013305664, "step": 33250 }, { "epoch": 0.5895944546251295, "grad_norm": 7.3125, "learning_rate": 9.180976772492139e-06, "loss": 0.8037760925292968, "step": 33300 }, { "epoch": 0.5904797315840261, "grad_norm": 9.4375, "learning_rate": 9.1797088954255e-06, "loss": 0.8595541381835937, "step": 33350 }, { "epoch": 0.5913650085429226, "grad_norm": 9.5625, "learning_rate": 9.17844101835886e-06, "loss": 0.7697556304931641, "step": 33400 }, { "epoch": 0.5922502855018192, "grad_norm": 9.25, "learning_rate": 9.177173141292221e-06, "loss": 0.7896424865722657, "step": 33450 }, { "epoch": 0.5931355624607159, "grad_norm": 8.3125, "learning_rate": 9.175905264225582e-06, "loss": 0.8332701873779297, "step": 33500 }, { "epoch": 0.5940208394196125, "grad_norm": 7.96875, "learning_rate": 9.174637387158943e-06, "loss": 0.7919515228271484, "step": 33550 }, { "epoch": 0.594906116378509, "grad_norm": 8.1875, "learning_rate": 9.173369510092302e-06, "loss": 0.800345687866211, "step": 33600 }, { "epoch": 0.5957913933374056, "grad_norm": 9.3125, "learning_rate": 9.172101633025663e-06, "loss": 0.7806795501708984, "step": 33650 }, { "epoch": 0.5966766702963022, "grad_norm": 7.96875, "learning_rate": 9.170833755959023e-06, "loss": 0.8437120056152344, "step": 33700 }, { "epoch": 0.5975619472551988, "grad_norm": 10.4375, "learning_rate": 9.169565878892384e-06, "loss": 0.854691162109375, "step": 33750 }, { "epoch": 0.5984472242140954, "grad_norm": 9.125, "learning_rate": 9.168298001825743e-06, "loss": 0.7852024078369141, "step": 33800 }, { "epoch": 0.599332501172992, "grad_norm": 10.25, "learning_rate": 9.167030124759104e-06, "loss": 0.7816255187988281, "step": 33850 }, { "epoch": 0.6002177781318886, "grad_norm": 9.375, "learning_rate": 9.165762247692465e-06, "loss": 0.7625868988037109, "step": 33900 }, { "epoch": 0.6011030550907851, "grad_norm": 7.71875, "learning_rate": 9.164494370625824e-06, "loss": 0.8304756164550782, "step": 33950 }, { "epoch": 0.6019883320496817, "grad_norm": 9.6875, "learning_rate": 9.163226493559185e-06, "loss": 0.8507473754882813, "step": 34000 }, { "epoch": 0.6028736090085783, "grad_norm": 9.9375, "learning_rate": 9.161958616492546e-06, "loss": 0.8916117858886718, "step": 34050 }, { "epoch": 0.603758885967475, "grad_norm": 9.0625, "learning_rate": 9.160690739425907e-06, "loss": 0.7812516021728516, "step": 34100 }, { "epoch": 0.6046441629263715, "grad_norm": 9.9375, "learning_rate": 9.159422862359266e-06, "loss": 0.85893310546875, "step": 34150 }, { "epoch": 0.6055294398852681, "grad_norm": 8.75, "learning_rate": 9.158154985292627e-06, "loss": 0.8037278747558594, "step": 34200 }, { "epoch": 0.6064147168441647, "grad_norm": 9.5, "learning_rate": 9.156887108225987e-06, "loss": 0.8571352386474609, "step": 34250 }, { "epoch": 0.6072999938030613, "grad_norm": 9.25, "learning_rate": 9.155619231159346e-06, "loss": 0.8393865203857422, "step": 34300 }, { "epoch": 0.6081852707619578, "grad_norm": 8.8125, "learning_rate": 9.154351354092707e-06, "loss": 0.7959491729736328, "step": 34350 }, { "epoch": 0.6090705477208544, "grad_norm": 8.375, "learning_rate": 9.153083477026068e-06, "loss": 0.8323846435546876, "step": 34400 }, { "epoch": 0.6099558246797511, "grad_norm": 8.1875, "learning_rate": 9.151815599959429e-06, "loss": 0.7896021270751953, "step": 34450 }, { "epoch": 0.6108411016386477, "grad_norm": 10.4375, "learning_rate": 9.150547722892788e-06, "loss": 0.8387126922607422, "step": 34500 }, { "epoch": 0.6117263785975442, "grad_norm": 8.75, "learning_rate": 9.149279845826149e-06, "loss": 0.8292191314697266, "step": 34550 }, { "epoch": 0.6126116555564408, "grad_norm": 6.75, "learning_rate": 9.14801196875951e-06, "loss": 0.8881966400146485, "step": 34600 }, { "epoch": 0.6134969325153374, "grad_norm": 8.875, "learning_rate": 9.14674409169287e-06, "loss": 0.7790008544921875, "step": 34650 }, { "epoch": 0.614382209474234, "grad_norm": 9.875, "learning_rate": 9.145476214626231e-06, "loss": 0.772136459350586, "step": 34700 }, { "epoch": 0.6152674864331306, "grad_norm": 8.9375, "learning_rate": 9.144208337559592e-06, "loss": 0.8583509063720703, "step": 34750 }, { "epoch": 0.6161527633920272, "grad_norm": 9.5, "learning_rate": 9.142940460492951e-06, "loss": 0.7817726898193359, "step": 34800 }, { "epoch": 0.6170380403509238, "grad_norm": 7.96875, "learning_rate": 9.141672583426312e-06, "loss": 0.7983211517333985, "step": 34850 }, { "epoch": 0.6179233173098204, "grad_norm": 7.75, "learning_rate": 9.140404706359673e-06, "loss": 0.8518927001953125, "step": 34900 }, { "epoch": 0.6188085942687169, "grad_norm": 7.15625, "learning_rate": 9.139136829293032e-06, "loss": 0.7762453460693359, "step": 34950 }, { "epoch": 0.6196938712276135, "grad_norm": 8.6875, "learning_rate": 9.137868952226393e-06, "loss": 0.7606196594238281, "step": 35000 }, { "epoch": 0.6196938712276135, "eval_cer": 14.630998411088756, "eval_loss": 0.34649306535720825, "eval_runtime": 381.1753, "eval_samples_per_second": 13.117, "eval_steps_per_second": 1.64, "eval_wer": 30.37997340697555, "step": 35000 } ], "logging_steps": 50, "max_steps": 395360, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.8051981033472e+21, "train_batch_size": 16, "trial_name": null, "trial_params": null }