{ "best_global_step": 110000, "best_metric": 0.07368261883895177, "best_model_checkpoint": "./TrOCR_SigLIP2_Aranizer_41K_AND_COMBINED/stage2/checkpoint-110000", "epoch": 50.0, "eval_steps": 10000, "global_step": 115050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021733224667209998, "grad_norm": 190.25732421875, "learning_rate": 4.900000000000001e-07, "loss": 17.665, "step": 50 }, { "epoch": 0.043466449334419996, "grad_norm": 101.98078918457031, "learning_rate": 9.9e-07, "loss": 17.0, "step": 100 }, { "epoch": 0.06519967400162999, "grad_norm": 61.98678970336914, "learning_rate": 1.4900000000000001e-06, "loss": 16.6227, "step": 150 }, { "epoch": 0.08693289866883999, "grad_norm": 77.35042572021484, "learning_rate": 1.9900000000000004e-06, "loss": 16.5637, "step": 200 }, { "epoch": 0.10866612333604998, "grad_norm": 91.11700439453125, "learning_rate": 2.4900000000000003e-06, "loss": 16.7798, "step": 250 }, { "epoch": 0.13039934800325997, "grad_norm": 95.30789184570312, "learning_rate": 2.99e-06, "loss": 16.4945, "step": 300 }, { "epoch": 0.15213257267046998, "grad_norm": 81.99232482910156, "learning_rate": 3.49e-06, "loss": 16.2094, "step": 350 }, { "epoch": 0.17386579733767998, "grad_norm": 65.64993286132812, "learning_rate": 3.990000000000001e-06, "loss": 16.4501, "step": 400 }, { "epoch": 0.19559902200489, "grad_norm": 98.58848571777344, "learning_rate": 4.49e-06, "loss": 16.386, "step": 450 }, { "epoch": 0.21733224667209997, "grad_norm": 85.1087646484375, "learning_rate": 4.9900000000000005e-06, "loss": 16.453, "step": 500 }, { "epoch": 0.23906547133930997, "grad_norm": 68.43016052246094, "learning_rate": 5.490000000000001e-06, "loss": 16.245, "step": 550 }, { "epoch": 0.26079869600651995, "grad_norm": 71.09822082519531, "learning_rate": 5.99e-06, "loss": 16.0378, "step": 600 }, { "epoch": 0.28253192067372995, "grad_norm": 71.08029174804688, "learning_rate": 6.4900000000000005e-06, "loss": 16.7085, "step": 650 }, { "epoch": 0.30426514534093996, "grad_norm": 66.14205169677734, "learning_rate": 6.99e-06, "loss": 16.3454, "step": 700 }, { "epoch": 0.32599837000814996, "grad_norm": 93.4423599243164, "learning_rate": 7.49e-06, "loss": 16.4424, "step": 750 }, { "epoch": 0.34773159467535997, "grad_norm": 80.68280029296875, "learning_rate": 7.990000000000001e-06, "loss": 16.5934, "step": 800 }, { "epoch": 0.36946481934257, "grad_norm": 50.99578094482422, "learning_rate": 8.49e-06, "loss": 16.4154, "step": 850 }, { "epoch": 0.39119804400978, "grad_norm": 73.9505386352539, "learning_rate": 8.99e-06, "loss": 16.4743, "step": 900 }, { "epoch": 0.4129312686769899, "grad_norm": 53.72673034667969, "learning_rate": 9.49e-06, "loss": 16.5231, "step": 950 }, { "epoch": 0.43466449334419993, "grad_norm": 80.76425170898438, "learning_rate": 9.990000000000001e-06, "loss": 16.3367, "step": 1000 }, { "epoch": 0.45639771801140994, "grad_norm": 88.06133270263672, "learning_rate": 9.995703638754933e-06, "loss": 16.7444, "step": 1050 }, { "epoch": 0.47813094267861994, "grad_norm": 49.88163757324219, "learning_rate": 9.991319596668128e-06, "loss": 16.4815, "step": 1100 }, { "epoch": 0.49986416734582995, "grad_norm": 66.88523864746094, "learning_rate": 9.986935554581326e-06, "loss": 16.467, "step": 1150 }, { "epoch": 0.5215973920130399, "grad_norm": 65.38529968261719, "learning_rate": 9.982551512494521e-06, "loss": 16.4785, "step": 1200 }, { "epoch": 0.54333061668025, "grad_norm": 54.72123336791992, "learning_rate": 9.978167470407716e-06, "loss": 16.5669, "step": 1250 }, { "epoch": 0.5650638413474599, "grad_norm": 52.83816909790039, "learning_rate": 9.973783428320912e-06, "loss": 16.3767, "step": 1300 }, { "epoch": 0.58679706601467, "grad_norm": 72.39130401611328, "learning_rate": 9.969399386234109e-06, "loss": 16.2847, "step": 1350 }, { "epoch": 0.6085302906818799, "grad_norm": 52.192020416259766, "learning_rate": 9.965015344147304e-06, "loss": 16.3576, "step": 1400 }, { "epoch": 0.63026351534909, "grad_norm": 50.59873962402344, "learning_rate": 9.960631302060501e-06, "loss": 15.9625, "step": 1450 }, { "epoch": 0.6519967400162999, "grad_norm": 253.75856018066406, "learning_rate": 9.956247259973697e-06, "loss": 16.3934, "step": 1500 }, { "epoch": 0.6737299646835099, "grad_norm": 43.8497314453125, "learning_rate": 9.951863217886892e-06, "loss": 16.686, "step": 1550 }, { "epoch": 0.6954631893507199, "grad_norm": 48.21563720703125, "learning_rate": 9.947479175800089e-06, "loss": 16.4491, "step": 1600 }, { "epoch": 0.7171964140179299, "grad_norm": 59.72194290161133, "learning_rate": 9.943095133713284e-06, "loss": 16.1068, "step": 1650 }, { "epoch": 0.73892963868514, "grad_norm": 117.0466079711914, "learning_rate": 9.93871109162648e-06, "loss": 16.3469, "step": 1700 }, { "epoch": 0.7606628633523499, "grad_norm": 48.334346771240234, "learning_rate": 9.934327049539675e-06, "loss": 16.3334, "step": 1750 }, { "epoch": 0.78239608801956, "grad_norm": 54.45792007446289, "learning_rate": 9.929943007452872e-06, "loss": 16.2452, "step": 1800 }, { "epoch": 0.8041293126867699, "grad_norm": 39.005428314208984, "learning_rate": 9.925558965366068e-06, "loss": 16.0999, "step": 1850 }, { "epoch": 0.8258625373539799, "grad_norm": 67.72175598144531, "learning_rate": 9.921174923279265e-06, "loss": 16.2755, "step": 1900 }, { "epoch": 0.8475957620211899, "grad_norm": 72.75259399414062, "learning_rate": 9.91679088119246e-06, "loss": 16.5987, "step": 1950 }, { "epoch": 0.8693289866883999, "grad_norm": 58.86764144897461, "learning_rate": 9.912406839105656e-06, "loss": 16.4092, "step": 2000 }, { "epoch": 0.8910622113556099, "grad_norm": 73.46177673339844, "learning_rate": 9.908022797018853e-06, "loss": 16.637, "step": 2050 }, { "epoch": 0.9127954360228199, "grad_norm": 62.41428756713867, "learning_rate": 9.903638754932048e-06, "loss": 16.528, "step": 2100 }, { "epoch": 0.9345286606900299, "grad_norm": 65.01278686523438, "learning_rate": 9.899254712845245e-06, "loss": 16.5003, "step": 2150 }, { "epoch": 0.9562618853572399, "grad_norm": 57.43757247924805, "learning_rate": 9.894870670758439e-06, "loss": 16.3343, "step": 2200 }, { "epoch": 0.9779951100244498, "grad_norm": 55.26877975463867, "learning_rate": 9.890486628671636e-06, "loss": 16.4804, "step": 2250 }, { "epoch": 0.9997283346916599, "grad_norm": 59.66270446777344, "learning_rate": 9.886102586584833e-06, "loss": 16.1125, "step": 2300 }, { "epoch": 1.0212985601738658, "grad_norm": 65.677490234375, "learning_rate": 9.881718544498028e-06, "loss": 15.5081, "step": 2350 }, { "epoch": 1.0430317848410757, "grad_norm": 46.62354278564453, "learning_rate": 9.877334502411224e-06, "loss": 15.5581, "step": 2400 }, { "epoch": 1.0647650095082857, "grad_norm": 47.7025032043457, "learning_rate": 9.87295046032442e-06, "loss": 15.2401, "step": 2450 }, { "epoch": 1.0864982341754958, "grad_norm": 57.22602081298828, "learning_rate": 9.868566418237616e-06, "loss": 15.5116, "step": 2500 }, { "epoch": 1.1082314588427058, "grad_norm": 51.149818420410156, "learning_rate": 9.864182376150812e-06, "loss": 15.5938, "step": 2550 }, { "epoch": 1.1299646835099157, "grad_norm": 80.70169067382812, "learning_rate": 9.859798334064009e-06, "loss": 15.5891, "step": 2600 }, { "epoch": 1.1516979081771257, "grad_norm": 59.62293243408203, "learning_rate": 9.855414291977204e-06, "loss": 15.6038, "step": 2650 }, { "epoch": 1.1734311328443359, "grad_norm": 109.22635650634766, "learning_rate": 9.8510302498904e-06, "loss": 15.4956, "step": 2700 }, { "epoch": 1.1951643575115458, "grad_norm": 54.90534591674805, "learning_rate": 9.846646207803597e-06, "loss": 15.5951, "step": 2750 }, { "epoch": 1.2168975821787558, "grad_norm": 130.99798583984375, "learning_rate": 9.842262165716792e-06, "loss": 15.5201, "step": 2800 }, { "epoch": 1.2386308068459657, "grad_norm": 45.625389099121094, "learning_rate": 9.837878123629987e-06, "loss": 15.4576, "step": 2850 }, { "epoch": 1.2603640315131757, "grad_norm": 36.033836364746094, "learning_rate": 9.833494081543183e-06, "loss": 15.4927, "step": 2900 }, { "epoch": 1.2820972561803858, "grad_norm": 52.81075668334961, "learning_rate": 9.82911003945638e-06, "loss": 15.4669, "step": 2950 }, { "epoch": 1.3038304808475958, "grad_norm": 43.44648361206055, "learning_rate": 9.824725997369575e-06, "loss": 15.5326, "step": 3000 }, { "epoch": 1.3255637055148057, "grad_norm": 40.79172134399414, "learning_rate": 9.820341955282772e-06, "loss": 15.6252, "step": 3050 }, { "epoch": 1.3472969301820157, "grad_norm": 54.189910888671875, "learning_rate": 9.815957913195968e-06, "loss": 15.5897, "step": 3100 }, { "epoch": 1.3690301548492259, "grad_norm": 55.73503112792969, "learning_rate": 9.811573871109163e-06, "loss": 15.6081, "step": 3150 }, { "epoch": 1.3907633795164358, "grad_norm": 67.98750305175781, "learning_rate": 9.80718982902236e-06, "loss": 15.5154, "step": 3200 }, { "epoch": 1.4124966041836458, "grad_norm": 61.99040222167969, "learning_rate": 9.802805786935556e-06, "loss": 15.464, "step": 3250 }, { "epoch": 1.4342298288508557, "grad_norm": 42.3632926940918, "learning_rate": 9.798421744848751e-06, "loss": 15.3425, "step": 3300 }, { "epoch": 1.4559630535180657, "grad_norm": 46.5098991394043, "learning_rate": 9.794037702761946e-06, "loss": 15.5891, "step": 3350 }, { "epoch": 1.4776962781852756, "grad_norm": 59.43826675415039, "learning_rate": 9.789653660675143e-06, "loss": 15.6337, "step": 3400 }, { "epoch": 1.4994295028524858, "grad_norm": 76.57585906982422, "learning_rate": 9.785269618588339e-06, "loss": 15.4892, "step": 3450 }, { "epoch": 1.5211627275196957, "grad_norm": 65.46538543701172, "learning_rate": 9.780885576501536e-06, "loss": 15.6873, "step": 3500 }, { "epoch": 1.542895952186906, "grad_norm": 65.29698181152344, "learning_rate": 9.776501534414731e-06, "loss": 15.5051, "step": 3550 }, { "epoch": 1.5646291768541158, "grad_norm": 46.745784759521484, "learning_rate": 9.772117492327927e-06, "loss": 15.6602, "step": 3600 }, { "epoch": 1.5863624015213258, "grad_norm": 44.605228424072266, "learning_rate": 9.767733450241124e-06, "loss": 15.5229, "step": 3650 }, { "epoch": 1.6080956261885357, "grad_norm": 47.4207649230957, "learning_rate": 9.763349408154319e-06, "loss": 15.5634, "step": 3700 }, { "epoch": 1.6298288508557457, "grad_norm": 43.18611145019531, "learning_rate": 9.758965366067516e-06, "loss": 15.5222, "step": 3750 }, { "epoch": 1.6515620755229556, "grad_norm": 39.6898078918457, "learning_rate": 9.754581323980712e-06, "loss": 15.5259, "step": 3800 }, { "epoch": 1.6732953001901656, "grad_norm": 71.0409164428711, "learning_rate": 9.750197281893907e-06, "loss": 15.5971, "step": 3850 }, { "epoch": 1.6950285248573758, "grad_norm": 53.462467193603516, "learning_rate": 9.745813239807102e-06, "loss": 15.4515, "step": 3900 }, { "epoch": 1.7167617495245857, "grad_norm": 40.28457260131836, "learning_rate": 9.7414291977203e-06, "loss": 15.4006, "step": 3950 }, { "epoch": 1.7384949741917957, "grad_norm": 50.27900695800781, "learning_rate": 9.737045155633495e-06, "loss": 15.3051, "step": 4000 }, { "epoch": 1.7602281988590058, "grad_norm": 44.33418655395508, "learning_rate": 9.73266111354669e-06, "loss": 15.7606, "step": 4050 }, { "epoch": 1.7819614235262158, "grad_norm": 65.12041473388672, "learning_rate": 9.728277071459887e-06, "loss": 15.4747, "step": 4100 }, { "epoch": 1.8036946481934257, "grad_norm": 50.64781951904297, "learning_rate": 9.723893029373083e-06, "loss": 15.5251, "step": 4150 }, { "epoch": 1.8254278728606357, "grad_norm": 37.71573257446289, "learning_rate": 9.71950898728628e-06, "loss": 15.4422, "step": 4200 }, { "epoch": 1.8471610975278456, "grad_norm": 53.08781433105469, "learning_rate": 9.715124945199475e-06, "loss": 15.6055, "step": 4250 }, { "epoch": 1.8688943221950556, "grad_norm": 40.412384033203125, "learning_rate": 9.71074090311267e-06, "loss": 15.3275, "step": 4300 }, { "epoch": 1.8906275468622655, "grad_norm": 81.10236358642578, "learning_rate": 9.706356861025866e-06, "loss": 15.391, "step": 4350 }, { "epoch": 1.9123607715294757, "grad_norm": 73.39491271972656, "learning_rate": 9.701972818939063e-06, "loss": 15.7053, "step": 4400 }, { "epoch": 1.9340939961966856, "grad_norm": 42.71440124511719, "learning_rate": 9.697588776852258e-06, "loss": 15.484, "step": 4450 }, { "epoch": 1.9558272208638958, "grad_norm": 73.08609008789062, "learning_rate": 9.693204734765454e-06, "loss": 15.5822, "step": 4500 }, { "epoch": 1.9775604455311058, "grad_norm": 66.59615325927734, "learning_rate": 9.68882069267865e-06, "loss": 15.6825, "step": 4550 }, { "epoch": 1.9992936701983157, "grad_norm": 63.10333251953125, "learning_rate": 9.684436650591846e-06, "loss": 15.5203, "step": 4600 }, { "epoch": 2.0208638956805216, "grad_norm": 57.844970703125, "learning_rate": 9.680052608505043e-06, "loss": 14.5109, "step": 4650 }, { "epoch": 2.0425971203477316, "grad_norm": 36.37318420410156, "learning_rate": 9.675668566418239e-06, "loss": 14.7488, "step": 4700 }, { "epoch": 2.0643303450149415, "grad_norm": 72.80779266357422, "learning_rate": 9.671284524331434e-06, "loss": 14.9111, "step": 4750 }, { "epoch": 2.0860635696821515, "grad_norm": 71.37971496582031, "learning_rate": 9.66690048224463e-06, "loss": 14.6878, "step": 4800 }, { "epoch": 2.1077967943493614, "grad_norm": 42.20883560180664, "learning_rate": 9.662516440157827e-06, "loss": 14.7469, "step": 4850 }, { "epoch": 2.1295300190165714, "grad_norm": 53.63486862182617, "learning_rate": 9.658132398071022e-06, "loss": 14.6422, "step": 4900 }, { "epoch": 2.1512632436837817, "grad_norm": 54.38608932495117, "learning_rate": 9.653748355984219e-06, "loss": 14.6238, "step": 4950 }, { "epoch": 2.1729964683509917, "grad_norm": 44.58712387084961, "learning_rate": 9.649364313897414e-06, "loss": 14.6619, "step": 5000 }, { "epoch": 2.1947296930182016, "grad_norm": 46.281524658203125, "learning_rate": 9.64498027181061e-06, "loss": 14.8443, "step": 5050 }, { "epoch": 2.2164629176854116, "grad_norm": 38.51953887939453, "learning_rate": 9.640596229723807e-06, "loss": 14.9273, "step": 5100 }, { "epoch": 2.2381961423526215, "grad_norm": 53.27817153930664, "learning_rate": 9.636212187637002e-06, "loss": 14.6411, "step": 5150 }, { "epoch": 2.2599293670198315, "grad_norm": 43.47584533691406, "learning_rate": 9.631828145550198e-06, "loss": 14.7459, "step": 5200 }, { "epoch": 2.2816625916870414, "grad_norm": 37.26194381713867, "learning_rate": 9.627444103463393e-06, "loss": 14.9103, "step": 5250 }, { "epoch": 2.3033958163542514, "grad_norm": 38.729373931884766, "learning_rate": 9.62306006137659e-06, "loss": 14.5397, "step": 5300 }, { "epoch": 2.3251290410214613, "grad_norm": 33.352901458740234, "learning_rate": 9.618676019289785e-06, "loss": 14.9044, "step": 5350 }, { "epoch": 2.3468622656886717, "grad_norm": 47.63081359863281, "learning_rate": 9.614291977202983e-06, "loss": 14.5471, "step": 5400 }, { "epoch": 2.3685954903558817, "grad_norm": 125.63179016113281, "learning_rate": 9.609907935116178e-06, "loss": 14.8267, "step": 5450 }, { "epoch": 2.3903287150230916, "grad_norm": 49.1522216796875, "learning_rate": 9.605523893029373e-06, "loss": 14.7433, "step": 5500 }, { "epoch": 2.4120619396903016, "grad_norm": 43.327091217041016, "learning_rate": 9.60113985094257e-06, "loss": 14.908, "step": 5550 }, { "epoch": 2.4337951643575115, "grad_norm": 30.76859474182129, "learning_rate": 9.596755808855766e-06, "loss": 14.7341, "step": 5600 }, { "epoch": 2.4555283890247215, "grad_norm": 42.72526550292969, "learning_rate": 9.592371766768961e-06, "loss": 15.0516, "step": 5650 }, { "epoch": 2.4772616136919314, "grad_norm": 58.9193000793457, "learning_rate": 9.587987724682157e-06, "loss": 14.8745, "step": 5700 }, { "epoch": 2.4989948383591414, "grad_norm": 53.90520095825195, "learning_rate": 9.583603682595354e-06, "loss": 14.8219, "step": 5750 }, { "epoch": 2.5207280630263513, "grad_norm": 61.370452880859375, "learning_rate": 9.579219640508549e-06, "loss": 14.6925, "step": 5800 }, { "epoch": 2.5424612876935617, "grad_norm": 47.58317184448242, "learning_rate": 9.574835598421746e-06, "loss": 14.8654, "step": 5850 }, { "epoch": 2.5641945123607717, "grad_norm": 51.90703582763672, "learning_rate": 9.570451556334942e-06, "loss": 14.8152, "step": 5900 }, { "epoch": 2.5859277370279816, "grad_norm": 42.62101364135742, "learning_rate": 9.566067514248137e-06, "loss": 15.0139, "step": 5950 }, { "epoch": 2.6076609616951916, "grad_norm": 58.69119644165039, "learning_rate": 9.561683472161334e-06, "loss": 14.8962, "step": 6000 }, { "epoch": 2.6293941863624015, "grad_norm": 58.02621841430664, "learning_rate": 9.55729943007453e-06, "loss": 15.0422, "step": 6050 }, { "epoch": 2.6511274110296115, "grad_norm": 45.985225677490234, "learning_rate": 9.552915387987726e-06, "loss": 14.8361, "step": 6100 }, { "epoch": 2.6728606356968214, "grad_norm": 58.74437713623047, "learning_rate": 9.548531345900922e-06, "loss": 14.9231, "step": 6150 }, { "epoch": 2.6945938603640314, "grad_norm": 54.490962982177734, "learning_rate": 9.544147303814117e-06, "loss": 14.7987, "step": 6200 }, { "epoch": 2.7163270850312413, "grad_norm": 44.067710876464844, "learning_rate": 9.539763261727313e-06, "loss": 14.8596, "step": 6250 }, { "epoch": 2.7380603096984517, "grad_norm": 56.0435676574707, "learning_rate": 9.53537921964051e-06, "loss": 14.7602, "step": 6300 }, { "epoch": 2.7597935343656617, "grad_norm": 68.08670806884766, "learning_rate": 9.530995177553705e-06, "loss": 14.78, "step": 6350 }, { "epoch": 2.7815267590328716, "grad_norm": 55.21569061279297, "learning_rate": 9.5266111354669e-06, "loss": 14.908, "step": 6400 }, { "epoch": 2.8032599837000816, "grad_norm": 48.79258346557617, "learning_rate": 9.522227093380098e-06, "loss": 14.8478, "step": 6450 }, { "epoch": 2.8249932083672915, "grad_norm": 61.38957214355469, "learning_rate": 9.517843051293293e-06, "loss": 14.9716, "step": 6500 }, { "epoch": 2.8467264330345015, "grad_norm": 53.00950622558594, "learning_rate": 9.51345900920649e-06, "loss": 14.7508, "step": 6550 }, { "epoch": 2.8684596577017114, "grad_norm": 43.13687515258789, "learning_rate": 9.509074967119685e-06, "loss": 14.8466, "step": 6600 }, { "epoch": 2.8901928823689214, "grad_norm": 54.39565658569336, "learning_rate": 9.50469092503288e-06, "loss": 14.8025, "step": 6650 }, { "epoch": 2.9119261070361313, "grad_norm": 47.21046447753906, "learning_rate": 9.500306882946078e-06, "loss": 14.8096, "step": 6700 }, { "epoch": 2.9336593317033417, "grad_norm": 51.13401412963867, "learning_rate": 9.495922840859273e-06, "loss": 14.9482, "step": 6750 }, { "epoch": 2.955392556370551, "grad_norm": 47.619503021240234, "learning_rate": 9.491538798772469e-06, "loss": 14.9805, "step": 6800 }, { "epoch": 2.9771257810377616, "grad_norm": 40.876808166503906, "learning_rate": 9.487154756685664e-06, "loss": 14.9048, "step": 6850 }, { "epoch": 2.9988590057049715, "grad_norm": 52.037567138671875, "learning_rate": 9.482770714598861e-06, "loss": 14.9026, "step": 6900 }, { "epoch": 3.0204292311871774, "grad_norm": 41.4274787902832, "learning_rate": 9.478386672512057e-06, "loss": 14.1481, "step": 6950 }, { "epoch": 3.0421624558543874, "grad_norm": 51.49604797363281, "learning_rate": 9.474002630425254e-06, "loss": 14.1383, "step": 7000 }, { "epoch": 3.0638956805215973, "grad_norm": 53.052005767822266, "learning_rate": 9.469618588338449e-06, "loss": 14.2031, "step": 7050 }, { "epoch": 3.0856289051888073, "grad_norm": 29.748735427856445, "learning_rate": 9.465234546251644e-06, "loss": 14.2273, "step": 7100 }, { "epoch": 3.1073621298560172, "grad_norm": 41.33003616333008, "learning_rate": 9.460850504164841e-06, "loss": 14.2338, "step": 7150 }, { "epoch": 3.129095354523227, "grad_norm": 39.78664779663086, "learning_rate": 9.456466462078037e-06, "loss": 14.1309, "step": 7200 }, { "epoch": 3.1508285791904376, "grad_norm": 35.99256896972656, "learning_rate": 9.452082419991234e-06, "loss": 14.2261, "step": 7250 }, { "epoch": 3.1725618038576475, "grad_norm": 40.001197814941406, "learning_rate": 9.44769837790443e-06, "loss": 14.1562, "step": 7300 }, { "epoch": 3.1942950285248575, "grad_norm": 48.2380256652832, "learning_rate": 9.443314335817625e-06, "loss": 14.0423, "step": 7350 }, { "epoch": 3.2160282531920674, "grad_norm": 44.41048812866211, "learning_rate": 9.43893029373082e-06, "loss": 14.0881, "step": 7400 }, { "epoch": 3.2377614778592774, "grad_norm": 29.655723571777344, "learning_rate": 9.434546251644017e-06, "loss": 14.2163, "step": 7450 }, { "epoch": 3.2594947025264873, "grad_norm": 40.9448356628418, "learning_rate": 9.430162209557213e-06, "loss": 14.2057, "step": 7500 }, { "epoch": 3.2812279271936973, "grad_norm": 50.84467315673828, "learning_rate": 9.425778167470408e-06, "loss": 14.2386, "step": 7550 }, { "epoch": 3.302961151860907, "grad_norm": 46.98764419555664, "learning_rate": 9.421394125383605e-06, "loss": 14.2241, "step": 7600 }, { "epoch": 3.324694376528117, "grad_norm": 46.322166442871094, "learning_rate": 9.4170100832968e-06, "loss": 14.1992, "step": 7650 }, { "epoch": 3.3464276011953276, "grad_norm": 45.123985290527344, "learning_rate": 9.412626041209997e-06, "loss": 14.2106, "step": 7700 }, { "epoch": 3.3681608258625375, "grad_norm": 50.508583068847656, "learning_rate": 9.408241999123193e-06, "loss": 14.0418, "step": 7750 }, { "epoch": 3.3898940505297475, "grad_norm": 42.03702926635742, "learning_rate": 9.403857957036388e-06, "loss": 14.298, "step": 7800 }, { "epoch": 3.4116272751969574, "grad_norm": 49.16743469238281, "learning_rate": 9.399473914949584e-06, "loss": 14.2654, "step": 7850 }, { "epoch": 3.4333604998641674, "grad_norm": 47.92793273925781, "learning_rate": 9.39508987286278e-06, "loss": 14.3558, "step": 7900 }, { "epoch": 3.4550937245313773, "grad_norm": 37.79042434692383, "learning_rate": 9.390705830775976e-06, "loss": 14.2301, "step": 7950 }, { "epoch": 3.4768269491985873, "grad_norm": 41.851051330566406, "learning_rate": 9.386321788689171e-06, "loss": 14.2025, "step": 8000 }, { "epoch": 3.498560173865797, "grad_norm": 58.03968811035156, "learning_rate": 9.381937746602369e-06, "loss": 14.3552, "step": 8050 }, { "epoch": 3.520293398533007, "grad_norm": 34.9418830871582, "learning_rate": 9.377553704515564e-06, "loss": 14.3336, "step": 8100 }, { "epoch": 3.5420266232002175, "grad_norm": 41.05316162109375, "learning_rate": 9.373169662428761e-06, "loss": 14.2819, "step": 8150 }, { "epoch": 3.563759847867427, "grad_norm": 45.65940856933594, "learning_rate": 9.368785620341956e-06, "loss": 14.2237, "step": 8200 }, { "epoch": 3.5854930725346374, "grad_norm": 37.271751403808594, "learning_rate": 9.364401578255152e-06, "loss": 14.0594, "step": 8250 }, { "epoch": 3.6072262972018474, "grad_norm": 41.95325469970703, "learning_rate": 9.360017536168347e-06, "loss": 14.1691, "step": 8300 }, { "epoch": 3.6289595218690573, "grad_norm": 91.28557586669922, "learning_rate": 9.355633494081544e-06, "loss": 14.1016, "step": 8350 }, { "epoch": 3.6506927465362673, "grad_norm": 56.508670806884766, "learning_rate": 9.35124945199474e-06, "loss": 14.2686, "step": 8400 }, { "epoch": 3.6724259712034772, "grad_norm": 69.8916015625, "learning_rate": 9.346865409907935e-06, "loss": 14.3426, "step": 8450 }, { "epoch": 3.694159195870687, "grad_norm": 51.414215087890625, "learning_rate": 9.342481367821132e-06, "loss": 14.3489, "step": 8500 }, { "epoch": 3.715892420537897, "grad_norm": 51.891639709472656, "learning_rate": 9.338097325734328e-06, "loss": 14.2614, "step": 8550 }, { "epoch": 3.7376256452051075, "grad_norm": 55.276275634765625, "learning_rate": 9.333713283647525e-06, "loss": 14.1835, "step": 8600 }, { "epoch": 3.759358869872317, "grad_norm": 38.33846664428711, "learning_rate": 9.32932924156072e-06, "loss": 14.5973, "step": 8650 }, { "epoch": 3.7810920945395274, "grad_norm": 46.052513122558594, "learning_rate": 9.324945199473915e-06, "loss": 14.3903, "step": 8700 }, { "epoch": 3.8028253192067374, "grad_norm": 35.333560943603516, "learning_rate": 9.32056115738711e-06, "loss": 14.3516, "step": 8750 }, { "epoch": 3.8245585438739473, "grad_norm": 52.49406051635742, "learning_rate": 9.316177115300308e-06, "loss": 14.371, "step": 8800 }, { "epoch": 3.8462917685411573, "grad_norm": 48.86211013793945, "learning_rate": 9.311793073213503e-06, "loss": 14.2632, "step": 8850 }, { "epoch": 3.8680249932083672, "grad_norm": 48.95231628417969, "learning_rate": 9.3074090311267e-06, "loss": 14.1847, "step": 8900 }, { "epoch": 3.889758217875577, "grad_norm": 37.594696044921875, "learning_rate": 9.303024989039896e-06, "loss": 14.3286, "step": 8950 }, { "epoch": 3.911491442542787, "grad_norm": 47.66452407836914, "learning_rate": 9.298640946953091e-06, "loss": 14.3358, "step": 9000 }, { "epoch": 3.9332246672099975, "grad_norm": 40.61109161376953, "learning_rate": 9.294256904866288e-06, "loss": 14.4558, "step": 9050 }, { "epoch": 3.954957891877207, "grad_norm": 34.296836853027344, "learning_rate": 9.289872862779484e-06, "loss": 14.3049, "step": 9100 }, { "epoch": 3.9766911165444174, "grad_norm": 43.91560363769531, "learning_rate": 9.285488820692679e-06, "loss": 14.3231, "step": 9150 }, { "epoch": 3.9984243412116274, "grad_norm": 37.4168586730957, "learning_rate": 9.281104778605874e-06, "loss": 14.2418, "step": 9200 }, { "epoch": 4.019994566693833, "grad_norm": 34.46104049682617, "learning_rate": 9.276720736519071e-06, "loss": 13.6035, "step": 9250 }, { "epoch": 4.041727791361043, "grad_norm": 38.560298919677734, "learning_rate": 9.272336694432267e-06, "loss": 13.6357, "step": 9300 }, { "epoch": 4.063461016028254, "grad_norm": 35.547752380371094, "learning_rate": 9.267952652345464e-06, "loss": 13.798, "step": 9350 }, { "epoch": 4.085194240695463, "grad_norm": 36.332298278808594, "learning_rate": 9.26356861025866e-06, "loss": 13.6992, "step": 9400 }, { "epoch": 4.1069274653626735, "grad_norm": 40.322715759277344, "learning_rate": 9.259184568171855e-06, "loss": 13.7247, "step": 9450 }, { "epoch": 4.128660690029883, "grad_norm": 27.05885887145996, "learning_rate": 9.254800526085052e-06, "loss": 13.801, "step": 9500 }, { "epoch": 4.150393914697093, "grad_norm": 38.66703414916992, "learning_rate": 9.250416483998247e-06, "loss": 13.7814, "step": 9550 }, { "epoch": 4.172127139364303, "grad_norm": 37.8776969909668, "learning_rate": 9.246032441911443e-06, "loss": 13.7403, "step": 9600 }, { "epoch": 4.193860364031513, "grad_norm": 36.977317810058594, "learning_rate": 9.241648399824638e-06, "loss": 13.8831, "step": 9650 }, { "epoch": 4.215593588698723, "grad_norm": 43.09788131713867, "learning_rate": 9.237264357737835e-06, "loss": 13.7397, "step": 9700 }, { "epoch": 4.237326813365933, "grad_norm": 33.9801139831543, "learning_rate": 9.23288031565103e-06, "loss": 13.8114, "step": 9750 }, { "epoch": 4.259060038033143, "grad_norm": 32.65711212158203, "learning_rate": 9.228496273564227e-06, "loss": 13.7081, "step": 9800 }, { "epoch": 4.280793262700353, "grad_norm": 71.91608428955078, "learning_rate": 9.224112231477423e-06, "loss": 13.7953, "step": 9850 }, { "epoch": 4.3025264873675635, "grad_norm": 28.490583419799805, "learning_rate": 9.219728189390618e-06, "loss": 13.7322, "step": 9900 }, { "epoch": 4.324259712034773, "grad_norm": 49.53886795043945, "learning_rate": 9.215344147303815e-06, "loss": 13.8046, "step": 9950 }, { "epoch": 4.345992936701983, "grad_norm": 40.42410659790039, "learning_rate": 9.21096010521701e-06, "loss": 13.735, "step": 10000 }, { "epoch": 4.345992936701983, "eval_cer": 0.07540147877501142, "eval_loss": 2.2973904609680176, "eval_runtime": 396.0823, "eval_samples_per_second": 13.649, "eval_steps_per_second": 3.413, "eval_wer": 0.22808918197519235, "step": 10000 }, { "epoch": 4.367726161369193, "grad_norm": 32.75251388549805, "learning_rate": 9.206576063130208e-06, "loss": 13.8058, "step": 10050 }, { "epoch": 4.389459386036403, "grad_norm": 35.6936149597168, "learning_rate": 9.202192021043403e-06, "loss": 13.7489, "step": 10100 }, { "epoch": 4.411192610703613, "grad_norm": 39.304100036621094, "learning_rate": 9.197807978956599e-06, "loss": 13.8124, "step": 10150 }, { "epoch": 4.432925835370823, "grad_norm": 39.43230438232422, "learning_rate": 9.193423936869794e-06, "loss": 13.9531, "step": 10200 }, { "epoch": 4.454659060038033, "grad_norm": 37.89631652832031, "learning_rate": 9.189039894782991e-06, "loss": 13.7975, "step": 10250 }, { "epoch": 4.476392284705243, "grad_norm": 36.32379150390625, "learning_rate": 9.184655852696186e-06, "loss": 13.9208, "step": 10300 }, { "epoch": 4.4981255093724535, "grad_norm": 39.24440002441406, "learning_rate": 9.180271810609382e-06, "loss": 13.88, "step": 10350 }, { "epoch": 4.519858734039663, "grad_norm": 32.791900634765625, "learning_rate": 9.175887768522579e-06, "loss": 13.8944, "step": 10400 }, { "epoch": 4.541591958706873, "grad_norm": 33.695865631103516, "learning_rate": 9.171503726435774e-06, "loss": 13.8637, "step": 10450 }, { "epoch": 4.563325183374083, "grad_norm": 33.961647033691406, "learning_rate": 9.167119684348971e-06, "loss": 13.7873, "step": 10500 }, { "epoch": 4.585058408041293, "grad_norm": 101.09957122802734, "learning_rate": 9.162735642262167e-06, "loss": 13.848, "step": 10550 }, { "epoch": 4.606791632708503, "grad_norm": 42.666595458984375, "learning_rate": 9.158351600175362e-06, "loss": 13.9049, "step": 10600 }, { "epoch": 4.628524857375713, "grad_norm": 44.05756378173828, "learning_rate": 9.153967558088558e-06, "loss": 13.9251, "step": 10650 }, { "epoch": 4.650258082042923, "grad_norm": 44.468162536621094, "learning_rate": 9.149583516001755e-06, "loss": 13.7975, "step": 10700 }, { "epoch": 4.671991306710133, "grad_norm": 35.0707893371582, "learning_rate": 9.14519947391495e-06, "loss": 13.9261, "step": 10750 }, { "epoch": 4.6937245313773435, "grad_norm": 40.042274475097656, "learning_rate": 9.140815431828145e-06, "loss": 13.8594, "step": 10800 }, { "epoch": 4.715457756044553, "grad_norm": 29.434371948242188, "learning_rate": 9.136431389741342e-06, "loss": 13.8838, "step": 10850 }, { "epoch": 4.737190980711763, "grad_norm": 37.34782409667969, "learning_rate": 9.132047347654538e-06, "loss": 13.7635, "step": 10900 }, { "epoch": 4.758924205378973, "grad_norm": 221.30532836914062, "learning_rate": 9.127663305567735e-06, "loss": 13.76, "step": 10950 }, { "epoch": 4.780657430046183, "grad_norm": 35.67972946166992, "learning_rate": 9.12327926348093e-06, "loss": 13.8596, "step": 11000 }, { "epoch": 4.802390654713393, "grad_norm": 52.167972564697266, "learning_rate": 9.118895221394126e-06, "loss": 13.8706, "step": 11050 }, { "epoch": 4.824123879380603, "grad_norm": 47.18834686279297, "learning_rate": 9.114511179307321e-06, "loss": 13.8763, "step": 11100 }, { "epoch": 4.845857104047813, "grad_norm": 42.99448776245117, "learning_rate": 9.110127137220518e-06, "loss": 13.9622, "step": 11150 }, { "epoch": 4.867590328715023, "grad_norm": 55.08070755004883, "learning_rate": 9.105743095133715e-06, "loss": 13.8151, "step": 11200 }, { "epoch": 4.8893235533822335, "grad_norm": 32.91100311279297, "learning_rate": 9.10135905304691e-06, "loss": 13.8262, "step": 11250 }, { "epoch": 4.911056778049443, "grad_norm": 34.78753662109375, "learning_rate": 9.096975010960106e-06, "loss": 13.928, "step": 11300 }, { "epoch": 4.932790002716653, "grad_norm": 40.1533203125, "learning_rate": 9.092590968873301e-06, "loss": 13.8169, "step": 11350 }, { "epoch": 4.954523227383863, "grad_norm": 41.97115707397461, "learning_rate": 9.088206926786498e-06, "loss": 13.8194, "step": 11400 }, { "epoch": 4.976256452051073, "grad_norm": 30.406110763549805, "learning_rate": 9.083822884699694e-06, "loss": 13.9941, "step": 11450 }, { "epoch": 4.997989676718283, "grad_norm": 44.02429962158203, "learning_rate": 9.07943884261289e-06, "loss": 13.9272, "step": 11500 }, { "epoch": 5.019559902200489, "grad_norm": 42.15421676635742, "learning_rate": 9.075054800526086e-06, "loss": 13.386, "step": 11550 }, { "epoch": 5.041293126867699, "grad_norm": 28.951597213745117, "learning_rate": 9.070670758439282e-06, "loss": 13.4325, "step": 11600 }, { "epoch": 5.0630263515349085, "grad_norm": 59.380374908447266, "learning_rate": 9.066286716352479e-06, "loss": 13.409, "step": 11650 }, { "epoch": 5.084759576202119, "grad_norm": 56.05976867675781, "learning_rate": 9.061902674265674e-06, "loss": 13.5315, "step": 11700 }, { "epoch": 5.106492800869329, "grad_norm": 36.069583892822266, "learning_rate": 9.05751863217887e-06, "loss": 13.3532, "step": 11750 }, { "epoch": 5.128226025536539, "grad_norm": 39.289833068847656, "learning_rate": 9.053134590092065e-06, "loss": 13.3686, "step": 11800 }, { "epoch": 5.149959250203749, "grad_norm": 37.062931060791016, "learning_rate": 9.048750548005262e-06, "loss": 13.4362, "step": 11850 }, { "epoch": 5.171692474870959, "grad_norm": 56.210750579833984, "learning_rate": 9.044366505918457e-06, "loss": 13.4053, "step": 11900 }, { "epoch": 5.193425699538169, "grad_norm": 26.70563507080078, "learning_rate": 9.039982463831653e-06, "loss": 13.3191, "step": 11950 }, { "epoch": 5.215158924205379, "grad_norm": 39.95426940917969, "learning_rate": 9.03559842174485e-06, "loss": 13.3161, "step": 12000 }, { "epoch": 5.236892148872589, "grad_norm": 29.761014938354492, "learning_rate": 9.031214379658045e-06, "loss": 13.5076, "step": 12050 }, { "epoch": 5.2586253735397985, "grad_norm": 32.707786560058594, "learning_rate": 9.026830337571242e-06, "loss": 13.4865, "step": 12100 }, { "epoch": 5.280358598207009, "grad_norm": 30.934314727783203, "learning_rate": 9.022446295484438e-06, "loss": 13.3348, "step": 12150 }, { "epoch": 5.302091822874219, "grad_norm": 38.97114562988281, "learning_rate": 9.018062253397633e-06, "loss": 13.4458, "step": 12200 }, { "epoch": 5.323825047541429, "grad_norm": 52.749507904052734, "learning_rate": 9.013678211310829e-06, "loss": 13.5492, "step": 12250 }, { "epoch": 5.345558272208639, "grad_norm": 37.54782485961914, "learning_rate": 9.009294169224026e-06, "loss": 13.3087, "step": 12300 }, { "epoch": 5.367291496875849, "grad_norm": 40.16310501098633, "learning_rate": 9.004910127137221e-06, "loss": 13.4691, "step": 12350 }, { "epoch": 5.389024721543059, "grad_norm": 58.52961349487305, "learning_rate": 9.000526085050418e-06, "loss": 13.5101, "step": 12400 }, { "epoch": 5.410757946210269, "grad_norm": 31.150737762451172, "learning_rate": 8.996142042963613e-06, "loss": 13.3933, "step": 12450 }, { "epoch": 5.432491170877479, "grad_norm": 31.380889892578125, "learning_rate": 8.991758000876809e-06, "loss": 13.5171, "step": 12500 }, { "epoch": 5.4542243955446885, "grad_norm": 45.46767044067383, "learning_rate": 8.987373958790006e-06, "loss": 13.4807, "step": 12550 }, { "epoch": 5.475957620211899, "grad_norm": 42.970542907714844, "learning_rate": 8.982989916703201e-06, "loss": 13.4787, "step": 12600 }, { "epoch": 5.497690844879109, "grad_norm": 51.134578704833984, "learning_rate": 8.978605874616397e-06, "loss": 13.4804, "step": 12650 }, { "epoch": 5.519424069546319, "grad_norm": 37.2877082824707, "learning_rate": 8.974221832529592e-06, "loss": 13.5335, "step": 12700 }, { "epoch": 5.541157294213529, "grad_norm": 41.327144622802734, "learning_rate": 8.96983779044279e-06, "loss": 13.5202, "step": 12750 }, { "epoch": 5.562890518880739, "grad_norm": 41.8232421875, "learning_rate": 8.965453748355985e-06, "loss": 13.4273, "step": 12800 }, { "epoch": 5.584623743547949, "grad_norm": 34.09703063964844, "learning_rate": 8.961069706269182e-06, "loss": 13.5441, "step": 12850 }, { "epoch": 5.606356968215159, "grad_norm": 34.51966094970703, "learning_rate": 8.956685664182377e-06, "loss": 13.5746, "step": 12900 }, { "epoch": 5.628090192882369, "grad_norm": 44.580360412597656, "learning_rate": 8.952301622095572e-06, "loss": 13.5962, "step": 12950 }, { "epoch": 5.6498234175495785, "grad_norm": 49.46404266357422, "learning_rate": 8.94791758000877e-06, "loss": 13.5788, "step": 13000 }, { "epoch": 5.671556642216789, "grad_norm": 33.00864028930664, "learning_rate": 8.943533537921965e-06, "loss": 13.4571, "step": 13050 }, { "epoch": 5.693289866883999, "grad_norm": 31.570575714111328, "learning_rate": 8.93914949583516e-06, "loss": 13.512, "step": 13100 }, { "epoch": 5.715023091551209, "grad_norm": 31.16398048400879, "learning_rate": 8.934765453748356e-06, "loss": 13.5388, "step": 13150 }, { "epoch": 5.736756316218419, "grad_norm": 31.840078353881836, "learning_rate": 8.930381411661553e-06, "loss": 13.5697, "step": 13200 }, { "epoch": 5.758489540885629, "grad_norm": 41.02314376831055, "learning_rate": 8.925997369574748e-06, "loss": 13.5952, "step": 13250 }, { "epoch": 5.780222765552839, "grad_norm": 38.16290283203125, "learning_rate": 8.921613327487945e-06, "loss": 13.6544, "step": 13300 }, { "epoch": 5.801955990220049, "grad_norm": 34.18564224243164, "learning_rate": 8.91722928540114e-06, "loss": 13.6995, "step": 13350 }, { "epoch": 5.823689214887259, "grad_norm": 27.264175415039062, "learning_rate": 8.912845243314336e-06, "loss": 13.5539, "step": 13400 }, { "epoch": 5.8454224395544685, "grad_norm": 39.271888732910156, "learning_rate": 8.908461201227533e-06, "loss": 13.6108, "step": 13450 }, { "epoch": 5.867155664221679, "grad_norm": 25.51955223083496, "learning_rate": 8.904077159140728e-06, "loss": 13.5095, "step": 13500 }, { "epoch": 5.888888888888889, "grad_norm": 37.255367279052734, "learning_rate": 8.899693117053926e-06, "loss": 13.5916, "step": 13550 }, { "epoch": 5.910622113556099, "grad_norm": 36.901702880859375, "learning_rate": 8.89530907496712e-06, "loss": 13.5283, "step": 13600 }, { "epoch": 5.932355338223309, "grad_norm": 36.892799377441406, "learning_rate": 8.890925032880316e-06, "loss": 13.6032, "step": 13650 }, { "epoch": 5.954088562890519, "grad_norm": 36.8080940246582, "learning_rate": 8.886540990793512e-06, "loss": 13.7407, "step": 13700 }, { "epoch": 5.975821787557729, "grad_norm": 41.102657318115234, "learning_rate": 8.882156948706709e-06, "loss": 13.5335, "step": 13750 }, { "epoch": 5.997555012224939, "grad_norm": 31.643165588378906, "learning_rate": 8.877772906619904e-06, "loss": 13.6137, "step": 13800 }, { "epoch": 6.0191252377071445, "grad_norm": 35.148006439208984, "learning_rate": 8.8733888645331e-06, "loss": 13.1715, "step": 13850 }, { "epoch": 6.040858462374355, "grad_norm": 34.13616943359375, "learning_rate": 8.869004822446297e-06, "loss": 13.1644, "step": 13900 }, { "epoch": 6.062591687041564, "grad_norm": 43.90581512451172, "learning_rate": 8.864620780359492e-06, "loss": 13.0996, "step": 13950 }, { "epoch": 6.084324911708775, "grad_norm": 36.725379943847656, "learning_rate": 8.860236738272689e-06, "loss": 13.1415, "step": 14000 }, { "epoch": 6.106058136375985, "grad_norm": 32.847129821777344, "learning_rate": 8.855852696185884e-06, "loss": 13.1429, "step": 14050 }, { "epoch": 6.127791361043195, "grad_norm": 27.32487678527832, "learning_rate": 8.85146865409908e-06, "loss": 13.2287, "step": 14100 }, { "epoch": 6.149524585710405, "grad_norm": 38.18893051147461, "learning_rate": 8.847084612012275e-06, "loss": 13.1909, "step": 14150 }, { "epoch": 6.171257810377615, "grad_norm": 29.566404342651367, "learning_rate": 8.842700569925472e-06, "loss": 13.1921, "step": 14200 }, { "epoch": 6.192991035044825, "grad_norm": 27.988677978515625, "learning_rate": 8.838316527838668e-06, "loss": 13.1361, "step": 14250 }, { "epoch": 6.2147242597120345, "grad_norm": 36.260833740234375, "learning_rate": 8.833932485751863e-06, "loss": 13.1959, "step": 14300 }, { "epoch": 6.236457484379245, "grad_norm": 37.56959533691406, "learning_rate": 8.82954844366506e-06, "loss": 13.0934, "step": 14350 }, { "epoch": 6.258190709046454, "grad_norm": 37.16026306152344, "learning_rate": 8.825164401578256e-06, "loss": 13.3774, "step": 14400 }, { "epoch": 6.279923933713665, "grad_norm": 51.96893310546875, "learning_rate": 8.820780359491453e-06, "loss": 13.2566, "step": 14450 }, { "epoch": 6.301657158380875, "grad_norm": 31.46018409729004, "learning_rate": 8.816396317404648e-06, "loss": 13.242, "step": 14500 }, { "epoch": 6.323390383048085, "grad_norm": 40.38423538208008, "learning_rate": 8.812012275317843e-06, "loss": 13.2649, "step": 14550 }, { "epoch": 6.345123607715295, "grad_norm": 33.40611267089844, "learning_rate": 8.807628233231039e-06, "loss": 13.2143, "step": 14600 }, { "epoch": 6.3668568323825045, "grad_norm": 32.6546745300293, "learning_rate": 8.803244191144236e-06, "loss": 13.2293, "step": 14650 }, { "epoch": 6.388590057049715, "grad_norm": 30.99147605895996, "learning_rate": 8.798860149057433e-06, "loss": 13.1595, "step": 14700 }, { "epoch": 6.4103232817169244, "grad_norm": 49.923667907714844, "learning_rate": 8.794476106970627e-06, "loss": 13.1624, "step": 14750 }, { "epoch": 6.432056506384135, "grad_norm": 27.526941299438477, "learning_rate": 8.790092064883824e-06, "loss": 13.245, "step": 14800 }, { "epoch": 6.453789731051344, "grad_norm": 41.09890365600586, "learning_rate": 8.785708022797019e-06, "loss": 13.1945, "step": 14850 }, { "epoch": 6.475522955718555, "grad_norm": 36.0584831237793, "learning_rate": 8.781323980710216e-06, "loss": 13.207, "step": 14900 }, { "epoch": 6.497256180385765, "grad_norm": 30.85024642944336, "learning_rate": 8.776939938623412e-06, "loss": 13.2022, "step": 14950 }, { "epoch": 6.518989405052975, "grad_norm": 34.92485427856445, "learning_rate": 8.772555896536607e-06, "loss": 13.3696, "step": 15000 }, { "epoch": 6.540722629720185, "grad_norm": 33.38056564331055, "learning_rate": 8.768171854449802e-06, "loss": 13.2597, "step": 15050 }, { "epoch": 6.5624558543873945, "grad_norm": 29.834815979003906, "learning_rate": 8.763787812363e-06, "loss": 13.2281, "step": 15100 }, { "epoch": 6.584189079054605, "grad_norm": 30.077539443969727, "learning_rate": 8.759403770276197e-06, "loss": 13.2554, "step": 15150 }, { "epoch": 6.605922303721814, "grad_norm": 43.224586486816406, "learning_rate": 8.755019728189392e-06, "loss": 13.1997, "step": 15200 }, { "epoch": 6.627655528389025, "grad_norm": 48.51641082763672, "learning_rate": 8.750635686102587e-06, "loss": 13.2243, "step": 15250 }, { "epoch": 6.649388753056234, "grad_norm": 29.839174270629883, "learning_rate": 8.746251644015783e-06, "loss": 13.3737, "step": 15300 }, { "epoch": 6.671121977723445, "grad_norm": 44.47172546386719, "learning_rate": 8.74186760192898e-06, "loss": 13.1659, "step": 15350 }, { "epoch": 6.692855202390655, "grad_norm": 27.568334579467773, "learning_rate": 8.737483559842175e-06, "loss": 13.2285, "step": 15400 }, { "epoch": 6.714588427057865, "grad_norm": 31.159231185913086, "learning_rate": 8.73309951775537e-06, "loss": 13.3102, "step": 15450 }, { "epoch": 6.736321651725075, "grad_norm": 30.869430541992188, "learning_rate": 8.728715475668566e-06, "loss": 13.2286, "step": 15500 }, { "epoch": 6.7580548763922845, "grad_norm": 51.48735427856445, "learning_rate": 8.724331433581763e-06, "loss": 13.3347, "step": 15550 }, { "epoch": 6.779788101059495, "grad_norm": 35.06986999511719, "learning_rate": 8.71994739149496e-06, "loss": 13.1973, "step": 15600 }, { "epoch": 6.801521325726704, "grad_norm": 27.670289993286133, "learning_rate": 8.715563349408155e-06, "loss": 13.162, "step": 15650 }, { "epoch": 6.823254550393915, "grad_norm": 34.26895523071289, "learning_rate": 8.711179307321351e-06, "loss": 13.3011, "step": 15700 }, { "epoch": 6.844987775061124, "grad_norm": 41.056182861328125, "learning_rate": 8.706795265234546e-06, "loss": 13.2564, "step": 15750 }, { "epoch": 6.866720999728335, "grad_norm": 47.23772048950195, "learning_rate": 8.702411223147743e-06, "loss": 13.3127, "step": 15800 }, { "epoch": 6.888454224395545, "grad_norm": 65.80028533935547, "learning_rate": 8.698027181060939e-06, "loss": 13.2797, "step": 15850 }, { "epoch": 6.910187449062755, "grad_norm": 40.93989562988281, "learning_rate": 8.693643138974134e-06, "loss": 13.3707, "step": 15900 }, { "epoch": 6.931920673729965, "grad_norm": 83.51680755615234, "learning_rate": 8.689259096887331e-06, "loss": 13.217, "step": 15950 }, { "epoch": 6.9536538983971745, "grad_norm": 32.16157150268555, "learning_rate": 8.684875054800527e-06, "loss": 13.274, "step": 16000 }, { "epoch": 6.975387123064385, "grad_norm": 31.57478904724121, "learning_rate": 8.680491012713724e-06, "loss": 13.2057, "step": 16050 }, { "epoch": 6.997120347731594, "grad_norm": 37.837303161621094, "learning_rate": 8.676106970626919e-06, "loss": 13.262, "step": 16100 }, { "epoch": 7.0186905732138, "grad_norm": 24.430326461791992, "learning_rate": 8.671722928540114e-06, "loss": 12.9144, "step": 16150 }, { "epoch": 7.040423797881011, "grad_norm": 45.298194885253906, "learning_rate": 8.66733888645331e-06, "loss": 12.8617, "step": 16200 }, { "epoch": 7.06215702254822, "grad_norm": 52.39512252807617, "learning_rate": 8.662954844366507e-06, "loss": 12.9514, "step": 16250 }, { "epoch": 7.083890247215431, "grad_norm": 35.9492073059082, "learning_rate": 8.658570802279702e-06, "loss": 12.9577, "step": 16300 }, { "epoch": 7.105623471882641, "grad_norm": 31.363454818725586, "learning_rate": 8.6541867601929e-06, "loss": 12.9849, "step": 16350 }, { "epoch": 7.1273566965498505, "grad_norm": 24.993553161621094, "learning_rate": 8.649802718106095e-06, "loss": 12.9269, "step": 16400 }, { "epoch": 7.149089921217061, "grad_norm": 28.327381134033203, "learning_rate": 8.64541867601929e-06, "loss": 12.941, "step": 16450 }, { "epoch": 7.17082314588427, "grad_norm": 30.908496856689453, "learning_rate": 8.641034633932487e-06, "loss": 13.0525, "step": 16500 }, { "epoch": 7.192556370551481, "grad_norm": 41.53740310668945, "learning_rate": 8.636650591845683e-06, "loss": 13.0038, "step": 16550 }, { "epoch": 7.21428959521869, "grad_norm": 34.16611862182617, "learning_rate": 8.632266549758878e-06, "loss": 12.9893, "step": 16600 }, { "epoch": 7.236022819885901, "grad_norm": 28.183107376098633, "learning_rate": 8.627882507672073e-06, "loss": 13.0103, "step": 16650 }, { "epoch": 7.25775604455311, "grad_norm": 28.345674514770508, "learning_rate": 8.62349846558527e-06, "loss": 12.9886, "step": 16700 }, { "epoch": 7.279489269220321, "grad_norm": 36.2637825012207, "learning_rate": 8.619114423498466e-06, "loss": 12.9905, "step": 16750 }, { "epoch": 7.301222493887531, "grad_norm": 32.89162826538086, "learning_rate": 8.614730381411663e-06, "loss": 12.9033, "step": 16800 }, { "epoch": 7.3229557185547405, "grad_norm": 31.151569366455078, "learning_rate": 8.610346339324858e-06, "loss": 13.026, "step": 16850 }, { "epoch": 7.344688943221951, "grad_norm": 32.4716682434082, "learning_rate": 8.605962297238054e-06, "loss": 12.9932, "step": 16900 }, { "epoch": 7.36642216788916, "grad_norm": 28.446046829223633, "learning_rate": 8.60157825515125e-06, "loss": 13.0201, "step": 16950 }, { "epoch": 7.388155392556371, "grad_norm": 27.000221252441406, "learning_rate": 8.597194213064446e-06, "loss": 13.0463, "step": 17000 }, { "epoch": 7.40988861722358, "grad_norm": 35.49698257446289, "learning_rate": 8.592810170977642e-06, "loss": 12.9461, "step": 17050 }, { "epoch": 7.431621841890791, "grad_norm": 48.70148849487305, "learning_rate": 8.588426128890837e-06, "loss": 13.0921, "step": 17100 }, { "epoch": 7.453355066558, "grad_norm": 28.99524688720703, "learning_rate": 8.584042086804034e-06, "loss": 12.9729, "step": 17150 }, { "epoch": 7.475088291225211, "grad_norm": 28.51788902282715, "learning_rate": 8.57965804471723e-06, "loss": 13.0311, "step": 17200 }, { "epoch": 7.496821515892421, "grad_norm": 48.5558967590332, "learning_rate": 8.575274002630427e-06, "loss": 13.0931, "step": 17250 }, { "epoch": 7.5185547405596305, "grad_norm": 35.883365631103516, "learning_rate": 8.570889960543622e-06, "loss": 13.0601, "step": 17300 }, { "epoch": 7.540287965226841, "grad_norm": 30.609474182128906, "learning_rate": 8.566505918456817e-06, "loss": 13.0277, "step": 17350 }, { "epoch": 7.56202118989405, "grad_norm": 31.2172794342041, "learning_rate": 8.562121876370014e-06, "loss": 12.9501, "step": 17400 }, { "epoch": 7.583754414561261, "grad_norm": 42.7708740234375, "learning_rate": 8.55773783428321e-06, "loss": 13.0667, "step": 17450 }, { "epoch": 7.60548763922847, "grad_norm": 30.39897346496582, "learning_rate": 8.553353792196407e-06, "loss": 13.0591, "step": 17500 }, { "epoch": 7.627220863895681, "grad_norm": 26.951528549194336, "learning_rate": 8.548969750109602e-06, "loss": 12.9949, "step": 17550 }, { "epoch": 7.64895408856289, "grad_norm": 33.658206939697266, "learning_rate": 8.544585708022798e-06, "loss": 13.0532, "step": 17600 }, { "epoch": 7.670687313230101, "grad_norm": 34.114768981933594, "learning_rate": 8.540201665935993e-06, "loss": 13.1035, "step": 17650 }, { "epoch": 7.692420537897311, "grad_norm": 29.691999435424805, "learning_rate": 8.53581762384919e-06, "loss": 13.0645, "step": 17700 }, { "epoch": 7.7141537625645205, "grad_norm": 39.269493103027344, "learning_rate": 8.531433581762385e-06, "loss": 13.112, "step": 17750 }, { "epoch": 7.735886987231731, "grad_norm": 37.816837310791016, "learning_rate": 8.527049539675581e-06, "loss": 13.0634, "step": 17800 }, { "epoch": 7.75762021189894, "grad_norm": 36.515132904052734, "learning_rate": 8.522665497588778e-06, "loss": 13.0395, "step": 17850 }, { "epoch": 7.779353436566151, "grad_norm": 22.76226043701172, "learning_rate": 8.518281455501973e-06, "loss": 13.0559, "step": 17900 }, { "epoch": 7.80108666123336, "grad_norm": 28.64872169494629, "learning_rate": 8.51389741341517e-06, "loss": 13.0638, "step": 17950 }, { "epoch": 7.822819885900571, "grad_norm": 41.4809684753418, "learning_rate": 8.509513371328366e-06, "loss": 13.0299, "step": 18000 }, { "epoch": 7.84455311056778, "grad_norm": 25.84028434753418, "learning_rate": 8.505129329241561e-06, "loss": 13.0003, "step": 18050 }, { "epoch": 7.8662863352349905, "grad_norm": 36.24126434326172, "learning_rate": 8.500745287154757e-06, "loss": 13.0231, "step": 18100 }, { "epoch": 7.888019559902201, "grad_norm": 19.62076187133789, "learning_rate": 8.496361245067954e-06, "loss": 13.006, "step": 18150 }, { "epoch": 7.9097527845694104, "grad_norm": 28.422643661499023, "learning_rate": 8.491977202981149e-06, "loss": 12.9981, "step": 18200 }, { "epoch": 7.931486009236621, "grad_norm": 36.77701187133789, "learning_rate": 8.487593160894344e-06, "loss": 13.1429, "step": 18250 }, { "epoch": 7.95321923390383, "grad_norm": 36.51480484008789, "learning_rate": 8.483209118807542e-06, "loss": 12.9689, "step": 18300 }, { "epoch": 7.974952458571041, "grad_norm": 30.303489685058594, "learning_rate": 8.478825076720737e-06, "loss": 13.0392, "step": 18350 }, { "epoch": 7.99668568323825, "grad_norm": 41.148353576660156, "learning_rate": 8.474441034633934e-06, "loss": 13.0697, "step": 18400 }, { "epoch": 8.018255908720457, "grad_norm": 30.144062042236328, "learning_rate": 8.47005699254713e-06, "loss": 12.7092, "step": 18450 }, { "epoch": 8.039989133387666, "grad_norm": 33.70432662963867, "learning_rate": 8.465672950460325e-06, "loss": 12.8033, "step": 18500 }, { "epoch": 8.061722358054876, "grad_norm": 25.66695785522461, "learning_rate": 8.46128890837352e-06, "loss": 12.7704, "step": 18550 }, { "epoch": 8.083455582722086, "grad_norm": 38.33973693847656, "learning_rate": 8.456904866286717e-06, "loss": 12.8512, "step": 18600 }, { "epoch": 8.105188807389297, "grad_norm": 25.794679641723633, "learning_rate": 8.452520824199914e-06, "loss": 12.7138, "step": 18650 }, { "epoch": 8.126922032056507, "grad_norm": 39.2582893371582, "learning_rate": 8.44813678211311e-06, "loss": 12.7657, "step": 18700 }, { "epoch": 8.148655256723716, "grad_norm": 30.886682510375977, "learning_rate": 8.443752740026305e-06, "loss": 12.7667, "step": 18750 }, { "epoch": 8.170388481390926, "grad_norm": 39.30559158325195, "learning_rate": 8.4393686979395e-06, "loss": 12.7548, "step": 18800 }, { "epoch": 8.192121706058137, "grad_norm": 22.945003509521484, "learning_rate": 8.434984655852698e-06, "loss": 12.8788, "step": 18850 }, { "epoch": 8.213854930725347, "grad_norm": 30.998369216918945, "learning_rate": 8.430600613765893e-06, "loss": 12.8048, "step": 18900 }, { "epoch": 8.235588155392556, "grad_norm": 29.44565773010254, "learning_rate": 8.426216571679088e-06, "loss": 12.7907, "step": 18950 }, { "epoch": 8.257321380059766, "grad_norm": 29.368488311767578, "learning_rate": 8.421832529592284e-06, "loss": 12.8157, "step": 19000 }, { "epoch": 8.279054604726976, "grad_norm": 28.4185791015625, "learning_rate": 8.41744848750548e-06, "loss": 12.8382, "step": 19050 }, { "epoch": 8.300787829394187, "grad_norm": 45.91888427734375, "learning_rate": 8.413064445418678e-06, "loss": 12.9013, "step": 19100 }, { "epoch": 8.322521054061397, "grad_norm": 36.90361022949219, "learning_rate": 8.408680403331873e-06, "loss": 12.8076, "step": 19150 }, { "epoch": 8.344254278728606, "grad_norm": 54.692935943603516, "learning_rate": 8.404296361245069e-06, "loss": 12.8288, "step": 19200 }, { "epoch": 8.365987503395816, "grad_norm": 27.947093963623047, "learning_rate": 8.399912319158264e-06, "loss": 12.8577, "step": 19250 }, { "epoch": 8.387720728063027, "grad_norm": 28.992555618286133, "learning_rate": 8.395528277071461e-06, "loss": 12.882, "step": 19300 }, { "epoch": 8.409453952730237, "grad_norm": 22.34044647216797, "learning_rate": 8.391144234984656e-06, "loss": 12.8171, "step": 19350 }, { "epoch": 8.431187177397446, "grad_norm": 50.96314239501953, "learning_rate": 8.386760192897852e-06, "loss": 12.8761, "step": 19400 }, { "epoch": 8.452920402064656, "grad_norm": Infinity, "learning_rate": 8.382376150811047e-06, "loss": 12.8613, "step": 19450 }, { "epoch": 8.474653626731866, "grad_norm": 25.97089195251465, "learning_rate": 8.377992108724244e-06, "loss": 12.8875, "step": 19500 }, { "epoch": 8.496386851399077, "grad_norm": 30.094532012939453, "learning_rate": 8.373608066637441e-06, "loss": 12.8784, "step": 19550 }, { "epoch": 8.518120076066285, "grad_norm": 37.806156158447266, "learning_rate": 8.369224024550637e-06, "loss": 12.8339, "step": 19600 }, { "epoch": 8.539853300733496, "grad_norm": 38.92607498168945, "learning_rate": 8.364839982463832e-06, "loss": 12.8541, "step": 19650 }, { "epoch": 8.561586525400706, "grad_norm": 31.54934310913086, "learning_rate": 8.360455940377028e-06, "loss": 12.8991, "step": 19700 }, { "epoch": 8.583319750067917, "grad_norm": 37.04362869262695, "learning_rate": 8.356071898290225e-06, "loss": 12.9116, "step": 19750 }, { "epoch": 8.605052974735127, "grad_norm": 38.93299865722656, "learning_rate": 8.35168785620342e-06, "loss": 12.8499, "step": 19800 }, { "epoch": 8.626786199402336, "grad_norm": 28.214290618896484, "learning_rate": 8.347303814116615e-06, "loss": 12.8512, "step": 19850 }, { "epoch": 8.648519424069546, "grad_norm": 27.576839447021484, "learning_rate": 8.34291977202981e-06, "loss": 12.8824, "step": 19900 }, { "epoch": 8.670252648736756, "grad_norm": 25.321149826049805, "learning_rate": 8.338535729943008e-06, "loss": 12.842, "step": 19950 }, { "epoch": 8.691985873403967, "grad_norm": 36.43674087524414, "learning_rate": 8.334151687856205e-06, "loss": 12.9156, "step": 20000 }, { "epoch": 8.691985873403967, "eval_cer": 0.07732709565131522, "eval_loss": 2.3227267265319824, "eval_runtime": 401.1503, "eval_samples_per_second": 13.476, "eval_steps_per_second": 3.37, "eval_wer": 0.23097817553776104, "step": 20000 }, { "epoch": 8.713719098071177, "grad_norm": 30.650314331054688, "learning_rate": 8.3297676457694e-06, "loss": 12.9089, "step": 20050 }, { "epoch": 8.735452322738386, "grad_norm": 27.448633193969727, "learning_rate": 8.325383603682596e-06, "loss": 12.8572, "step": 20100 }, { "epoch": 8.757185547405596, "grad_norm": 25.665332794189453, "learning_rate": 8.320999561595791e-06, "loss": 12.8087, "step": 20150 }, { "epoch": 8.778918772072807, "grad_norm": 43.74554443359375, "learning_rate": 8.316615519508988e-06, "loss": 12.915, "step": 20200 }, { "epoch": 8.800651996740017, "grad_norm": 31.74461555480957, "learning_rate": 8.312231477422184e-06, "loss": 12.8676, "step": 20250 }, { "epoch": 8.822385221407226, "grad_norm": 28.51342010498047, "learning_rate": 8.30784743533538e-06, "loss": 12.8645, "step": 20300 }, { "epoch": 8.844118446074436, "grad_norm": 27.660497665405273, "learning_rate": 8.303463393248576e-06, "loss": 12.9217, "step": 20350 }, { "epoch": 8.865851670741646, "grad_norm": 41.046485900878906, "learning_rate": 8.299079351161771e-06, "loss": 12.8472, "step": 20400 }, { "epoch": 8.887584895408857, "grad_norm": 50.21107482910156, "learning_rate": 8.294695309074969e-06, "loss": 12.8141, "step": 20450 }, { "epoch": 8.909318120076065, "grad_norm": 42.08512878417969, "learning_rate": 8.290311266988164e-06, "loss": 12.9162, "step": 20500 }, { "epoch": 8.931051344743276, "grad_norm": 22.199024200439453, "learning_rate": 8.28592722490136e-06, "loss": 12.8649, "step": 20550 }, { "epoch": 8.952784569410486, "grad_norm": 38.15290451049805, "learning_rate": 8.281543182814555e-06, "loss": 12.8547, "step": 20600 }, { "epoch": 8.974517794077697, "grad_norm": 35.076698303222656, "learning_rate": 8.277159140727752e-06, "loss": 12.8954, "step": 20650 }, { "epoch": 8.996251018744907, "grad_norm": 26.742168426513672, "learning_rate": 8.272775098640947e-06, "loss": 12.8845, "step": 20700 }, { "epoch": 9.017821244227113, "grad_norm": 18.43798828125, "learning_rate": 8.268391056554144e-06, "loss": 12.6111, "step": 20750 }, { "epoch": 9.039554468894321, "grad_norm": 22.483016967773438, "learning_rate": 8.26400701446734e-06, "loss": 12.6938, "step": 20800 }, { "epoch": 9.061287693561532, "grad_norm": 22.414525985717773, "learning_rate": 8.259622972380535e-06, "loss": 12.6499, "step": 20850 }, { "epoch": 9.083020918228742, "grad_norm": 33.88186264038086, "learning_rate": 8.255238930293732e-06, "loss": 12.5987, "step": 20900 }, { "epoch": 9.104754142895953, "grad_norm": 34.6947021484375, "learning_rate": 8.250854888206928e-06, "loss": 12.6804, "step": 20950 }, { "epoch": 9.126487367563163, "grad_norm": 22.22621726989746, "learning_rate": 8.246470846120123e-06, "loss": 12.7388, "step": 21000 }, { "epoch": 9.148220592230372, "grad_norm": 30.4085693359375, "learning_rate": 8.242086804033318e-06, "loss": 12.7085, "step": 21050 }, { "epoch": 9.169953816897582, "grad_norm": 131.27008056640625, "learning_rate": 8.237702761946515e-06, "loss": 12.7142, "step": 21100 }, { "epoch": 9.191687041564792, "grad_norm": 28.05132293701172, "learning_rate": 8.23331871985971e-06, "loss": 12.698, "step": 21150 }, { "epoch": 9.213420266232003, "grad_norm": 157.52548217773438, "learning_rate": 8.228934677772908e-06, "loss": 12.7275, "step": 21200 }, { "epoch": 9.235153490899211, "grad_norm": 29.362707138061523, "learning_rate": 8.224550635686103e-06, "loss": 12.648, "step": 21250 }, { "epoch": 9.256886715566422, "grad_norm": 27.221683502197266, "learning_rate": 8.220166593599299e-06, "loss": 12.7306, "step": 21300 }, { "epoch": 9.278619940233632, "grad_norm": 18.6680850982666, "learning_rate": 8.215782551512496e-06, "loss": 12.6896, "step": 21350 }, { "epoch": 9.300353164900843, "grad_norm": 35.81766128540039, "learning_rate": 8.211398509425691e-06, "loss": 12.6838, "step": 21400 }, { "epoch": 9.322086389568053, "grad_norm": 24.64043426513672, "learning_rate": 8.207014467338888e-06, "loss": 12.7201, "step": 21450 }, { "epoch": 9.343819614235262, "grad_norm": 41.39848327636719, "learning_rate": 8.202630425252084e-06, "loss": 12.7289, "step": 21500 }, { "epoch": 9.365552838902472, "grad_norm": 23.982431411743164, "learning_rate": 8.198246383165279e-06, "loss": 12.7145, "step": 21550 }, { "epoch": 9.387286063569682, "grad_norm": 25.513904571533203, "learning_rate": 8.193862341078474e-06, "loss": 12.6646, "step": 21600 }, { "epoch": 9.409019288236893, "grad_norm": 28.16943359375, "learning_rate": 8.189478298991671e-06, "loss": 12.7157, "step": 21650 }, { "epoch": 9.430752512904101, "grad_norm": 31.33350944519043, "learning_rate": 8.185094256904867e-06, "loss": 12.7245, "step": 21700 }, { "epoch": 9.452485737571312, "grad_norm": 22.30205726623535, "learning_rate": 8.180710214818062e-06, "loss": 12.7082, "step": 21750 }, { "epoch": 9.474218962238522, "grad_norm": 31.175230026245117, "learning_rate": 8.17632617273126e-06, "loss": 12.7716, "step": 21800 }, { "epoch": 9.495952186905733, "grad_norm": 24.61014747619629, "learning_rate": 8.171942130644455e-06, "loss": 12.7153, "step": 21850 }, { "epoch": 9.517685411572941, "grad_norm": 37.26193618774414, "learning_rate": 8.167558088557652e-06, "loss": 12.7623, "step": 21900 }, { "epoch": 9.539418636240152, "grad_norm": 29.6248779296875, "learning_rate": 8.163174046470847e-06, "loss": 12.7862, "step": 21950 }, { "epoch": 9.561151860907362, "grad_norm": 37.52980422973633, "learning_rate": 8.158790004384042e-06, "loss": 12.6912, "step": 22000 }, { "epoch": 9.582885085574572, "grad_norm": 35.345035552978516, "learning_rate": 8.154405962297238e-06, "loss": 12.677, "step": 22050 }, { "epoch": 9.604618310241783, "grad_norm": 32.45883560180664, "learning_rate": 8.150021920210435e-06, "loss": 12.6662, "step": 22100 }, { "epoch": 9.626351534908991, "grad_norm": 46.35236358642578, "learning_rate": 8.14563787812363e-06, "loss": 12.7472, "step": 22150 }, { "epoch": 9.648084759576202, "grad_norm": 26.202049255371094, "learning_rate": 8.141253836036826e-06, "loss": 12.7174, "step": 22200 }, { "epoch": 9.669817984243412, "grad_norm": 27.350576400756836, "learning_rate": 8.136869793950023e-06, "loss": 12.6917, "step": 22250 }, { "epoch": 9.691551208910623, "grad_norm": 32.96540451049805, "learning_rate": 8.132485751863218e-06, "loss": 12.7865, "step": 22300 }, { "epoch": 9.713284433577833, "grad_norm": 33.34325408935547, "learning_rate": 8.128101709776415e-06, "loss": 12.8177, "step": 22350 }, { "epoch": 9.735017658245042, "grad_norm": 24.0529727935791, "learning_rate": 8.12371766768961e-06, "loss": 12.7816, "step": 22400 }, { "epoch": 9.756750882912252, "grad_norm": 31.504335403442383, "learning_rate": 8.119333625602806e-06, "loss": 12.7014, "step": 22450 }, { "epoch": 9.778484107579462, "grad_norm": 37.35165023803711, "learning_rate": 8.114949583516001e-06, "loss": 12.6674, "step": 22500 }, { "epoch": 9.800217332246673, "grad_norm": 22.923002243041992, "learning_rate": 8.110565541429199e-06, "loss": 12.7619, "step": 22550 }, { "epoch": 9.821950556913881, "grad_norm": 29.871366500854492, "learning_rate": 8.106181499342396e-06, "loss": 12.7368, "step": 22600 }, { "epoch": 9.843683781581092, "grad_norm": 40.105369567871094, "learning_rate": 8.101797457255591e-06, "loss": 12.6962, "step": 22650 }, { "epoch": 9.865417006248302, "grad_norm": 25.92096710205078, "learning_rate": 8.097413415168786e-06, "loss": 12.686, "step": 22700 }, { "epoch": 9.887150230915513, "grad_norm": 42.663368225097656, "learning_rate": 8.093029373081982e-06, "loss": 12.7663, "step": 22750 }, { "epoch": 9.908883455582721, "grad_norm": 30.958925247192383, "learning_rate": 8.088645330995179e-06, "loss": 12.7574, "step": 22800 }, { "epoch": 9.930616680249932, "grad_norm": 32.973209381103516, "learning_rate": 8.084261288908374e-06, "loss": 12.7376, "step": 22850 }, { "epoch": 9.952349904917142, "grad_norm": 24.848648071289062, "learning_rate": 8.07987724682157e-06, "loss": 12.7988, "step": 22900 }, { "epoch": 9.974083129584352, "grad_norm": 38.90625762939453, "learning_rate": 8.075493204734765e-06, "loss": 12.7848, "step": 22950 }, { "epoch": 9.995816354251563, "grad_norm": 169.55076599121094, "learning_rate": 8.071109162647962e-06, "loss": 12.7591, "step": 23000 }, { "epoch": 10.017386579733769, "grad_norm": 25.580976486206055, "learning_rate": 8.06672512056116e-06, "loss": 12.4225, "step": 23050 }, { "epoch": 10.039119804400977, "grad_norm": 35.71001434326172, "learning_rate": 8.062341078474355e-06, "loss": 12.6339, "step": 23100 }, { "epoch": 10.060853029068188, "grad_norm": 27.853500366210938, "learning_rate": 8.05795703638755e-06, "loss": 12.5467, "step": 23150 }, { "epoch": 10.082586253735398, "grad_norm": 25.689022064208984, "learning_rate": 8.053572994300745e-06, "loss": 12.6073, "step": 23200 }, { "epoch": 10.104319478402608, "grad_norm": 19.449281692504883, "learning_rate": 8.049188952213942e-06, "loss": 12.65, "step": 23250 }, { "epoch": 10.126052703069817, "grad_norm": 50.91756820678711, "learning_rate": 8.044804910127138e-06, "loss": 12.6245, "step": 23300 }, { "epoch": 10.147785927737027, "grad_norm": 30.20039939880371, "learning_rate": 8.040420868040333e-06, "loss": 12.5309, "step": 23350 }, { "epoch": 10.169519152404238, "grad_norm": 19.78704071044922, "learning_rate": 8.036036825953529e-06, "loss": 12.5593, "step": 23400 }, { "epoch": 10.191252377071448, "grad_norm": 19.870885848999023, "learning_rate": 8.031652783866726e-06, "loss": 12.5285, "step": 23450 }, { "epoch": 10.212985601738659, "grad_norm": 28.326723098754883, "learning_rate": 8.027268741779923e-06, "loss": 12.5193, "step": 23500 }, { "epoch": 10.234718826405867, "grad_norm": 27.501436233520508, "learning_rate": 8.022884699693118e-06, "loss": 12.5663, "step": 23550 }, { "epoch": 10.256452051073078, "grad_norm": 28.51038932800293, "learning_rate": 8.018500657606314e-06, "loss": 12.6105, "step": 23600 }, { "epoch": 10.278185275740288, "grad_norm": 38.11888885498047, "learning_rate": 8.014116615519509e-06, "loss": 12.6369, "step": 23650 }, { "epoch": 10.299918500407498, "grad_norm": 56.63121032714844, "learning_rate": 8.009732573432706e-06, "loss": 12.5986, "step": 23700 }, { "epoch": 10.321651725074709, "grad_norm": 30.95232582092285, "learning_rate": 8.005348531345901e-06, "loss": 12.6104, "step": 23750 }, { "epoch": 10.343384949741917, "grad_norm": 46.855831146240234, "learning_rate": 8.000964489259098e-06, "loss": 12.6277, "step": 23800 }, { "epoch": 10.365118174409128, "grad_norm": 38.9176139831543, "learning_rate": 7.996580447172292e-06, "loss": 12.5793, "step": 23850 }, { "epoch": 10.386851399076338, "grad_norm": 20.209339141845703, "learning_rate": 7.99219640508549e-06, "loss": 12.5781, "step": 23900 }, { "epoch": 10.408584623743549, "grad_norm": 34.40525817871094, "learning_rate": 7.987812362998686e-06, "loss": 12.6018, "step": 23950 }, { "epoch": 10.430317848410757, "grad_norm": 44.757041931152344, "learning_rate": 7.983428320911882e-06, "loss": 12.6543, "step": 24000 }, { "epoch": 10.452051073077968, "grad_norm": 40.83699035644531, "learning_rate": 7.979044278825077e-06, "loss": 12.6135, "step": 24050 }, { "epoch": 10.473784297745178, "grad_norm": 31.089038848876953, "learning_rate": 7.974660236738272e-06, "loss": 12.6269, "step": 24100 }, { "epoch": 10.495517522412388, "grad_norm": 33.82300567626953, "learning_rate": 7.97027619465147e-06, "loss": 12.622, "step": 24150 }, { "epoch": 10.517250747079597, "grad_norm": 25.88127899169922, "learning_rate": 7.965892152564665e-06, "loss": 12.6332, "step": 24200 }, { "epoch": 10.538983971746807, "grad_norm": 29.95918083190918, "learning_rate": 7.961508110477862e-06, "loss": 12.6166, "step": 24250 }, { "epoch": 10.560717196414018, "grad_norm": 34.399444580078125, "learning_rate": 7.957124068391057e-06, "loss": 12.5997, "step": 24300 }, { "epoch": 10.582450421081228, "grad_norm": 26.007383346557617, "learning_rate": 7.952740026304253e-06, "loss": 12.5829, "step": 24350 }, { "epoch": 10.604183645748439, "grad_norm": 14.864594459533691, "learning_rate": 7.94835598421745e-06, "loss": 12.6532, "step": 24400 }, { "epoch": 10.625916870415647, "grad_norm": 31.178630828857422, "learning_rate": 7.943971942130645e-06, "loss": 12.5909, "step": 24450 }, { "epoch": 10.647650095082858, "grad_norm": 31.065549850463867, "learning_rate": 7.93958790004384e-06, "loss": 12.5674, "step": 24500 }, { "epoch": 10.669383319750068, "grad_norm": 28.21125030517578, "learning_rate": 7.935203857957036e-06, "loss": 12.6476, "step": 24550 }, { "epoch": 10.691116544417278, "grad_norm": 31.474586486816406, "learning_rate": 7.930819815870233e-06, "loss": 12.5938, "step": 24600 }, { "epoch": 10.712849769084489, "grad_norm": 26.097501754760742, "learning_rate": 7.926435773783428e-06, "loss": 12.6483, "step": 24650 }, { "epoch": 10.734582993751697, "grad_norm": 40.45956039428711, "learning_rate": 7.922051731696626e-06, "loss": 12.6591, "step": 24700 }, { "epoch": 10.756316218418908, "grad_norm": 23.737592697143555, "learning_rate": 7.917667689609821e-06, "loss": 12.5698, "step": 24750 }, { "epoch": 10.778049443086118, "grad_norm": 32.13654708862305, "learning_rate": 7.913283647523016e-06, "loss": 12.5617, "step": 24800 }, { "epoch": 10.799782667753329, "grad_norm": 28.451892852783203, "learning_rate": 7.908899605436213e-06, "loss": 12.6304, "step": 24850 }, { "epoch": 10.821515892420537, "grad_norm": 37.13362121582031, "learning_rate": 7.904515563349409e-06, "loss": 12.6649, "step": 24900 }, { "epoch": 10.843249117087748, "grad_norm": 45.161277770996094, "learning_rate": 7.900131521262606e-06, "loss": 12.6335, "step": 24950 }, { "epoch": 10.864982341754958, "grad_norm": 25.36030387878418, "learning_rate": 7.8957474791758e-06, "loss": 12.7371, "step": 25000 }, { "epoch": 10.886715566422168, "grad_norm": 38.44227981567383, "learning_rate": 7.891363437088997e-06, "loss": 12.6187, "step": 25050 }, { "epoch": 10.908448791089377, "grad_norm": 46.692874908447266, "learning_rate": 7.886979395002192e-06, "loss": 12.6517, "step": 25100 }, { "epoch": 10.930182015756587, "grad_norm": 28.845399856567383, "learning_rate": 7.882595352915389e-06, "loss": 12.5677, "step": 25150 }, { "epoch": 10.951915240423798, "grad_norm": 31.64191436767578, "learning_rate": 7.878211310828585e-06, "loss": 12.6347, "step": 25200 }, { "epoch": 10.973648465091008, "grad_norm": 32.57988357543945, "learning_rate": 7.87382726874178e-06, "loss": 12.5652, "step": 25250 }, { "epoch": 10.995381689758219, "grad_norm": 28.151342391967773, "learning_rate": 7.869443226654977e-06, "loss": 12.5981, "step": 25300 }, { "epoch": 11.016951915240424, "grad_norm": 29.2868595123291, "learning_rate": 7.865059184568172e-06, "loss": 12.4366, "step": 25350 }, { "epoch": 11.038685139907633, "grad_norm": 31.722579956054688, "learning_rate": 7.86067514248137e-06, "loss": 12.4879, "step": 25400 }, { "epoch": 11.060418364574844, "grad_norm": 29.232097625732422, "learning_rate": 7.856291100394565e-06, "loss": 12.4597, "step": 25450 }, { "epoch": 11.082151589242054, "grad_norm": 18.49676513671875, "learning_rate": 7.85190705830776e-06, "loss": 12.4631, "step": 25500 }, { "epoch": 11.103884813909264, "grad_norm": 27.89682388305664, "learning_rate": 7.847523016220956e-06, "loss": 12.4507, "step": 25550 }, { "epoch": 11.125618038576473, "grad_norm": 30.45709800720215, "learning_rate": 7.843138974134153e-06, "loss": 12.5008, "step": 25600 }, { "epoch": 11.147351263243683, "grad_norm": 62.570823669433594, "learning_rate": 7.838754932047348e-06, "loss": 12.5107, "step": 25650 }, { "epoch": 11.169084487910894, "grad_norm": 24.397315979003906, "learning_rate": 7.834370889960543e-06, "loss": 12.5059, "step": 25700 }, { "epoch": 11.190817712578104, "grad_norm": 18.074167251586914, "learning_rate": 7.82998684787374e-06, "loss": 12.5071, "step": 25750 }, { "epoch": 11.212550937245314, "grad_norm": 20.450908660888672, "learning_rate": 7.825602805786936e-06, "loss": 12.5048, "step": 25800 }, { "epoch": 11.234284161912523, "grad_norm": 19.00213623046875, "learning_rate": 7.821218763700133e-06, "loss": 12.4887, "step": 25850 }, { "epoch": 11.256017386579733, "grad_norm": 23.276472091674805, "learning_rate": 7.816834721613328e-06, "loss": 12.5311, "step": 25900 }, { "epoch": 11.277750611246944, "grad_norm": 33.67416763305664, "learning_rate": 7.812450679526524e-06, "loss": 12.5503, "step": 25950 }, { "epoch": 11.299483835914154, "grad_norm": 17.561626434326172, "learning_rate": 7.80806663743972e-06, "loss": 12.4831, "step": 26000 }, { "epoch": 11.321217060581363, "grad_norm": 24.35294532775879, "learning_rate": 7.803682595352916e-06, "loss": 12.4869, "step": 26050 }, { "epoch": 11.342950285248573, "grad_norm": 16.80247688293457, "learning_rate": 7.799298553266113e-06, "loss": 12.5581, "step": 26100 }, { "epoch": 11.364683509915784, "grad_norm": 22.540014266967773, "learning_rate": 7.794914511179307e-06, "loss": 12.5552, "step": 26150 }, { "epoch": 11.386416734582994, "grad_norm": 23.270639419555664, "learning_rate": 7.790530469092504e-06, "loss": 12.5005, "step": 26200 }, { "epoch": 11.408149959250204, "grad_norm": 27.789560317993164, "learning_rate": 7.7861464270057e-06, "loss": 12.5405, "step": 26250 }, { "epoch": 11.429883183917413, "grad_norm": 24.1334285736084, "learning_rate": 7.781762384918897e-06, "loss": 12.5056, "step": 26300 }, { "epoch": 11.451616408584623, "grad_norm": 35.342288970947266, "learning_rate": 7.777378342832092e-06, "loss": 12.501, "step": 26350 }, { "epoch": 11.473349633251834, "grad_norm": 27.646997451782227, "learning_rate": 7.772994300745287e-06, "loss": 12.4571, "step": 26400 }, { "epoch": 11.495082857919044, "grad_norm": 43.06098937988281, "learning_rate": 7.768610258658483e-06, "loss": 12.5856, "step": 26450 }, { "epoch": 11.516816082586253, "grad_norm": 21.487150192260742, "learning_rate": 7.76422621657168e-06, "loss": 12.4849, "step": 26500 }, { "epoch": 11.538549307253463, "grad_norm": 21.75229835510254, "learning_rate": 7.759842174484877e-06, "loss": 12.5192, "step": 26550 }, { "epoch": 11.560282531920674, "grad_norm": 23.02396011352539, "learning_rate": 7.755458132398072e-06, "loss": 12.5011, "step": 26600 }, { "epoch": 11.582015756587884, "grad_norm": 21.738445281982422, "learning_rate": 7.751074090311268e-06, "loss": 12.5525, "step": 26650 }, { "epoch": 11.603748981255094, "grad_norm": 38.93478775024414, "learning_rate": 7.746690048224463e-06, "loss": 12.4925, "step": 26700 }, { "epoch": 11.625482205922303, "grad_norm": 30.070697784423828, "learning_rate": 7.74230600613766e-06, "loss": 12.5598, "step": 26750 }, { "epoch": 11.647215430589513, "grad_norm": 44.55253982543945, "learning_rate": 7.737921964050856e-06, "loss": 12.4896, "step": 26800 }, { "epoch": 11.668948655256724, "grad_norm": 23.052288055419922, "learning_rate": 7.733537921964051e-06, "loss": 12.5198, "step": 26850 }, { "epoch": 11.690681879923934, "grad_norm": 24.383729934692383, "learning_rate": 7.729153879877246e-06, "loss": 12.5321, "step": 26900 }, { "epoch": 11.712415104591145, "grad_norm": 23.777788162231445, "learning_rate": 7.724769837790443e-06, "loss": 12.5403, "step": 26950 }, { "epoch": 11.734148329258353, "grad_norm": 22.8085994720459, "learning_rate": 7.72038579570364e-06, "loss": 12.5297, "step": 27000 }, { "epoch": 11.755881553925564, "grad_norm": 28.690683364868164, "learning_rate": 7.716001753616836e-06, "loss": 12.5626, "step": 27050 }, { "epoch": 11.777614778592774, "grad_norm": 23.2988338470459, "learning_rate": 7.711617711530031e-06, "loss": 12.4646, "step": 27100 }, { "epoch": 11.799348003259984, "grad_norm": 24.85117530822754, "learning_rate": 7.707233669443227e-06, "loss": 12.4886, "step": 27150 }, { "epoch": 11.821081227927193, "grad_norm": 34.84917449951172, "learning_rate": 7.702849627356424e-06, "loss": 12.578, "step": 27200 }, { "epoch": 11.842814452594403, "grad_norm": 27.57342529296875, "learning_rate": 7.698465585269619e-06, "loss": 12.5423, "step": 27250 }, { "epoch": 11.864547677261614, "grad_norm": 21.665023803710938, "learning_rate": 7.694081543182815e-06, "loss": 12.4848, "step": 27300 }, { "epoch": 11.886280901928824, "grad_norm": 20.787555694580078, "learning_rate": 7.68969750109601e-06, "loss": 12.4976, "step": 27350 }, { "epoch": 11.908014126596033, "grad_norm": 42.406837463378906, "learning_rate": 7.685313459009207e-06, "loss": 12.5549, "step": 27400 }, { "epoch": 11.929747351263243, "grad_norm": 23.60106658935547, "learning_rate": 7.680929416922404e-06, "loss": 12.5492, "step": 27450 }, { "epoch": 11.951480575930454, "grad_norm": 21.591079711914062, "learning_rate": 7.6765453748356e-06, "loss": 12.5018, "step": 27500 }, { "epoch": 11.973213800597664, "grad_norm": 32.685333251953125, "learning_rate": 7.672161332748795e-06, "loss": 12.5378, "step": 27550 }, { "epoch": 11.994947025264874, "grad_norm": 26.88076400756836, "learning_rate": 7.66777729066199e-06, "loss": 12.5529, "step": 27600 }, { "epoch": 12.01651725074708, "grad_norm": 19.660898208618164, "learning_rate": 7.663393248575187e-06, "loss": 12.3944, "step": 27650 }, { "epoch": 12.038250475414289, "grad_norm": 36.72605514526367, "learning_rate": 7.659009206488383e-06, "loss": 12.359, "step": 27700 }, { "epoch": 12.0599837000815, "grad_norm": 27.864477157592773, "learning_rate": 7.65462516440158e-06, "loss": 12.3951, "step": 27750 }, { "epoch": 12.08171692474871, "grad_norm": 34.72395324707031, "learning_rate": 7.650241122314775e-06, "loss": 12.4259, "step": 27800 }, { "epoch": 12.10345014941592, "grad_norm": 20.68131446838379, "learning_rate": 7.64585708022797e-06, "loss": 12.4737, "step": 27850 }, { "epoch": 12.125183374083129, "grad_norm": 27.369903564453125, "learning_rate": 7.641473038141168e-06, "loss": 12.4838, "step": 27900 }, { "epoch": 12.14691659875034, "grad_norm": 14.568199157714844, "learning_rate": 7.637088996054363e-06, "loss": 12.3812, "step": 27950 }, { "epoch": 12.16864982341755, "grad_norm": 20.099998474121094, "learning_rate": 7.632704953967558e-06, "loss": 12.4168, "step": 28000 }, { "epoch": 12.19038304808476, "grad_norm": 21.41561508178711, "learning_rate": 7.628320911880755e-06, "loss": 12.3799, "step": 28050 }, { "epoch": 12.21211627275197, "grad_norm": 23.49574851989746, "learning_rate": 7.623936869793951e-06, "loss": 12.4527, "step": 28100 }, { "epoch": 12.233849497419179, "grad_norm": 30.164730072021484, "learning_rate": 7.619552827707146e-06, "loss": 12.4353, "step": 28150 }, { "epoch": 12.25558272208639, "grad_norm": 32.27763748168945, "learning_rate": 7.6151687856203425e-06, "loss": 12.4808, "step": 28200 }, { "epoch": 12.2773159467536, "grad_norm": 36.46564483642578, "learning_rate": 7.610784743533538e-06, "loss": 12.3987, "step": 28250 }, { "epoch": 12.29904917142081, "grad_norm": 19.888980865478516, "learning_rate": 7.606400701446734e-06, "loss": 12.3889, "step": 28300 }, { "epoch": 12.320782396088019, "grad_norm": 20.877737045288086, "learning_rate": 7.602016659359931e-06, "loss": 12.4598, "step": 28350 }, { "epoch": 12.34251562075523, "grad_norm": 20.208404541015625, "learning_rate": 7.5976326172731266e-06, "loss": 12.3682, "step": 28400 }, { "epoch": 12.36424884542244, "grad_norm": 48.63652801513672, "learning_rate": 7.593248575186323e-06, "loss": 12.3874, "step": 28450 }, { "epoch": 12.38598207008965, "grad_norm": 23.263282775878906, "learning_rate": 7.588864533099518e-06, "loss": 12.4161, "step": 28500 }, { "epoch": 12.40771529475686, "grad_norm": 23.76000213623047, "learning_rate": 7.5844804910127144e-06, "loss": 12.4247, "step": 28550 }, { "epoch": 12.429448519424069, "grad_norm": 62.45661544799805, "learning_rate": 7.58009644892591e-06, "loss": 12.4226, "step": 28600 }, { "epoch": 12.45118174409128, "grad_norm": 33.05659484863281, "learning_rate": 7.575712406839106e-06, "loss": 12.493, "step": 28650 }, { "epoch": 12.47291496875849, "grad_norm": 23.853660583496094, "learning_rate": 7.5713283647523014e-06, "loss": 12.4388, "step": 28700 }, { "epoch": 12.4946481934257, "grad_norm": 30.970672607421875, "learning_rate": 7.5669443226654985e-06, "loss": 12.4721, "step": 28750 }, { "epoch": 12.516381418092909, "grad_norm": 20.660356521606445, "learning_rate": 7.562560280578695e-06, "loss": 12.4463, "step": 28800 }, { "epoch": 12.538114642760119, "grad_norm": 27.25446319580078, "learning_rate": 7.55817623849189e-06, "loss": 12.4359, "step": 28850 }, { "epoch": 12.55984786742733, "grad_norm": 19.96375274658203, "learning_rate": 7.553792196405086e-06, "loss": 12.4274, "step": 28900 }, { "epoch": 12.58158109209454, "grad_norm": 25.133895874023438, "learning_rate": 7.549408154318282e-06, "loss": 12.4288, "step": 28950 }, { "epoch": 12.60331431676175, "grad_norm": 55.64627456665039, "learning_rate": 7.545024112231478e-06, "loss": 12.4451, "step": 29000 }, { "epoch": 12.625047541428959, "grad_norm": 70.62721252441406, "learning_rate": 7.540640070144673e-06, "loss": 12.451, "step": 29050 }, { "epoch": 12.64678076609617, "grad_norm": 22.789186477661133, "learning_rate": 7.5362560280578705e-06, "loss": 12.43, "step": 29100 }, { "epoch": 12.66851399076338, "grad_norm": 25.138248443603516, "learning_rate": 7.531871985971065e-06, "loss": 12.4, "step": 29150 }, { "epoch": 12.69024721543059, "grad_norm": 18.74398422241211, "learning_rate": 7.527487943884262e-06, "loss": 12.4338, "step": 29200 }, { "epoch": 12.711980440097799, "grad_norm": 28.796159744262695, "learning_rate": 7.523103901797458e-06, "loss": 12.473, "step": 29250 }, { "epoch": 12.733713664765009, "grad_norm": 28.044872283935547, "learning_rate": 7.518719859710654e-06, "loss": 12.4155, "step": 29300 }, { "epoch": 12.75544688943222, "grad_norm": 21.100650787353516, "learning_rate": 7.51433581762385e-06, "loss": 12.4329, "step": 29350 }, { "epoch": 12.77718011409943, "grad_norm": 24.12652015686035, "learning_rate": 7.509951775537045e-06, "loss": 12.4504, "step": 29400 }, { "epoch": 12.79891333876664, "grad_norm": 18.889480590820312, "learning_rate": 7.5055677334502416e-06, "loss": 12.3981, "step": 29450 }, { "epoch": 12.820646563433849, "grad_norm": 20.395387649536133, "learning_rate": 7.501183691363437e-06, "loss": 12.4834, "step": 29500 }, { "epoch": 12.84237978810106, "grad_norm": 34.01985168457031, "learning_rate": 7.496799649276634e-06, "loss": 12.4485, "step": 29550 }, { "epoch": 12.86411301276827, "grad_norm": 36.57313537597656, "learning_rate": 7.49241560718983e-06, "loss": 12.5063, "step": 29600 }, { "epoch": 12.88584623743548, "grad_norm": 21.946285247802734, "learning_rate": 7.488031565103026e-06, "loss": 12.501, "step": 29650 }, { "epoch": 12.907579462102689, "grad_norm": 26.948814392089844, "learning_rate": 7.483647523016222e-06, "loss": 12.5122, "step": 29700 }, { "epoch": 12.929312686769899, "grad_norm": 31.4482364654541, "learning_rate": 7.479263480929417e-06, "loss": 12.4543, "step": 29750 }, { "epoch": 12.95104591143711, "grad_norm": 35.19594192504883, "learning_rate": 7.4748794388426135e-06, "loss": 12.4657, "step": 29800 }, { "epoch": 12.97277913610432, "grad_norm": 23.498001098632812, "learning_rate": 7.470495396755809e-06, "loss": 12.4462, "step": 29850 }, { "epoch": 12.99451236077153, "grad_norm": 48.50201416015625, "learning_rate": 7.466111354669006e-06, "loss": 12.4134, "step": 29900 }, { "epoch": 13.016082586253736, "grad_norm": 25.189435958862305, "learning_rate": 7.461727312582201e-06, "loss": 12.3134, "step": 29950 }, { "epoch": 13.037815810920945, "grad_norm": 21.985063552856445, "learning_rate": 7.457343270495398e-06, "loss": 12.3299, "step": 30000 }, { "epoch": 13.037815810920945, "eval_cer": 0.0770617061459272, "eval_loss": 2.334705352783203, "eval_runtime": 399.4375, "eval_samples_per_second": 13.534, "eval_steps_per_second": 3.385, "eval_wer": 0.23019312293923694, "step": 30000 }, { "epoch": 13.059549035588155, "grad_norm": 15.290221214294434, "learning_rate": 7.452959228408594e-06, "loss": 12.3108, "step": 30050 }, { "epoch": 13.081282260255366, "grad_norm": 26.75568389892578, "learning_rate": 7.448575186321789e-06, "loss": 12.3347, "step": 30100 }, { "epoch": 13.103015484922576, "grad_norm": 28.02945327758789, "learning_rate": 7.4441911442349854e-06, "loss": 12.3172, "step": 30150 }, { "epoch": 13.124748709589785, "grad_norm": 17.39537811279297, "learning_rate": 7.439807102148181e-06, "loss": 12.3004, "step": 30200 }, { "epoch": 13.146481934256995, "grad_norm": 21.168519973754883, "learning_rate": 7.435423060061377e-06, "loss": 12.3882, "step": 30250 }, { "epoch": 13.168215158924205, "grad_norm": 24.02804946899414, "learning_rate": 7.4310390179745725e-06, "loss": 12.3512, "step": 30300 }, { "epoch": 13.189948383591416, "grad_norm": 25.33257484436035, "learning_rate": 7.4266549758877695e-06, "loss": 12.3121, "step": 30350 }, { "epoch": 13.211681608258626, "grad_norm": 20.40574073791504, "learning_rate": 7.422270933800965e-06, "loss": 12.3737, "step": 30400 }, { "epoch": 13.233414832925835, "grad_norm": 25.527008056640625, "learning_rate": 7.417886891714161e-06, "loss": 12.3575, "step": 30450 }, { "epoch": 13.255148057593045, "grad_norm": 23.7490291595459, "learning_rate": 7.413502849627357e-06, "loss": 12.3835, "step": 30500 }, { "epoch": 13.276881282260256, "grad_norm": 23.39885139465332, "learning_rate": 7.409118807540553e-06, "loss": 12.3056, "step": 30550 }, { "epoch": 13.298614506927466, "grad_norm": 21.89725112915039, "learning_rate": 7.404734765453749e-06, "loss": 12.3262, "step": 30600 }, { "epoch": 13.320347731594675, "grad_norm": 20.838117599487305, "learning_rate": 7.400350723366944e-06, "loss": 12.3879, "step": 30650 }, { "epoch": 13.342080956261885, "grad_norm": 17.388107299804688, "learning_rate": 7.3959666812801415e-06, "loss": 12.3611, "step": 30700 }, { "epoch": 13.363814180929095, "grad_norm": 19.158178329467773, "learning_rate": 7.391582639193337e-06, "loss": 12.3782, "step": 30750 }, { "epoch": 13.385547405596306, "grad_norm": 28.794353485107422, "learning_rate": 7.387198597106533e-06, "loss": 12.4156, "step": 30800 }, { "epoch": 13.407280630263516, "grad_norm": 24.086498260498047, "learning_rate": 7.3828145550197285e-06, "loss": 12.3903, "step": 30850 }, { "epoch": 13.429013854930725, "grad_norm": 24.688875198364258, "learning_rate": 7.378430512932925e-06, "loss": 12.3829, "step": 30900 }, { "epoch": 13.450747079597935, "grad_norm": 54.69606018066406, "learning_rate": 7.374046470846121e-06, "loss": 12.3398, "step": 30950 }, { "epoch": 13.472480304265146, "grad_norm": 25.10434341430664, "learning_rate": 7.369662428759316e-06, "loss": 12.3753, "step": 31000 }, { "epoch": 13.494213528932356, "grad_norm": 35.44208908081055, "learning_rate": 7.3652783866725134e-06, "loss": 12.3937, "step": 31050 }, { "epoch": 13.515946753599565, "grad_norm": 19.743236541748047, "learning_rate": 7.360894344585709e-06, "loss": 12.439, "step": 31100 }, { "epoch": 13.537679978266775, "grad_norm": 29.914348602294922, "learning_rate": 7.356510302498905e-06, "loss": 12.3594, "step": 31150 }, { "epoch": 13.559413202933985, "grad_norm": 105.84856414794922, "learning_rate": 7.3521262604121004e-06, "loss": 12.352, "step": 31200 }, { "epoch": 13.581146427601196, "grad_norm": 23.331436157226562, "learning_rate": 7.347742218325297e-06, "loss": 12.355, "step": 31250 }, { "epoch": 13.602879652268406, "grad_norm": 18.46331214904785, "learning_rate": 7.343358176238492e-06, "loss": 12.3481, "step": 31300 }, { "epoch": 13.624612876935615, "grad_norm": 22.384254455566406, "learning_rate": 7.338974134151688e-06, "loss": 12.4047, "step": 31350 }, { "epoch": 13.646346101602825, "grad_norm": 34.16387176513672, "learning_rate": 7.3345900920648845e-06, "loss": 12.4131, "step": 31400 }, { "epoch": 13.668079326270036, "grad_norm": 59.95965576171875, "learning_rate": 7.33020604997808e-06, "loss": 12.3539, "step": 31450 }, { "epoch": 13.689812550937246, "grad_norm": 21.647342681884766, "learning_rate": 7.325822007891277e-06, "loss": 12.3936, "step": 31500 }, { "epoch": 13.711545775604455, "grad_norm": 20.892303466796875, "learning_rate": 7.321437965804472e-06, "loss": 12.4042, "step": 31550 }, { "epoch": 13.733279000271665, "grad_norm": 25.085771560668945, "learning_rate": 7.317053923717669e-06, "loss": 12.4563, "step": 31600 }, { "epoch": 13.755012224938875, "grad_norm": 29.819766998291016, "learning_rate": 7.312669881630864e-06, "loss": 12.3417, "step": 31650 }, { "epoch": 13.776745449606086, "grad_norm": 23.446327209472656, "learning_rate": 7.30828583954406e-06, "loss": 12.4085, "step": 31700 }, { "epoch": 13.798478674273296, "grad_norm": 30.441680908203125, "learning_rate": 7.303901797457256e-06, "loss": 12.3269, "step": 31750 }, { "epoch": 13.820211898940505, "grad_norm": 46.045162200927734, "learning_rate": 7.299517755370452e-06, "loss": 12.3459, "step": 31800 }, { "epoch": 13.841945123607715, "grad_norm": 20.486669540405273, "learning_rate": 7.295133713283649e-06, "loss": 12.4457, "step": 31850 }, { "epoch": 13.863678348274926, "grad_norm": 18.060197830200195, "learning_rate": 7.290749671196844e-06, "loss": 12.4123, "step": 31900 }, { "epoch": 13.885411572942136, "grad_norm": 29.656959533691406, "learning_rate": 7.2863656291100406e-06, "loss": 12.3888, "step": 31950 }, { "epoch": 13.907144797609345, "grad_norm": 16.68509864807129, "learning_rate": 7.281981587023236e-06, "loss": 12.3878, "step": 32000 }, { "epoch": 13.928878022276555, "grad_norm": 27.963064193725586, "learning_rate": 7.277597544936432e-06, "loss": 12.3907, "step": 32050 }, { "epoch": 13.950611246943765, "grad_norm": 27.46925163269043, "learning_rate": 7.2732135028496276e-06, "loss": 12.4483, "step": 32100 }, { "epoch": 13.972344471610976, "grad_norm": 23.631675720214844, "learning_rate": 7.268829460762824e-06, "loss": 12.4326, "step": 32150 }, { "epoch": 13.994077696278186, "grad_norm": 44.30888748168945, "learning_rate": 7.264445418676019e-06, "loss": 12.4026, "step": 32200 }, { "epoch": 14.015647921760392, "grad_norm": 17.614269256591797, "learning_rate": 7.260061376589215e-06, "loss": 12.2351, "step": 32250 }, { "epoch": 14.0373811464276, "grad_norm": 16.82352638244629, "learning_rate": 7.2556773345024125e-06, "loss": 12.2887, "step": 32300 }, { "epoch": 14.059114371094811, "grad_norm": 22.863889694213867, "learning_rate": 7.251293292415608e-06, "loss": 12.2874, "step": 32350 }, { "epoch": 14.080847595762021, "grad_norm": 22.543703079223633, "learning_rate": 7.246909250328804e-06, "loss": 12.2726, "step": 32400 }, { "epoch": 14.102580820429232, "grad_norm": 19.95811653137207, "learning_rate": 7.2425252082419995e-06, "loss": 12.2948, "step": 32450 }, { "epoch": 14.12431404509644, "grad_norm": 11.412972450256348, "learning_rate": 7.238141166155196e-06, "loss": 12.2971, "step": 32500 }, { "epoch": 14.14604726976365, "grad_norm": 30.869230270385742, "learning_rate": 7.233757124068391e-06, "loss": 12.3222, "step": 32550 }, { "epoch": 14.167780494430861, "grad_norm": 37.976741790771484, "learning_rate": 7.229373081981587e-06, "loss": 12.3079, "step": 32600 }, { "epoch": 14.189513719098072, "grad_norm": 23.526809692382812, "learning_rate": 7.224989039894783e-06, "loss": 12.3085, "step": 32650 }, { "epoch": 14.211246943765282, "grad_norm": 19.888294219970703, "learning_rate": 7.22060499780798e-06, "loss": 12.3141, "step": 32700 }, { "epoch": 14.23298016843249, "grad_norm": 16.727022171020508, "learning_rate": 7.216220955721176e-06, "loss": 12.275, "step": 32750 }, { "epoch": 14.254713393099701, "grad_norm": 14.18730640411377, "learning_rate": 7.2118369136343715e-06, "loss": 12.3144, "step": 32800 }, { "epoch": 14.276446617766911, "grad_norm": 20.451278686523438, "learning_rate": 7.207452871547568e-06, "loss": 12.2727, "step": 32850 }, { "epoch": 14.298179842434122, "grad_norm": 16.769447326660156, "learning_rate": 7.203068829460763e-06, "loss": 12.3384, "step": 32900 }, { "epoch": 14.31991306710133, "grad_norm": 27.05632781982422, "learning_rate": 7.198684787373959e-06, "loss": 12.3492, "step": 32950 }, { "epoch": 14.34164629176854, "grad_norm": 38.1939582824707, "learning_rate": 7.194300745287155e-06, "loss": 12.284, "step": 33000 }, { "epoch": 14.363379516435751, "grad_norm": 32.06970977783203, "learning_rate": 7.189916703200352e-06, "loss": 12.3158, "step": 33050 }, { "epoch": 14.385112741102962, "grad_norm": 25.079200744628906, "learning_rate": 7.185532661113547e-06, "loss": 12.3486, "step": 33100 }, { "epoch": 14.406845965770172, "grad_norm": 21.099042892456055, "learning_rate": 7.181148619026743e-06, "loss": 12.2961, "step": 33150 }, { "epoch": 14.42857919043738, "grad_norm": 18.112712860107422, "learning_rate": 7.17676457693994e-06, "loss": 12.2852, "step": 33200 }, { "epoch": 14.450312415104591, "grad_norm": 18.887737274169922, "learning_rate": 7.172380534853135e-06, "loss": 12.276, "step": 33250 }, { "epoch": 14.472045639771801, "grad_norm": 21.17413902282715, "learning_rate": 7.167996492766331e-06, "loss": 12.3109, "step": 33300 }, { "epoch": 14.493778864439012, "grad_norm": 67.79141998291016, "learning_rate": 7.163612450679527e-06, "loss": 12.3326, "step": 33350 }, { "epoch": 14.51551208910622, "grad_norm": 18.88022232055664, "learning_rate": 7.159228408592723e-06, "loss": 12.355, "step": 33400 }, { "epoch": 14.53724531377343, "grad_norm": 14.09670639038086, "learning_rate": 7.154844366505918e-06, "loss": 12.3049, "step": 33450 }, { "epoch": 14.558978538440641, "grad_norm": 22.51435661315918, "learning_rate": 7.150460324419115e-06, "loss": 12.3043, "step": 33500 }, { "epoch": 14.580711763107852, "grad_norm": 20.429990768432617, "learning_rate": 7.146076282332311e-06, "loss": 12.3029, "step": 33550 }, { "epoch": 14.602444987775062, "grad_norm": 27.559160232543945, "learning_rate": 7.141692240245507e-06, "loss": 12.3017, "step": 33600 }, { "epoch": 14.62417821244227, "grad_norm": 26.29608917236328, "learning_rate": 7.137308198158703e-06, "loss": 12.347, "step": 33650 }, { "epoch": 14.645911437109481, "grad_norm": 12.279489517211914, "learning_rate": 7.132924156071899e-06, "loss": 12.2923, "step": 33700 }, { "epoch": 14.667644661776691, "grad_norm": 19.162981033325195, "learning_rate": 7.128540113985095e-06, "loss": 12.3289, "step": 33750 }, { "epoch": 14.689377886443902, "grad_norm": 19.87074089050293, "learning_rate": 7.12415607189829e-06, "loss": 12.2862, "step": 33800 }, { "epoch": 14.71111111111111, "grad_norm": 22.431442260742188, "learning_rate": 7.119772029811487e-06, "loss": 12.3116, "step": 33850 }, { "epoch": 14.73284433577832, "grad_norm": 16.823158264160156, "learning_rate": 7.115387987724683e-06, "loss": 12.3284, "step": 33900 }, { "epoch": 14.754577560445531, "grad_norm": 26.60719108581543, "learning_rate": 7.111003945637879e-06, "loss": 12.2962, "step": 33950 }, { "epoch": 14.776310785112742, "grad_norm": 20.54785919189453, "learning_rate": 7.106619903551074e-06, "loss": 12.2979, "step": 34000 }, { "epoch": 14.79804400977995, "grad_norm": 26.004840850830078, "learning_rate": 7.1022358614642705e-06, "loss": 12.3252, "step": 34050 }, { "epoch": 14.81977723444716, "grad_norm": 21.51180648803711, "learning_rate": 7.097851819377467e-06, "loss": 12.2844, "step": 34100 }, { "epoch": 14.841510459114371, "grad_norm": 21.89017105102539, "learning_rate": 7.093467777290662e-06, "loss": 12.3084, "step": 34150 }, { "epoch": 14.863243683781581, "grad_norm": 23.298505783081055, "learning_rate": 7.089083735203859e-06, "loss": 12.3825, "step": 34200 }, { "epoch": 14.884976908448792, "grad_norm": 21.83428382873535, "learning_rate": 7.084699693117054e-06, "loss": 12.2981, "step": 34250 }, { "epoch": 14.906710133116, "grad_norm": 26.309865951538086, "learning_rate": 7.080315651030251e-06, "loss": 12.2713, "step": 34300 }, { "epoch": 14.92844335778321, "grad_norm": 28.134078979492188, "learning_rate": 7.075931608943446e-06, "loss": 12.311, "step": 34350 }, { "epoch": 14.950176582450421, "grad_norm": 25.938369750976562, "learning_rate": 7.0715475668566425e-06, "loss": 12.2782, "step": 34400 }, { "epoch": 14.971909807117632, "grad_norm": 25.179311752319336, "learning_rate": 7.067163524769839e-06, "loss": 12.3384, "step": 34450 }, { "epoch": 14.993643031784842, "grad_norm": 18.602447509765625, "learning_rate": 7.062779482683034e-06, "loss": 12.3125, "step": 34500 }, { "epoch": 15.015213257267048, "grad_norm": 23.754281997680664, "learning_rate": 7.05839544059623e-06, "loss": 12.1766, "step": 34550 }, { "epoch": 15.036946481934256, "grad_norm": 22.74106788635254, "learning_rate": 7.054011398509426e-06, "loss": 12.228, "step": 34600 }, { "epoch": 15.058679706601467, "grad_norm": 35.1696662902832, "learning_rate": 7.049627356422623e-06, "loss": 12.2026, "step": 34650 }, { "epoch": 15.080412931268677, "grad_norm": 20.032005310058594, "learning_rate": 7.045243314335818e-06, "loss": 12.2443, "step": 34700 }, { "epoch": 15.102146155935888, "grad_norm": 30.315168380737305, "learning_rate": 7.040859272249014e-06, "loss": 12.221, "step": 34750 }, { "epoch": 15.123879380603096, "grad_norm": 15.685395240783691, "learning_rate": 7.03647523016221e-06, "loss": 12.2394, "step": 34800 }, { "epoch": 15.145612605270307, "grad_norm": 24.66408348083496, "learning_rate": 7.032091188075406e-06, "loss": 12.1932, "step": 34850 }, { "epoch": 15.167345829937517, "grad_norm": 20.8659725189209, "learning_rate": 7.027707145988602e-06, "loss": 12.2888, "step": 34900 }, { "epoch": 15.189079054604727, "grad_norm": 25.82394027709961, "learning_rate": 7.023323103901798e-06, "loss": 12.1971, "step": 34950 }, { "epoch": 15.210812279271938, "grad_norm": 17.442481994628906, "learning_rate": 7.018939061814995e-06, "loss": 12.2246, "step": 35000 }, { "epoch": 15.232545503939146, "grad_norm": 16.10444450378418, "learning_rate": 7.01455501972819e-06, "loss": 12.2378, "step": 35050 }, { "epoch": 15.254278728606357, "grad_norm": 20.300018310546875, "learning_rate": 7.010170977641386e-06, "loss": 12.2806, "step": 35100 }, { "epoch": 15.276011953273567, "grad_norm": 30.641281127929688, "learning_rate": 7.005786935554582e-06, "loss": 12.2608, "step": 35150 }, { "epoch": 15.297745177940778, "grad_norm": 36.21476745605469, "learning_rate": 7.001402893467778e-06, "loss": 12.2502, "step": 35200 }, { "epoch": 15.319478402607986, "grad_norm": 31.640207290649414, "learning_rate": 6.997018851380973e-06, "loss": 12.2664, "step": 35250 }, { "epoch": 15.341211627275197, "grad_norm": 14.65031623840332, "learning_rate": 6.99263480929417e-06, "loss": 12.2727, "step": 35300 }, { "epoch": 15.362944851942407, "grad_norm": 15.896147727966309, "learning_rate": 6.988250767207367e-06, "loss": 12.2335, "step": 35350 }, { "epoch": 15.384678076609617, "grad_norm": 27.60741424560547, "learning_rate": 6.983866725120561e-06, "loss": 12.241, "step": 35400 }, { "epoch": 15.406411301276828, "grad_norm": 27.842981338500977, "learning_rate": 6.979482683033758e-06, "loss": 12.299, "step": 35450 }, { "epoch": 15.428144525944036, "grad_norm": 14.332504272460938, "learning_rate": 6.975098640946954e-06, "loss": 12.2547, "step": 35500 }, { "epoch": 15.449877750611247, "grad_norm": 13.6268310546875, "learning_rate": 6.97071459886015e-06, "loss": 12.2764, "step": 35550 }, { "epoch": 15.471610975278457, "grad_norm": 27.122060775756836, "learning_rate": 6.966330556773345e-06, "loss": 12.2568, "step": 35600 }, { "epoch": 15.493344199945668, "grad_norm": NaN, "learning_rate": 6.9619465146865415e-06, "loss": 12.2716, "step": 35650 }, { "epoch": 15.515077424612876, "grad_norm": 16.30205726623535, "learning_rate": 6.957562472599737e-06, "loss": 12.1952, "step": 35700 }, { "epoch": 15.536810649280087, "grad_norm": 17.126123428344727, "learning_rate": 6.953178430512933e-06, "loss": 12.2365, "step": 35750 }, { "epoch": 15.558543873947297, "grad_norm": 33.20661163330078, "learning_rate": 6.94879438842613e-06, "loss": 12.2776, "step": 35800 }, { "epoch": 15.580277098614507, "grad_norm": 22.688047409057617, "learning_rate": 6.944410346339326e-06, "loss": 12.3202, "step": 35850 }, { "epoch": 15.602010323281718, "grad_norm": 19.268665313720703, "learning_rate": 6.940026304252522e-06, "loss": 12.2744, "step": 35900 }, { "epoch": 15.623743547948926, "grad_norm": 44.28622817993164, "learning_rate": 6.935642262165717e-06, "loss": 12.2619, "step": 35950 }, { "epoch": 15.645476772616137, "grad_norm": 11.47972297668457, "learning_rate": 6.9312582200789135e-06, "loss": 12.2281, "step": 36000 }, { "epoch": 15.667209997283347, "grad_norm": 26.456462860107422, "learning_rate": 6.926874177992109e-06, "loss": 12.2591, "step": 36050 }, { "epoch": 15.688943221950558, "grad_norm": 18.363269805908203, "learning_rate": 6.922490135905305e-06, "loss": 12.2958, "step": 36100 }, { "epoch": 15.710676446617766, "grad_norm": 21.405649185180664, "learning_rate": 6.9181060938185005e-06, "loss": 12.2485, "step": 36150 }, { "epoch": 15.732409671284977, "grad_norm": 27.277904510498047, "learning_rate": 6.913722051731698e-06, "loss": 12.2324, "step": 36200 }, { "epoch": 15.754142895952187, "grad_norm": 20.303300857543945, "learning_rate": 6.909338009644894e-06, "loss": 12.2939, "step": 36250 }, { "epoch": 15.775876120619397, "grad_norm": 14.886679649353027, "learning_rate": 6.904953967558089e-06, "loss": 12.2036, "step": 36300 }, { "epoch": 15.797609345286606, "grad_norm": 17.747087478637695, "learning_rate": 6.9005699254712854e-06, "loss": 12.2459, "step": 36350 }, { "epoch": 15.819342569953816, "grad_norm": 34.592708587646484, "learning_rate": 6.896185883384481e-06, "loss": 12.2688, "step": 36400 }, { "epoch": 15.841075794621027, "grad_norm": 20.060144424438477, "learning_rate": 6.891801841297677e-06, "loss": 12.2528, "step": 36450 }, { "epoch": 15.862809019288237, "grad_norm": 13.47815227508545, "learning_rate": 6.8874177992108724e-06, "loss": 12.2797, "step": 36500 }, { "epoch": 15.884542243955448, "grad_norm": 20.81302833557129, "learning_rate": 6.883033757124069e-06, "loss": 12.269, "step": 36550 }, { "epoch": 15.906275468622656, "grad_norm": 29.326114654541016, "learning_rate": 6.878649715037264e-06, "loss": 12.296, "step": 36600 }, { "epoch": 15.928008693289867, "grad_norm": 21.322439193725586, "learning_rate": 6.874265672950461e-06, "loss": 12.3283, "step": 36650 }, { "epoch": 15.949741917957077, "grad_norm": 25.019590377807617, "learning_rate": 6.869881630863657e-06, "loss": 12.2913, "step": 36700 }, { "epoch": 15.971475142624287, "grad_norm": 16.494823455810547, "learning_rate": 6.865497588776853e-06, "loss": 12.2574, "step": 36750 }, { "epoch": 15.993208367291498, "grad_norm": 22.250595092773438, "learning_rate": 6.861113546690049e-06, "loss": 12.26, "step": 36800 }, { "epoch": 16.014778592773702, "grad_norm": 15.23131275177002, "learning_rate": 6.856729504603244e-06, "loss": 12.1167, "step": 36850 }, { "epoch": 16.036511817440914, "grad_norm": 18.172534942626953, "learning_rate": 6.852345462516441e-06, "loss": 12.1946, "step": 36900 }, { "epoch": 16.058245042108123, "grad_norm": 27.63299560546875, "learning_rate": 6.847961420429636e-06, "loss": 12.1801, "step": 36950 }, { "epoch": 16.07997826677533, "grad_norm": 22.287805557250977, "learning_rate": 6.843577378342833e-06, "loss": 12.2187, "step": 37000 }, { "epoch": 16.101711491442543, "grad_norm": 15.652294158935547, "learning_rate": 6.8391933362560285e-06, "loss": 12.1837, "step": 37050 }, { "epoch": 16.123444716109752, "grad_norm": 23.91109848022461, "learning_rate": 6.834809294169225e-06, "loss": 12.2074, "step": 37100 }, { "epoch": 16.145177940776964, "grad_norm": 24.845624923706055, "learning_rate": 6.830425252082421e-06, "loss": 12.1387, "step": 37150 }, { "epoch": 16.166911165444173, "grad_norm": 19.137048721313477, "learning_rate": 6.826041209995616e-06, "loss": 12.2304, "step": 37200 }, { "epoch": 16.18864439011138, "grad_norm": 17.24692153930664, "learning_rate": 6.8216571679088126e-06, "loss": 12.208, "step": 37250 }, { "epoch": 16.210377614778594, "grad_norm": 20.503686904907227, "learning_rate": 6.817273125822008e-06, "loss": 12.2103, "step": 37300 }, { "epoch": 16.232110839445802, "grad_norm": 27.404552459716797, "learning_rate": 6.812889083735205e-06, "loss": 12.2422, "step": 37350 }, { "epoch": 16.253844064113014, "grad_norm": 25.16230010986328, "learning_rate": 6.8085050416483996e-06, "loss": 12.2006, "step": 37400 }, { "epoch": 16.275577288780223, "grad_norm": 18.156126022338867, "learning_rate": 6.804120999561597e-06, "loss": 12.2023, "step": 37450 }, { "epoch": 16.29731051344743, "grad_norm": 19.68562889099121, "learning_rate": 6.799736957474792e-06, "loss": 12.1877, "step": 37500 }, { "epoch": 16.319043738114644, "grad_norm": 17.91988182067871, "learning_rate": 6.795352915387988e-06, "loss": 12.215, "step": 37550 }, { "epoch": 16.340776962781852, "grad_norm": 15.31675910949707, "learning_rate": 6.7909688733011845e-06, "loss": 12.1548, "step": 37600 }, { "epoch": 16.36251018744906, "grad_norm": 8.975651741027832, "learning_rate": 6.78658483121438e-06, "loss": 12.169, "step": 37650 }, { "epoch": 16.384243412116273, "grad_norm": 16.77298927307129, "learning_rate": 6.782200789127576e-06, "loss": 12.2139, "step": 37700 }, { "epoch": 16.405976636783482, "grad_norm": 23.03885269165039, "learning_rate": 6.7778167470407715e-06, "loss": 12.2093, "step": 37750 }, { "epoch": 16.427709861450694, "grad_norm": 18.47231101989746, "learning_rate": 6.773432704953969e-06, "loss": 12.1992, "step": 37800 }, { "epoch": 16.449443086117903, "grad_norm": 28.977338790893555, "learning_rate": 6.769048662867164e-06, "loss": 12.1989, "step": 37850 }, { "epoch": 16.47117631078511, "grad_norm": 16.37677574157715, "learning_rate": 6.76466462078036e-06, "loss": 12.2296, "step": 37900 }, { "epoch": 16.492909535452323, "grad_norm": 13.731319427490234, "learning_rate": 6.760280578693556e-06, "loss": 12.186, "step": 37950 }, { "epoch": 16.514642760119532, "grad_norm": 20.206491470336914, "learning_rate": 6.755896536606752e-06, "loss": 12.1552, "step": 38000 }, { "epoch": 16.536375984786744, "grad_norm": 19.88826560974121, "learning_rate": 6.751512494519948e-06, "loss": 12.2298, "step": 38050 }, { "epoch": 16.558109209453953, "grad_norm": 31.184532165527344, "learning_rate": 6.7471284524331435e-06, "loss": 12.2041, "step": 38100 }, { "epoch": 16.57984243412116, "grad_norm": 37.404266357421875, "learning_rate": 6.7427444103463405e-06, "loss": 12.208, "step": 38150 }, { "epoch": 16.601575658788374, "grad_norm": 12.503349304199219, "learning_rate": 6.738360368259536e-06, "loss": 12.2289, "step": 38200 }, { "epoch": 16.623308883455582, "grad_norm": 14.80574893951416, "learning_rate": 6.733976326172732e-06, "loss": 12.2027, "step": 38250 }, { "epoch": 16.645042108122794, "grad_norm": 18.339298248291016, "learning_rate": 6.7295922840859276e-06, "loss": 12.2122, "step": 38300 }, { "epoch": 16.666775332790003, "grad_norm": 9.988556861877441, "learning_rate": 6.725208241999124e-06, "loss": 12.2169, "step": 38350 }, { "epoch": 16.68850855745721, "grad_norm": 16.23221778869629, "learning_rate": 6.720824199912319e-06, "loss": 12.2432, "step": 38400 }, { "epoch": 16.710241782124424, "grad_norm": 17.93288803100586, "learning_rate": 6.716440157825515e-06, "loss": 12.1946, "step": 38450 }, { "epoch": 16.731975006791632, "grad_norm": 23.863719940185547, "learning_rate": 6.7120561157387125e-06, "loss": 12.2601, "step": 38500 }, { "epoch": 16.75370823145884, "grad_norm": 32.24260330200195, "learning_rate": 6.707672073651907e-06, "loss": 12.2242, "step": 38550 }, { "epoch": 16.775441456126053, "grad_norm": 31.188295364379883, "learning_rate": 6.703288031565104e-06, "loss": 12.1977, "step": 38600 }, { "epoch": 16.797174680793262, "grad_norm": 21.935489654541016, "learning_rate": 6.6989039894782995e-06, "loss": 12.2153, "step": 38650 }, { "epoch": 16.818907905460474, "grad_norm": 16.820199966430664, "learning_rate": 6.694519947391496e-06, "loss": 12.1841, "step": 38700 }, { "epoch": 16.840641130127683, "grad_norm": 27.350257873535156, "learning_rate": 6.690135905304691e-06, "loss": 12.2308, "step": 38750 }, { "epoch": 16.86237435479489, "grad_norm": 20.717317581176758, "learning_rate": 6.685751863217887e-06, "loss": 12.2139, "step": 38800 }, { "epoch": 16.884107579462103, "grad_norm": 20.515241622924805, "learning_rate": 6.681367821131084e-06, "loss": 12.209, "step": 38850 }, { "epoch": 16.905840804129312, "grad_norm": 16.544082641601562, "learning_rate": 6.676983779044279e-06, "loss": 12.2183, "step": 38900 }, { "epoch": 16.927574028796524, "grad_norm": 17.54091453552246, "learning_rate": 6.672599736957476e-06, "loss": 12.2609, "step": 38950 }, { "epoch": 16.949307253463733, "grad_norm": 21.071598052978516, "learning_rate": 6.6682156948706714e-06, "loss": 12.1727, "step": 39000 }, { "epoch": 16.97104047813094, "grad_norm": 17.628015518188477, "learning_rate": 6.663831652783868e-06, "loss": 12.1862, "step": 39050 }, { "epoch": 16.992773702798154, "grad_norm": 13.298240661621094, "learning_rate": 6.659447610697063e-06, "loss": 12.2279, "step": 39100 }, { "epoch": 17.014343928280358, "grad_norm": 12.616509437561035, "learning_rate": 6.655063568610259e-06, "loss": 12.0914, "step": 39150 }, { "epoch": 17.03607715294757, "grad_norm": 21.71387481689453, "learning_rate": 6.650679526523455e-06, "loss": 12.1576, "step": 39200 }, { "epoch": 17.05781037761478, "grad_norm": 26.497800827026367, "learning_rate": 6.646295484436651e-06, "loss": 12.1522, "step": 39250 }, { "epoch": 17.079543602281987, "grad_norm": 20.276397705078125, "learning_rate": 6.641911442349848e-06, "loss": 12.1579, "step": 39300 }, { "epoch": 17.1012768269492, "grad_norm": 18.534727096557617, "learning_rate": 6.637527400263043e-06, "loss": 12.178, "step": 39350 }, { "epoch": 17.123010051616408, "grad_norm": 29.980501174926758, "learning_rate": 6.63314335817624e-06, "loss": 12.1507, "step": 39400 }, { "epoch": 17.14474327628362, "grad_norm": 25.486083984375, "learning_rate": 6.628759316089435e-06, "loss": 12.148, "step": 39450 }, { "epoch": 17.16647650095083, "grad_norm": 24.499359130859375, "learning_rate": 6.624375274002631e-06, "loss": 12.1513, "step": 39500 }, { "epoch": 17.188209725618037, "grad_norm": 22.07660484313965, "learning_rate": 6.619991231915827e-06, "loss": 12.1477, "step": 39550 }, { "epoch": 17.20994295028525, "grad_norm": 28.42877960205078, "learning_rate": 6.615607189829023e-06, "loss": 12.1747, "step": 39600 }, { "epoch": 17.231676174952458, "grad_norm": 22.489025115966797, "learning_rate": 6.611223147742218e-06, "loss": 12.1474, "step": 39650 }, { "epoch": 17.25340939961967, "grad_norm": 24.58718490600586, "learning_rate": 6.6068391056554145e-06, "loss": 12.1613, "step": 39700 }, { "epoch": 17.27514262428688, "grad_norm": 92.33475494384766, "learning_rate": 6.6024550635686116e-06, "loss": 12.1637, "step": 39750 }, { "epoch": 17.296875848954087, "grad_norm": 19.350147247314453, "learning_rate": 6.598071021481807e-06, "loss": 12.184, "step": 39800 }, { "epoch": 17.3186090736213, "grad_norm": 14.627690315246582, "learning_rate": 6.593686979395003e-06, "loss": 12.1552, "step": 39850 }, { "epoch": 17.34034229828851, "grad_norm": 11.52912425994873, "learning_rate": 6.5893029373081986e-06, "loss": 12.1784, "step": 39900 }, { "epoch": 17.362075522955717, "grad_norm": 18.19782829284668, "learning_rate": 6.584918895221395e-06, "loss": 12.127, "step": 39950 }, { "epoch": 17.38380874762293, "grad_norm": 24.676179885864258, "learning_rate": 6.58053485313459e-06, "loss": 12.2129, "step": 40000 }, { "epoch": 17.38380874762293, "eval_cer": 0.0766327626430326, "eval_loss": 2.3462953567504883, "eval_runtime": 399.3051, "eval_samples_per_second": 13.539, "eval_steps_per_second": 3.386, "eval_wer": 0.22991050400376825, "step": 40000 }, { "epoch": 17.405541972290138, "grad_norm": 10.956995010375977, "learning_rate": 6.5761508110477864e-06, "loss": 12.1388, "step": 40050 }, { "epoch": 17.42727519695735, "grad_norm": 23.64618682861328, "learning_rate": 6.571766768960982e-06, "loss": 12.1628, "step": 40100 }, { "epoch": 17.44900842162456, "grad_norm": 22.68800926208496, "learning_rate": 6.567382726874179e-06, "loss": 12.1489, "step": 40150 }, { "epoch": 17.470741646291767, "grad_norm": 17.155860900878906, "learning_rate": 6.562998684787375e-06, "loss": 12.1374, "step": 40200 }, { "epoch": 17.49247487095898, "grad_norm": 19.839338302612305, "learning_rate": 6.5586146427005705e-06, "loss": 12.1891, "step": 40250 }, { "epoch": 17.514208095626188, "grad_norm": 24.002262115478516, "learning_rate": 6.554230600613767e-06, "loss": 12.1819, "step": 40300 }, { "epoch": 17.5359413202934, "grad_norm": 14.681846618652344, "learning_rate": 6.549846558526962e-06, "loss": 12.1583, "step": 40350 }, { "epoch": 17.55767454496061, "grad_norm": 28.004215240478516, "learning_rate": 6.545462516440158e-06, "loss": 12.1953, "step": 40400 }, { "epoch": 17.579407769627817, "grad_norm": 18.857913970947266, "learning_rate": 6.541078474353354e-06, "loss": 12.1922, "step": 40450 }, { "epoch": 17.60114099429503, "grad_norm": 27.09821319580078, "learning_rate": 6.536694432266551e-06, "loss": 12.1901, "step": 40500 }, { "epoch": 17.622874218962238, "grad_norm": 15.759273529052734, "learning_rate": 6.532310390179745e-06, "loss": 12.1536, "step": 40550 }, { "epoch": 17.64460744362945, "grad_norm": 13.474365234375, "learning_rate": 6.5279263480929425e-06, "loss": 12.1806, "step": 40600 }, { "epoch": 17.66634066829666, "grad_norm": 14.334114074707031, "learning_rate": 6.523542306006139e-06, "loss": 12.1881, "step": 40650 }, { "epoch": 17.688073892963867, "grad_norm": 28.76114845275879, "learning_rate": 6.519158263919334e-06, "loss": 12.2126, "step": 40700 }, { "epoch": 17.70980711763108, "grad_norm": 22.386192321777344, "learning_rate": 6.51477422183253e-06, "loss": 12.153, "step": 40750 }, { "epoch": 17.73154034229829, "grad_norm": 12.762558937072754, "learning_rate": 6.510390179745726e-06, "loss": 12.1787, "step": 40800 }, { "epoch": 17.753273566965497, "grad_norm": 15.222195625305176, "learning_rate": 6.506006137658922e-06, "loss": 12.1646, "step": 40850 }, { "epoch": 17.77500679163271, "grad_norm": 35.954437255859375, "learning_rate": 6.501622095572117e-06, "loss": 12.1707, "step": 40900 }, { "epoch": 17.796740016299918, "grad_norm": 11.987882614135742, "learning_rate": 6.497238053485314e-06, "loss": 12.1824, "step": 40950 }, { "epoch": 17.81847324096713, "grad_norm": 31.296215057373047, "learning_rate": 6.49285401139851e-06, "loss": 12.213, "step": 41000 }, { "epoch": 17.84020646563434, "grad_norm": 16.63829231262207, "learning_rate": 6.488469969311706e-06, "loss": 12.1762, "step": 41050 }, { "epoch": 17.861939690301547, "grad_norm": 13.500885963439941, "learning_rate": 6.484085927224902e-06, "loss": 12.1677, "step": 41100 }, { "epoch": 17.88367291496876, "grad_norm": 29.857112884521484, "learning_rate": 6.479701885138098e-06, "loss": 12.1812, "step": 41150 }, { "epoch": 17.905406139635968, "grad_norm": 14.494293212890625, "learning_rate": 6.475317843051294e-06, "loss": 12.1565, "step": 41200 }, { "epoch": 17.92713936430318, "grad_norm": 11.10816478729248, "learning_rate": 6.470933800964489e-06, "loss": 12.1799, "step": 41250 }, { "epoch": 17.94887258897039, "grad_norm": 19.924617767333984, "learning_rate": 6.466549758877686e-06, "loss": 12.1382, "step": 41300 }, { "epoch": 17.970605813637597, "grad_norm": 21.809062957763672, "learning_rate": 6.462165716790882e-06, "loss": 12.1727, "step": 41350 }, { "epoch": 17.99233903830481, "grad_norm": 12.314726829528809, "learning_rate": 6.457781674704078e-06, "loss": 12.1355, "step": 41400 }, { "epoch": 18.013909263787014, "grad_norm": 22.337913513183594, "learning_rate": 6.453397632617273e-06, "loss": 12.0397, "step": 41450 }, { "epoch": 18.035642488454226, "grad_norm": 14.111494064331055, "learning_rate": 6.44901359053047e-06, "loss": 12.0939, "step": 41500 }, { "epoch": 18.057375713121434, "grad_norm": 16.706897735595703, "learning_rate": 6.444629548443666e-06, "loss": 12.1425, "step": 41550 }, { "epoch": 18.079108937788643, "grad_norm": 26.20379066467285, "learning_rate": 6.440245506356861e-06, "loss": 12.1265, "step": 41600 }, { "epoch": 18.100842162455855, "grad_norm": 14.789849281311035, "learning_rate": 6.4358614642700574e-06, "loss": 12.1377, "step": 41650 }, { "epoch": 18.122575387123064, "grad_norm": 34.11836242675781, "learning_rate": 6.431477422183253e-06, "loss": 12.1273, "step": 41700 }, { "epoch": 18.144308611790276, "grad_norm": 32.26976013183594, "learning_rate": 6.42709338009645e-06, "loss": 12.0853, "step": 41750 }, { "epoch": 18.166041836457484, "grad_norm": 19.59932518005371, "learning_rate": 6.422709338009645e-06, "loss": 12.0887, "step": 41800 }, { "epoch": 18.187775061124693, "grad_norm": 16.68062400817871, "learning_rate": 6.4183252959228415e-06, "loss": 12.148, "step": 41850 }, { "epoch": 18.209508285791905, "grad_norm": 18.44430923461914, "learning_rate": 6.413941253836037e-06, "loss": 12.132, "step": 41900 }, { "epoch": 18.231241510459114, "grad_norm": 23.202688217163086, "learning_rate": 6.409557211749233e-06, "loss": 12.1606, "step": 41950 }, { "epoch": 18.252974735126326, "grad_norm": 11.007984161376953, "learning_rate": 6.405173169662429e-06, "loss": 12.1526, "step": 42000 }, { "epoch": 18.274707959793535, "grad_norm": 43.34115219116211, "learning_rate": 6.400789127575625e-06, "loss": 12.1461, "step": 42050 }, { "epoch": 18.296441184460743, "grad_norm": 21.273698806762695, "learning_rate": 6.396405085488822e-06, "loss": 12.116, "step": 42100 }, { "epoch": 18.318174409127955, "grad_norm": 21.992979049682617, "learning_rate": 6.392021043402017e-06, "loss": 12.0956, "step": 42150 }, { "epoch": 18.339907633795164, "grad_norm": 10.890033721923828, "learning_rate": 6.3876370013152135e-06, "loss": 12.0999, "step": 42200 }, { "epoch": 18.361640858462373, "grad_norm": 10.554021835327148, "learning_rate": 6.383252959228409e-06, "loss": 12.1158, "step": 42250 }, { "epoch": 18.383374083129585, "grad_norm": 11.385374069213867, "learning_rate": 6.378868917141605e-06, "loss": 12.1541, "step": 42300 }, { "epoch": 18.405107307796793, "grad_norm": 11.36735725402832, "learning_rate": 6.3744848750548005e-06, "loss": 12.119, "step": 42350 }, { "epoch": 18.426840532464006, "grad_norm": 19.5784969329834, "learning_rate": 6.370100832967997e-06, "loss": 12.1513, "step": 42400 }, { "epoch": 18.448573757131214, "grad_norm": 10.584908485412598, "learning_rate": 6.365716790881194e-06, "loss": 12.1572, "step": 42450 }, { "epoch": 18.470306981798423, "grad_norm": 23.416278839111328, "learning_rate": 6.361332748794389e-06, "loss": 12.1384, "step": 42500 }, { "epoch": 18.492040206465635, "grad_norm": 22.098583221435547, "learning_rate": 6.3569487067075854e-06, "loss": 12.1005, "step": 42550 }, { "epoch": 18.513773431132844, "grad_norm": 27.949371337890625, "learning_rate": 6.352564664620781e-06, "loss": 12.1248, "step": 42600 }, { "epoch": 18.535506655800056, "grad_norm": 13.4013090133667, "learning_rate": 6.348180622533977e-06, "loss": 12.1335, "step": 42650 }, { "epoch": 18.557239880467264, "grad_norm": 19.233583450317383, "learning_rate": 6.3437965804471724e-06, "loss": 12.1416, "step": 42700 }, { "epoch": 18.578973105134473, "grad_norm": 17.514616012573242, "learning_rate": 6.339412538360369e-06, "loss": 12.1561, "step": 42750 }, { "epoch": 18.600706329801685, "grad_norm": 17.83085823059082, "learning_rate": 6.335028496273564e-06, "loss": 12.1655, "step": 42800 }, { "epoch": 18.622439554468894, "grad_norm": 32.00389099121094, "learning_rate": 6.33064445418676e-06, "loss": 12.1489, "step": 42850 }, { "epoch": 18.644172779136106, "grad_norm": 36.5909309387207, "learning_rate": 6.326260412099957e-06, "loss": 12.1353, "step": 42900 }, { "epoch": 18.665906003803315, "grad_norm": 19.841901779174805, "learning_rate": 6.321876370013153e-06, "loss": 12.1237, "step": 42950 }, { "epoch": 18.687639228470523, "grad_norm": 12.05302619934082, "learning_rate": 6.317492327926349e-06, "loss": 12.0931, "step": 43000 }, { "epoch": 18.709372453137735, "grad_norm": 51.42092514038086, "learning_rate": 6.313108285839544e-06, "loss": 12.0907, "step": 43050 }, { "epoch": 18.731105677804944, "grad_norm": 21.547746658325195, "learning_rate": 6.308724243752741e-06, "loss": 12.1051, "step": 43100 }, { "epoch": 18.752838902472153, "grad_norm": 17.779346466064453, "learning_rate": 6.304340201665936e-06, "loss": 12.1208, "step": 43150 }, { "epoch": 18.774572127139365, "grad_norm": 12.786531448364258, "learning_rate": 6.299956159579132e-06, "loss": 12.1527, "step": 43200 }, { "epoch": 18.796305351806573, "grad_norm": 15.865018844604492, "learning_rate": 6.295572117492329e-06, "loss": 12.1003, "step": 43250 }, { "epoch": 18.818038576473786, "grad_norm": 12.622864723205566, "learning_rate": 6.291188075405525e-06, "loss": 12.1439, "step": 43300 }, { "epoch": 18.839771801140994, "grad_norm": 12.189949035644531, "learning_rate": 6.286804033318721e-06, "loss": 12.132, "step": 43350 }, { "epoch": 18.861505025808203, "grad_norm": 18.03951072692871, "learning_rate": 6.282419991231916e-06, "loss": 12.1327, "step": 43400 }, { "epoch": 18.883238250475415, "grad_norm": 25.907819747924805, "learning_rate": 6.2780359491451126e-06, "loss": 12.1319, "step": 43450 }, { "epoch": 18.904971475142624, "grad_norm": 39.924564361572266, "learning_rate": 6.273651907058308e-06, "loss": 12.1779, "step": 43500 }, { "epoch": 18.926704699809836, "grad_norm": 10.564095497131348, "learning_rate": 6.269267864971504e-06, "loss": 12.1198, "step": 43550 }, { "epoch": 18.948437924477044, "grad_norm": 16.400606155395508, "learning_rate": 6.2648838228846996e-06, "loss": 12.1314, "step": 43600 }, { "epoch": 18.970171149144253, "grad_norm": 16.357927322387695, "learning_rate": 6.260499780797896e-06, "loss": 12.1305, "step": 43650 }, { "epoch": 18.991904373811465, "grad_norm": 18.073299407958984, "learning_rate": 6.256115738711093e-06, "loss": 12.1585, "step": 43700 }, { "epoch": 19.01347459929367, "grad_norm": 14.831045150756836, "learning_rate": 6.251731696624288e-06, "loss": 12.023, "step": 43750 }, { "epoch": 19.03520782396088, "grad_norm": 20.606718063354492, "learning_rate": 6.2473476545374845e-06, "loss": 12.0678, "step": 43800 }, { "epoch": 19.05694104862809, "grad_norm": 20.03177261352539, "learning_rate": 6.24296361245068e-06, "loss": 12.1054, "step": 43850 }, { "epoch": 19.0786742732953, "grad_norm": 16.764787673950195, "learning_rate": 6.238579570363876e-06, "loss": 12.076, "step": 43900 }, { "epoch": 19.10040749796251, "grad_norm": 20.074857711791992, "learning_rate": 6.2341955282770715e-06, "loss": 12.0784, "step": 43950 }, { "epoch": 19.12214072262972, "grad_norm": 14.84661865234375, "learning_rate": 6.229811486190268e-06, "loss": 12.0933, "step": 44000 }, { "epoch": 19.14387394729693, "grad_norm": 17.447168350219727, "learning_rate": 6.225427444103463e-06, "loss": 12.0883, "step": 44050 }, { "epoch": 19.16560717196414, "grad_norm": 21.10520362854004, "learning_rate": 6.22104340201666e-06, "loss": 12.0839, "step": 44100 }, { "epoch": 19.18734039663135, "grad_norm": 17.273950576782227, "learning_rate": 6.2166593599298564e-06, "loss": 12.0517, "step": 44150 }, { "epoch": 19.20907362129856, "grad_norm": 11.963603019714355, "learning_rate": 6.212275317843052e-06, "loss": 12.1211, "step": 44200 }, { "epoch": 19.23080684596577, "grad_norm": 28.02683448791504, "learning_rate": 6.207891275756248e-06, "loss": 12.1012, "step": 44250 }, { "epoch": 19.252540070632982, "grad_norm": 18.750391006469727, "learning_rate": 6.2035072336694435e-06, "loss": 12.1257, "step": 44300 }, { "epoch": 19.27427329530019, "grad_norm": 13.95964241027832, "learning_rate": 6.19912319158264e-06, "loss": 12.1096, "step": 44350 }, { "epoch": 19.2960065199674, "grad_norm": 13.954286575317383, "learning_rate": 6.194739149495835e-06, "loss": 12.096, "step": 44400 }, { "epoch": 19.31773974463461, "grad_norm": 19.977340698242188, "learning_rate": 6.190355107409032e-06, "loss": 12.1119, "step": 44450 }, { "epoch": 19.33947296930182, "grad_norm": 20.84231948852539, "learning_rate": 6.1859710653222275e-06, "loss": 12.117, "step": 44500 }, { "epoch": 19.36120619396903, "grad_norm": 15.089071273803711, "learning_rate": 6.181587023235424e-06, "loss": 12.0531, "step": 44550 }, { "epoch": 19.38293941863624, "grad_norm": 19.530078887939453, "learning_rate": 6.17720298114862e-06, "loss": 12.0606, "step": 44600 }, { "epoch": 19.40467264330345, "grad_norm": 35.273353576660156, "learning_rate": 6.172818939061815e-06, "loss": 12.0703, "step": 44650 }, { "epoch": 19.42640586797066, "grad_norm": 27.611345291137695, "learning_rate": 6.168434896975012e-06, "loss": 12.0979, "step": 44700 }, { "epoch": 19.44813909263787, "grad_norm": 12.463072776794434, "learning_rate": 6.164050854888207e-06, "loss": 12.1186, "step": 44750 }, { "epoch": 19.46987231730508, "grad_norm": 13.169920921325684, "learning_rate": 6.159666812801403e-06, "loss": 12.1214, "step": 44800 }, { "epoch": 19.49160554197229, "grad_norm": 19.480173110961914, "learning_rate": 6.155282770714599e-06, "loss": 12.0956, "step": 44850 }, { "epoch": 19.5133387666395, "grad_norm": 12.746538162231445, "learning_rate": 6.150898728627796e-06, "loss": 12.0653, "step": 44900 }, { "epoch": 19.53507199130671, "grad_norm": 20.619016647338867, "learning_rate": 6.146514686540991e-06, "loss": 12.1175, "step": 44950 }, { "epoch": 19.55680521597392, "grad_norm": 19.82939910888672, "learning_rate": 6.142130644454187e-06, "loss": 12.126, "step": 45000 }, { "epoch": 19.57853844064113, "grad_norm": 14.061666488647461, "learning_rate": 6.1377466023673836e-06, "loss": 12.0754, "step": 45050 }, { "epoch": 19.60027166530834, "grad_norm": 13.00661849975586, "learning_rate": 6.133362560280579e-06, "loss": 12.0868, "step": 45100 }, { "epoch": 19.62200488997555, "grad_norm": 14.957731246948242, "learning_rate": 6.128978518193775e-06, "loss": 12.0564, "step": 45150 }, { "epoch": 19.64373811464276, "grad_norm": 14.701393127441406, "learning_rate": 6.124594476106971e-06, "loss": 12.1071, "step": 45200 }, { "epoch": 19.66547133930997, "grad_norm": 17.358051300048828, "learning_rate": 6.120210434020168e-06, "loss": 12.118, "step": 45250 }, { "epoch": 19.68720456397718, "grad_norm": 14.36281967163086, "learning_rate": 6.115826391933363e-06, "loss": 12.1246, "step": 45300 }, { "epoch": 19.70893778864439, "grad_norm": 30.517263412475586, "learning_rate": 6.111442349846559e-06, "loss": 12.1116, "step": 45350 }, { "epoch": 19.7306710133116, "grad_norm": 16.39494514465332, "learning_rate": 6.107058307759755e-06, "loss": 12.1275, "step": 45400 }, { "epoch": 19.75240423797881, "grad_norm": 15.935347557067871, "learning_rate": 6.102674265672951e-06, "loss": 12.0961, "step": 45450 }, { "epoch": 19.77413746264602, "grad_norm": 17.69158172607422, "learning_rate": 6.098290223586147e-06, "loss": 12.1242, "step": 45500 }, { "epoch": 19.79587068731323, "grad_norm": 17.6668758392334, "learning_rate": 6.0939061814993425e-06, "loss": 12.0872, "step": 45550 }, { "epoch": 19.81760391198044, "grad_norm": 16.675373077392578, "learning_rate": 6.08952213941254e-06, "loss": 12.0705, "step": 45600 }, { "epoch": 19.83933713664765, "grad_norm": 18.560033798217773, "learning_rate": 6.085138097325735e-06, "loss": 12.098, "step": 45650 }, { "epoch": 19.86107036131486, "grad_norm": 18.61153793334961, "learning_rate": 6.080754055238931e-06, "loss": 12.1017, "step": 45700 }, { "epoch": 19.88280358598207, "grad_norm": 23.753692626953125, "learning_rate": 6.076370013152127e-06, "loss": 12.101, "step": 45750 }, { "epoch": 19.90453681064928, "grad_norm": 12.80927848815918, "learning_rate": 6.071985971065323e-06, "loss": 12.116, "step": 45800 }, { "epoch": 19.92627003531649, "grad_norm": 22.449129104614258, "learning_rate": 6.067601928978518e-06, "loss": 12.1071, "step": 45850 }, { "epoch": 19.9480032599837, "grad_norm": 53.62459945678711, "learning_rate": 6.0632178868917145e-06, "loss": 12.1369, "step": 45900 }, { "epoch": 19.96973648465091, "grad_norm": 18.846603393554688, "learning_rate": 6.058833844804911e-06, "loss": 12.099, "step": 45950 }, { "epoch": 19.99146970931812, "grad_norm": 28.6248836517334, "learning_rate": 6.054449802718106e-06, "loss": 12.1369, "step": 46000 }, { "epoch": 20.013039934800325, "grad_norm": 17.0070858001709, "learning_rate": 6.050065760631303e-06, "loss": 11.9934, "step": 46050 }, { "epoch": 20.034773159467537, "grad_norm": 30.195463180541992, "learning_rate": 6.0456817185444986e-06, "loss": 12.0606, "step": 46100 }, { "epoch": 20.056506384134746, "grad_norm": 15.557343482971191, "learning_rate": 6.041297676457695e-06, "loss": 12.0555, "step": 46150 }, { "epoch": 20.078239608801955, "grad_norm": 20.677410125732422, "learning_rate": 6.03691363437089e-06, "loss": 12.0169, "step": 46200 }, { "epoch": 20.099972833469167, "grad_norm": 8.35476016998291, "learning_rate": 6.032529592284086e-06, "loss": 12.1085, "step": 46250 }, { "epoch": 20.121706058136375, "grad_norm": 21.85611915588379, "learning_rate": 6.028145550197282e-06, "loss": 12.0724, "step": 46300 }, { "epoch": 20.143439282803588, "grad_norm": 15.336892127990723, "learning_rate": 6.023761508110478e-06, "loss": 12.0564, "step": 46350 }, { "epoch": 20.165172507470796, "grad_norm": 14.198942184448242, "learning_rate": 6.019377466023675e-06, "loss": 12.0629, "step": 46400 }, { "epoch": 20.186905732138005, "grad_norm": 19.750280380249023, "learning_rate": 6.0149934239368705e-06, "loss": 12.0606, "step": 46450 }, { "epoch": 20.208638956805217, "grad_norm": 23.643993377685547, "learning_rate": 6.010609381850067e-06, "loss": 12.0624, "step": 46500 }, { "epoch": 20.230372181472426, "grad_norm": 15.32921028137207, "learning_rate": 6.006225339763262e-06, "loss": 12.0515, "step": 46550 }, { "epoch": 20.252105406139634, "grad_norm": 18.966848373413086, "learning_rate": 6.001841297676458e-06, "loss": 12.0743, "step": 46600 }, { "epoch": 20.273838630806846, "grad_norm": 15.885478973388672, "learning_rate": 5.997457255589654e-06, "loss": 12.068, "step": 46650 }, { "epoch": 20.295571855474055, "grad_norm": 25.81429672241211, "learning_rate": 5.99307321350285e-06, "loss": 12.1066, "step": 46700 }, { "epoch": 20.317305080141267, "grad_norm": 14.397024154663086, "learning_rate": 5.988689171416045e-06, "loss": 12.1048, "step": 46750 }, { "epoch": 20.339038304808476, "grad_norm": 38.001121520996094, "learning_rate": 5.984305129329242e-06, "loss": 12.0548, "step": 46800 }, { "epoch": 20.360771529475684, "grad_norm": 19.49797248840332, "learning_rate": 5.979921087242439e-06, "loss": 12.0747, "step": 46850 }, { "epoch": 20.382504754142897, "grad_norm": 13.953147888183594, "learning_rate": 5.975537045155634e-06, "loss": 12.0704, "step": 46900 }, { "epoch": 20.404237978810105, "grad_norm": 33.00684356689453, "learning_rate": 5.97115300306883e-06, "loss": 12.0737, "step": 46950 }, { "epoch": 20.425971203477317, "grad_norm": 14.40523910522461, "learning_rate": 5.966768960982026e-06, "loss": 12.0644, "step": 47000 }, { "epoch": 20.447704428144526, "grad_norm": 17.341297149658203, "learning_rate": 5.962384918895222e-06, "loss": 12.0375, "step": 47050 }, { "epoch": 20.469437652811735, "grad_norm": 11.500914573669434, "learning_rate": 5.958000876808417e-06, "loss": 12.0957, "step": 47100 }, { "epoch": 20.491170877478947, "grad_norm": 14.926876068115234, "learning_rate": 5.9536168347216135e-06, "loss": 12.0661, "step": 47150 }, { "epoch": 20.512904102146155, "grad_norm": 33.41230392456055, "learning_rate": 5.949232792634809e-06, "loss": 12.0683, "step": 47200 }, { "epoch": 20.534637326813368, "grad_norm": 11.592459678649902, "learning_rate": 5.944848750548006e-06, "loss": 12.0852, "step": 47250 }, { "epoch": 20.556370551480576, "grad_norm": 11.893900871276855, "learning_rate": 5.940464708461202e-06, "loss": 12.0927, "step": 47300 }, { "epoch": 20.578103776147785, "grad_norm": 19.416147232055664, "learning_rate": 5.936080666374398e-06, "loss": 12.0571, "step": 47350 }, { "epoch": 20.599837000814997, "grad_norm": 114.77404022216797, "learning_rate": 5.931696624287594e-06, "loss": 12.0694, "step": 47400 }, { "epoch": 20.621570225482206, "grad_norm": 22.660274505615234, "learning_rate": 5.927312582200789e-06, "loss": 12.0863, "step": 47450 }, { "epoch": 20.643303450149418, "grad_norm": 27.254777908325195, "learning_rate": 5.9229285401139855e-06, "loss": 12.0506, "step": 47500 }, { "epoch": 20.665036674816626, "grad_norm": 18.767820358276367, "learning_rate": 5.918544498027181e-06, "loss": 12.0552, "step": 47550 }, { "epoch": 20.686769899483835, "grad_norm": 12.995434761047363, "learning_rate": 5.914160455940378e-06, "loss": 12.0879, "step": 47600 }, { "epoch": 20.708503124151047, "grad_norm": 14.814035415649414, "learning_rate": 5.909776413853573e-06, "loss": 12.0959, "step": 47650 }, { "epoch": 20.730236348818256, "grad_norm": 25.315176010131836, "learning_rate": 5.90539237176677e-06, "loss": 12.0976, "step": 47700 }, { "epoch": 20.751969573485464, "grad_norm": 25.416751861572266, "learning_rate": 5.901008329679966e-06, "loss": 12.0528, "step": 47750 }, { "epoch": 20.773702798152677, "grad_norm": 16.93905258178711, "learning_rate": 5.896624287593161e-06, "loss": 12.0705, "step": 47800 }, { "epoch": 20.795436022819885, "grad_norm": 30.060588836669922, "learning_rate": 5.8922402455063574e-06, "loss": 12.0466, "step": 47850 }, { "epoch": 20.817169247487097, "grad_norm": 13.423187255859375, "learning_rate": 5.887856203419553e-06, "loss": 12.0681, "step": 47900 }, { "epoch": 20.838902472154306, "grad_norm": 13.607131004333496, "learning_rate": 5.883472161332749e-06, "loss": 12.0687, "step": 47950 }, { "epoch": 20.860635696821515, "grad_norm": 22.271543502807617, "learning_rate": 5.8790881192459444e-06, "loss": 12.1039, "step": 48000 }, { "epoch": 20.882368921488727, "grad_norm": 25.268817901611328, "learning_rate": 5.8747040771591415e-06, "loss": 12.09, "step": 48050 }, { "epoch": 20.904102146155935, "grad_norm": 15.665398597717285, "learning_rate": 5.870320035072338e-06, "loss": 12.0956, "step": 48100 }, { "epoch": 20.925835370823147, "grad_norm": 21.067293167114258, "learning_rate": 5.865935992985533e-06, "loss": 12.0499, "step": 48150 }, { "epoch": 20.947568595490356, "grad_norm": 22.776708602905273, "learning_rate": 5.861551950898729e-06, "loss": 12.0762, "step": 48200 }, { "epoch": 20.969301820157565, "grad_norm": 8.629790306091309, "learning_rate": 5.857167908811925e-06, "loss": 12.0614, "step": 48250 }, { "epoch": 20.991035044824777, "grad_norm": 15.550890922546387, "learning_rate": 5.852783866725121e-06, "loss": 12.0792, "step": 48300 }, { "epoch": 21.01260527030698, "grad_norm": 12.225948333740234, "learning_rate": 5.848399824638316e-06, "loss": 11.9374, "step": 48350 }, { "epoch": 21.034338494974193, "grad_norm": 14.14416790008545, "learning_rate": 5.8440157825515135e-06, "loss": 12.0157, "step": 48400 }, { "epoch": 21.056071719641402, "grad_norm": 17.12042236328125, "learning_rate": 5.839631740464709e-06, "loss": 12.0288, "step": 48450 }, { "epoch": 21.07780494430861, "grad_norm": 13.070446968078613, "learning_rate": 5.835247698377905e-06, "loss": 12.0528, "step": 48500 }, { "epoch": 21.099538168975823, "grad_norm": 22.833274841308594, "learning_rate": 5.830863656291101e-06, "loss": 12.0479, "step": 48550 }, { "epoch": 21.12127139364303, "grad_norm": 19.790773391723633, "learning_rate": 5.826479614204297e-06, "loss": 12.044, "step": 48600 }, { "epoch": 21.143004618310243, "grad_norm": 16.40357208251953, "learning_rate": 5.822095572117493e-06, "loss": 12.0299, "step": 48650 }, { "epoch": 21.164737842977452, "grad_norm": 13.88508129119873, "learning_rate": 5.817711530030688e-06, "loss": 12.0255, "step": 48700 }, { "epoch": 21.18647106764466, "grad_norm": 18.211301803588867, "learning_rate": 5.813327487943885e-06, "loss": 12.0283, "step": 48750 }, { "epoch": 21.208204292311873, "grad_norm": 13.291574478149414, "learning_rate": 5.80894344585708e-06, "loss": 12.0283, "step": 48800 }, { "epoch": 21.22993751697908, "grad_norm": 15.905344009399414, "learning_rate": 5.804559403770277e-06, "loss": 12.0526, "step": 48850 }, { "epoch": 21.251670741646294, "grad_norm": 11.572737693786621, "learning_rate": 5.800175361683472e-06, "loss": 12.0322, "step": 48900 }, { "epoch": 21.273403966313502, "grad_norm": 17.022083282470703, "learning_rate": 5.795791319596669e-06, "loss": 12.0426, "step": 48950 }, { "epoch": 21.29513719098071, "grad_norm": 23.31209945678711, "learning_rate": 5.791407277509865e-06, "loss": 12.0233, "step": 49000 }, { "epoch": 21.316870415647923, "grad_norm": 23.8966007232666, "learning_rate": 5.78702323542306e-06, "loss": 12.0526, "step": 49050 }, { "epoch": 21.33860364031513, "grad_norm": 17.35943031311035, "learning_rate": 5.7826391933362565e-06, "loss": 12.0379, "step": 49100 }, { "epoch": 21.36033686498234, "grad_norm": 33.082645416259766, "learning_rate": 5.778255151249452e-06, "loss": 12.0407, "step": 49150 }, { "epoch": 21.382070089649552, "grad_norm": 13.810714721679688, "learning_rate": 5.773871109162649e-06, "loss": 12.0193, "step": 49200 }, { "epoch": 21.40380331431676, "grad_norm": 15.985318183898926, "learning_rate": 5.769487067075844e-06, "loss": 12.0437, "step": 49250 }, { "epoch": 21.425536538983973, "grad_norm": 11.185006141662598, "learning_rate": 5.765103024989041e-06, "loss": 12.0347, "step": 49300 }, { "epoch": 21.44726976365118, "grad_norm": 13.088438034057617, "learning_rate": 5.760718982902236e-06, "loss": 12.036, "step": 49350 }, { "epoch": 21.46900298831839, "grad_norm": 35.933502197265625, "learning_rate": 5.756334940815432e-06, "loss": 12.0709, "step": 49400 }, { "epoch": 21.490736212985603, "grad_norm": 13.896368026733398, "learning_rate": 5.7519508987286285e-06, "loss": 12.0181, "step": 49450 }, { "epoch": 21.51246943765281, "grad_norm": 15.991681098937988, "learning_rate": 5.747566856641824e-06, "loss": 12.0274, "step": 49500 }, { "epoch": 21.534202662320023, "grad_norm": 21.10006332397461, "learning_rate": 5.743182814555021e-06, "loss": 12.0587, "step": 49550 }, { "epoch": 21.555935886987232, "grad_norm": 18.29193115234375, "learning_rate": 5.738798772468216e-06, "loss": 12.028, "step": 49600 }, { "epoch": 21.57766911165444, "grad_norm": 27.753482818603516, "learning_rate": 5.7344147303814125e-06, "loss": 11.9988, "step": 49650 }, { "epoch": 21.599402336321653, "grad_norm": 24.744070053100586, "learning_rate": 5.730030688294608e-06, "loss": 12.0743, "step": 49700 }, { "epoch": 21.62113556098886, "grad_norm": 21.145042419433594, "learning_rate": 5.725646646207804e-06, "loss": 12.0425, "step": 49750 }, { "epoch": 21.64286878565607, "grad_norm": 13.751763343811035, "learning_rate": 5.7212626041209996e-06, "loss": 12.077, "step": 49800 }, { "epoch": 21.664602010323282, "grad_norm": 31.52511978149414, "learning_rate": 5.716878562034196e-06, "loss": 12.0228, "step": 49850 }, { "epoch": 21.68633523499049, "grad_norm": 51.40691375732422, "learning_rate": 5.712494519947393e-06, "loss": 12.0487, "step": 49900 }, { "epoch": 21.708068459657703, "grad_norm": 12.909490585327148, "learning_rate": 5.708110477860587e-06, "loss": 12.0468, "step": 49950 }, { "epoch": 21.72980168432491, "grad_norm": 14.6589937210083, "learning_rate": 5.7037264357737845e-06, "loss": 12.0168, "step": 50000 }, { "epoch": 21.72980168432491, "eval_cer": 0.07568846975176824, "eval_loss": 2.362048864364624, "eval_runtime": 397.6775, "eval_samples_per_second": 13.594, "eval_steps_per_second": 3.4, "eval_wer": 0.22898414193750982, "step": 50000 }, { "epoch": 21.75153490899212, "grad_norm": 20.892807006835938, "learning_rate": 5.69934239368698e-06, "loss": 12.021, "step": 50050 }, { "epoch": 21.773268133659332, "grad_norm": 14.854979515075684, "learning_rate": 5.694958351600176e-06, "loss": 12.0355, "step": 50100 }, { "epoch": 21.79500135832654, "grad_norm": 18.140365600585938, "learning_rate": 5.6905743095133715e-06, "loss": 12.0173, "step": 50150 }, { "epoch": 21.816734582993753, "grad_norm": 17.70104217529297, "learning_rate": 5.686190267426568e-06, "loss": 12.0801, "step": 50200 }, { "epoch": 21.83846780766096, "grad_norm": 18.51262092590332, "learning_rate": 5.681806225339763e-06, "loss": 12.0334, "step": 50250 }, { "epoch": 21.86020103232817, "grad_norm": 15.687026023864746, "learning_rate": 5.677422183252959e-06, "loss": 12.0553, "step": 50300 }, { "epoch": 21.881934256995383, "grad_norm": 19.184951782226562, "learning_rate": 5.6730381411661564e-06, "loss": 12.0409, "step": 50350 }, { "epoch": 21.90366748166259, "grad_norm": 18.097457885742188, "learning_rate": 5.668654099079352e-06, "loss": 12.058, "step": 50400 }, { "epoch": 21.925400706329803, "grad_norm": 26.270936965942383, "learning_rate": 5.664270056992548e-06, "loss": 12.064, "step": 50450 }, { "epoch": 21.947133930997012, "grad_norm": 26.288280487060547, "learning_rate": 5.6598860149057434e-06, "loss": 12.034, "step": 50500 }, { "epoch": 21.96886715566422, "grad_norm": 10.051491737365723, "learning_rate": 5.65550197281894e-06, "loss": 12.0676, "step": 50550 }, { "epoch": 21.990600380331433, "grad_norm": 15.91609001159668, "learning_rate": 5.651117930732135e-06, "loss": 12.0488, "step": 50600 }, { "epoch": 22.012170605813637, "grad_norm": 16.341890335083008, "learning_rate": 5.646733888645331e-06, "loss": 11.9307, "step": 50650 }, { "epoch": 22.03390383048085, "grad_norm": 17.389766693115234, "learning_rate": 5.642349846558527e-06, "loss": 11.9959, "step": 50700 }, { "epoch": 22.055637055148058, "grad_norm": 15.45628547668457, "learning_rate": 5.637965804471724e-06, "loss": 12.0322, "step": 50750 }, { "epoch": 22.077370279815266, "grad_norm": 14.2662935256958, "learning_rate": 5.63358176238492e-06, "loss": 12.0679, "step": 50800 }, { "epoch": 22.09910350448248, "grad_norm": 18.397008895874023, "learning_rate": 5.629197720298115e-06, "loss": 12.0007, "step": 50850 }, { "epoch": 22.120836729149687, "grad_norm": 14.498343467712402, "learning_rate": 5.624813678211312e-06, "loss": 11.9903, "step": 50900 }, { "epoch": 22.1425699538169, "grad_norm": 26.300201416015625, "learning_rate": 5.620429636124507e-06, "loss": 12.0488, "step": 50950 }, { "epoch": 22.164303178484108, "grad_norm": 17.42373275756836, "learning_rate": 5.616045594037703e-06, "loss": 12.0156, "step": 51000 }, { "epoch": 22.186036403151316, "grad_norm": 13.430180549621582, "learning_rate": 5.611661551950899e-06, "loss": 12.0147, "step": 51050 }, { "epoch": 22.20776962781853, "grad_norm": 8.827760696411133, "learning_rate": 5.607277509864095e-06, "loss": 12.0464, "step": 51100 }, { "epoch": 22.229502852485737, "grad_norm": 13.834342002868652, "learning_rate": 5.60289346777729e-06, "loss": 11.9739, "step": 51150 }, { "epoch": 22.251236077152946, "grad_norm": 15.042898178100586, "learning_rate": 5.598509425690487e-06, "loss": 12.0098, "step": 51200 }, { "epoch": 22.272969301820158, "grad_norm": 19.06934356689453, "learning_rate": 5.5941253836036836e-06, "loss": 11.9855, "step": 51250 }, { "epoch": 22.294702526487367, "grad_norm": 11.361977577209473, "learning_rate": 5.589741341516879e-06, "loss": 12.0193, "step": 51300 }, { "epoch": 22.31643575115458, "grad_norm": 19.977092742919922, "learning_rate": 5.585357299430075e-06, "loss": 12.0072, "step": 51350 }, { "epoch": 22.338168975821787, "grad_norm": 18.312875747680664, "learning_rate": 5.5809732573432706e-06, "loss": 12.0161, "step": 51400 }, { "epoch": 22.359902200488996, "grad_norm": 10.536518096923828, "learning_rate": 5.576589215256467e-06, "loss": 12.0285, "step": 51450 }, { "epoch": 22.381635425156208, "grad_norm": 15.011421203613281, "learning_rate": 5.572205173169662e-06, "loss": 11.9876, "step": 51500 }, { "epoch": 22.403368649823417, "grad_norm": 17.05405616760254, "learning_rate": 5.567821131082859e-06, "loss": 12.0425, "step": 51550 }, { "epoch": 22.42510187449063, "grad_norm": 16.87340545654297, "learning_rate": 5.563437088996055e-06, "loss": 12.0218, "step": 51600 }, { "epoch": 22.446835099157838, "grad_norm": 19.586755752563477, "learning_rate": 5.559053046909251e-06, "loss": 12.032, "step": 51650 }, { "epoch": 22.468568323825046, "grad_norm": 27.009822845458984, "learning_rate": 5.554669004822447e-06, "loss": 12.0083, "step": 51700 }, { "epoch": 22.49030154849226, "grad_norm": 11.635884284973145, "learning_rate": 5.5502849627356425e-06, "loss": 12.025, "step": 51750 }, { "epoch": 22.512034773159467, "grad_norm": 17.531131744384766, "learning_rate": 5.545900920648839e-06, "loss": 12.0123, "step": 51800 }, { "epoch": 22.53376799782668, "grad_norm": 10.203145980834961, "learning_rate": 5.541516878562034e-06, "loss": 12.0013, "step": 51850 }, { "epoch": 22.555501222493888, "grad_norm": 19.1767635345459, "learning_rate": 5.537132836475231e-06, "loss": 12.0279, "step": 51900 }, { "epoch": 22.577234447161096, "grad_norm": 31.68284034729004, "learning_rate": 5.532748794388426e-06, "loss": 12.0053, "step": 51950 }, { "epoch": 22.59896767182831, "grad_norm": 10.772562980651855, "learning_rate": 5.528364752301623e-06, "loss": 12.0153, "step": 52000 }, { "epoch": 22.620700896495517, "grad_norm": 99.19184875488281, "learning_rate": 5.523980710214818e-06, "loss": 12.0059, "step": 52050 }, { "epoch": 22.642434121162726, "grad_norm": 20.737354278564453, "learning_rate": 5.5195966681280145e-06, "loss": 12.0263, "step": 52100 }, { "epoch": 22.664167345829938, "grad_norm": 15.494745254516602, "learning_rate": 5.515212626041211e-06, "loss": 12.0129, "step": 52150 }, { "epoch": 22.685900570497147, "grad_norm": 34.782100677490234, "learning_rate": 5.510828583954406e-06, "loss": 12.0497, "step": 52200 }, { "epoch": 22.70763379516436, "grad_norm": 18.235090255737305, "learning_rate": 5.506444541867602e-06, "loss": 11.9992, "step": 52250 }, { "epoch": 22.729367019831567, "grad_norm": 27.689912796020508, "learning_rate": 5.502060499780798e-06, "loss": 12.0023, "step": 52300 }, { "epoch": 22.751100244498776, "grad_norm": 18.36990737915039, "learning_rate": 5.497676457693995e-06, "loss": 12.0056, "step": 52350 }, { "epoch": 22.772833469165988, "grad_norm": 18.038314819335938, "learning_rate": 5.49329241560719e-06, "loss": 12.0212, "step": 52400 }, { "epoch": 22.794566693833197, "grad_norm": 8.06163501739502, "learning_rate": 5.488908373520386e-06, "loss": 12.0274, "step": 52450 }, { "epoch": 22.81629991850041, "grad_norm": 15.676831245422363, "learning_rate": 5.484524331433583e-06, "loss": 12.0148, "step": 52500 }, { "epoch": 22.838033143167618, "grad_norm": 24.74848747253418, "learning_rate": 5.480140289346778e-06, "loss": 12.0186, "step": 52550 }, { "epoch": 22.859766367834826, "grad_norm": 10.006168365478516, "learning_rate": 5.475756247259974e-06, "loss": 12.0071, "step": 52600 }, { "epoch": 22.88149959250204, "grad_norm": 10.135807991027832, "learning_rate": 5.47137220517317e-06, "loss": 12.0224, "step": 52650 }, { "epoch": 22.903232817169247, "grad_norm": 16.03304100036621, "learning_rate": 5.466988163086367e-06, "loss": 12.0253, "step": 52700 }, { "epoch": 22.92496604183646, "grad_norm": 15.307913780212402, "learning_rate": 5.462604120999562e-06, "loss": 12.0234, "step": 52750 }, { "epoch": 22.946699266503668, "grad_norm": 27.5895938873291, "learning_rate": 5.458220078912758e-06, "loss": 12.0162, "step": 52800 }, { "epoch": 22.968432491170876, "grad_norm": 14.608256340026855, "learning_rate": 5.453836036825954e-06, "loss": 12.0005, "step": 52850 }, { "epoch": 22.99016571583809, "grad_norm": 41.10546112060547, "learning_rate": 5.44945199473915e-06, "loss": 12.0735, "step": 52900 }, { "epoch": 23.011735941320293, "grad_norm": 12.675127983093262, "learning_rate": 5.445067952652346e-06, "loss": 11.9152, "step": 52950 }, { "epoch": 23.033469165987505, "grad_norm": 16.779767990112305, "learning_rate": 5.440683910565542e-06, "loss": 11.9743, "step": 53000 }, { "epoch": 23.055202390654713, "grad_norm": 29.24107551574707, "learning_rate": 5.436299868478739e-06, "loss": 11.9844, "step": 53050 }, { "epoch": 23.076935615321922, "grad_norm": 15.517463684082031, "learning_rate": 5.431915826391933e-06, "loss": 12.0084, "step": 53100 }, { "epoch": 23.098668839989134, "grad_norm": 14.068320274353027, "learning_rate": 5.42753178430513e-06, "loss": 11.982, "step": 53150 }, { "epoch": 23.120402064656343, "grad_norm": 13.296953201293945, "learning_rate": 5.423147742218326e-06, "loss": 12.0076, "step": 53200 }, { "epoch": 23.142135289323555, "grad_norm": 11.365141868591309, "learning_rate": 5.418763700131522e-06, "loss": 11.9825, "step": 53250 }, { "epoch": 23.163868513990764, "grad_norm": 11.649621963500977, "learning_rate": 5.414379658044717e-06, "loss": 11.9874, "step": 53300 }, { "epoch": 23.185601738657972, "grad_norm": 12.506479263305664, "learning_rate": 5.4099956159579135e-06, "loss": 12.0203, "step": 53350 }, { "epoch": 23.207334963325184, "grad_norm": 26.387269973754883, "learning_rate": 5.40561157387111e-06, "loss": 11.9718, "step": 53400 }, { "epoch": 23.229068187992393, "grad_norm": 30.277488708496094, "learning_rate": 5.401227531784305e-06, "loss": 11.9922, "step": 53450 }, { "epoch": 23.2508014126596, "grad_norm": 16.27001953125, "learning_rate": 5.396843489697502e-06, "loss": 12.0103, "step": 53500 }, { "epoch": 23.272534637326814, "grad_norm": 10.601898193359375, "learning_rate": 5.392459447610698e-06, "loss": 11.982, "step": 53550 }, { "epoch": 23.294267861994022, "grad_norm": 16.928091049194336, "learning_rate": 5.388075405523894e-06, "loss": 11.9921, "step": 53600 }, { "epoch": 23.316001086661235, "grad_norm": 17.180408477783203, "learning_rate": 5.383691363437089e-06, "loss": 11.9681, "step": 53650 }, { "epoch": 23.337734311328443, "grad_norm": 9.645658493041992, "learning_rate": 5.3793073213502855e-06, "loss": 11.9921, "step": 53700 }, { "epoch": 23.359467535995652, "grad_norm": 7.888517379760742, "learning_rate": 5.374923279263481e-06, "loss": 11.9957, "step": 53750 }, { "epoch": 23.381200760662864, "grad_norm": 23.52006721496582, "learning_rate": 5.370539237176677e-06, "loss": 11.9913, "step": 53800 }, { "epoch": 23.402933985330073, "grad_norm": 17.327842712402344, "learning_rate": 5.366155195089874e-06, "loss": 11.985, "step": 53850 }, { "epoch": 23.424667209997285, "grad_norm": 15.461244583129883, "learning_rate": 5.3617711530030696e-06, "loss": 11.9856, "step": 53900 }, { "epoch": 23.446400434664493, "grad_norm": 10.2888822555542, "learning_rate": 5.357387110916266e-06, "loss": 12.0014, "step": 53950 }, { "epoch": 23.468133659331702, "grad_norm": 16.063997268676758, "learning_rate": 5.353003068829461e-06, "loss": 11.997, "step": 54000 }, { "epoch": 23.489866883998914, "grad_norm": 28.185026168823242, "learning_rate": 5.3486190267426574e-06, "loss": 11.9855, "step": 54050 }, { "epoch": 23.511600108666123, "grad_norm": 16.92442512512207, "learning_rate": 5.344234984655853e-06, "loss": 12.0206, "step": 54100 }, { "epoch": 23.533333333333335, "grad_norm": 6.245467662811279, "learning_rate": 5.339850942569049e-06, "loss": 11.9748, "step": 54150 }, { "epoch": 23.555066558000544, "grad_norm": 14.348546981811523, "learning_rate": 5.3354669004822444e-06, "loss": 11.9609, "step": 54200 }, { "epoch": 23.576799782667752, "grad_norm": 10.864014625549316, "learning_rate": 5.331082858395441e-06, "loss": 11.9947, "step": 54250 }, { "epoch": 23.598533007334964, "grad_norm": 8.79773998260498, "learning_rate": 5.326698816308638e-06, "loss": 12.0031, "step": 54300 }, { "epoch": 23.620266232002173, "grad_norm": 19.14083480834961, "learning_rate": 5.322314774221833e-06, "loss": 11.9738, "step": 54350 }, { "epoch": 23.64199945666938, "grad_norm": 10.049248695373535, "learning_rate": 5.317930732135029e-06, "loss": 11.9514, "step": 54400 }, { "epoch": 23.663732681336594, "grad_norm": 11.119285583496094, "learning_rate": 5.313546690048225e-06, "loss": 11.9914, "step": 54450 }, { "epoch": 23.685465906003802, "grad_norm": 8.268950462341309, "learning_rate": 5.309162647961421e-06, "loss": 11.994, "step": 54500 }, { "epoch": 23.707199130671015, "grad_norm": 14.429734230041504, "learning_rate": 5.304778605874616e-06, "loss": 11.975, "step": 54550 }, { "epoch": 23.728932355338223, "grad_norm": 15.248434066772461, "learning_rate": 5.300394563787813e-06, "loss": 11.9967, "step": 54600 }, { "epoch": 23.750665580005432, "grad_norm": 27.12610626220703, "learning_rate": 5.296010521701008e-06, "loss": 12.0066, "step": 54650 }, { "epoch": 23.772398804672644, "grad_norm": 11.624201774597168, "learning_rate": 5.291626479614205e-06, "loss": 11.9857, "step": 54700 }, { "epoch": 23.794132029339853, "grad_norm": 38.6632194519043, "learning_rate": 5.287242437527401e-06, "loss": 12.0068, "step": 54750 }, { "epoch": 23.815865254007065, "grad_norm": 21.433034896850586, "learning_rate": 5.282858395440597e-06, "loss": 11.9545, "step": 54800 }, { "epoch": 23.837598478674273, "grad_norm": 12.88279914855957, "learning_rate": 5.278474353353793e-06, "loss": 11.9675, "step": 54850 }, { "epoch": 23.859331703341482, "grad_norm": 11.213829040527344, "learning_rate": 5.274090311266988e-06, "loss": 11.9907, "step": 54900 }, { "epoch": 23.881064928008694, "grad_norm": 32.87601852416992, "learning_rate": 5.2697062691801846e-06, "loss": 12.0041, "step": 54950 }, { "epoch": 23.902798152675903, "grad_norm": 12.214354515075684, "learning_rate": 5.26532222709338e-06, "loss": 12.0013, "step": 55000 }, { "epoch": 23.924531377343115, "grad_norm": 18.823352813720703, "learning_rate": 5.260938185006577e-06, "loss": 12.0205, "step": 55050 }, { "epoch": 23.946264602010324, "grad_norm": 11.764278411865234, "learning_rate": 5.2565541429197716e-06, "loss": 12.0045, "step": 55100 }, { "epoch": 23.967997826677532, "grad_norm": 33.26872253417969, "learning_rate": 5.252170100832969e-06, "loss": 11.9852, "step": 55150 }, { "epoch": 23.989731051344744, "grad_norm": 20.137388229370117, "learning_rate": 5.247786058746165e-06, "loss": 12.0023, "step": 55200 }, { "epoch": 24.01130127682695, "grad_norm": 13.359118461608887, "learning_rate": 5.24340201665936e-06, "loss": 11.8893, "step": 55250 }, { "epoch": 24.03303450149416, "grad_norm": 12.654318809509277, "learning_rate": 5.2390179745725565e-06, "loss": 11.9913, "step": 55300 }, { "epoch": 24.05476772616137, "grad_norm": 12.723244667053223, "learning_rate": 5.234633932485752e-06, "loss": 11.9835, "step": 55350 }, { "epoch": 24.076500950828578, "grad_norm": 10.007128715515137, "learning_rate": 5.230249890398948e-06, "loss": 11.9639, "step": 55400 }, { "epoch": 24.09823417549579, "grad_norm": 24.932937622070312, "learning_rate": 5.2258658483121435e-06, "loss": 11.9567, "step": 55450 }, { "epoch": 24.119967400163, "grad_norm": 13.288817405700684, "learning_rate": 5.221481806225341e-06, "loss": 11.9896, "step": 55500 }, { "epoch": 24.14170062483021, "grad_norm": 24.153135299682617, "learning_rate": 5.217097764138536e-06, "loss": 11.9458, "step": 55550 }, { "epoch": 24.16343384949742, "grad_norm": 21.456832885742188, "learning_rate": 5.212713722051732e-06, "loss": 11.9637, "step": 55600 }, { "epoch": 24.185167074164628, "grad_norm": 11.885467529296875, "learning_rate": 5.2083296799649284e-06, "loss": 11.9763, "step": 55650 }, { "epoch": 24.20690029883184, "grad_norm": 18.14926528930664, "learning_rate": 5.203945637878124e-06, "loss": 11.9793, "step": 55700 }, { "epoch": 24.22863352349905, "grad_norm": 10.626521110534668, "learning_rate": 5.19956159579132e-06, "loss": 11.9717, "step": 55750 }, { "epoch": 24.250366748166257, "grad_norm": 18.046018600463867, "learning_rate": 5.1951775537045155e-06, "loss": 11.9679, "step": 55800 }, { "epoch": 24.27209997283347, "grad_norm": 19.871051788330078, "learning_rate": 5.1907935116177125e-06, "loss": 11.9655, "step": 55850 }, { "epoch": 24.29383319750068, "grad_norm": 26.990354537963867, "learning_rate": 5.186409469530908e-06, "loss": 11.9776, "step": 55900 }, { "epoch": 24.31556642216789, "grad_norm": 13.593362808227539, "learning_rate": 5.182025427444104e-06, "loss": 11.9765, "step": 55950 }, { "epoch": 24.3372996468351, "grad_norm": 21.99699592590332, "learning_rate": 5.1776413853572995e-06, "loss": 11.9698, "step": 56000 }, { "epoch": 24.359032871502308, "grad_norm": 17.28653335571289, "learning_rate": 5.173257343270496e-06, "loss": 11.9668, "step": 56050 }, { "epoch": 24.38076609616952, "grad_norm": 46.031005859375, "learning_rate": 5.168873301183692e-06, "loss": 11.9729, "step": 56100 }, { "epoch": 24.40249932083673, "grad_norm": 32.24114227294922, "learning_rate": 5.164489259096887e-06, "loss": 11.9543, "step": 56150 }, { "epoch": 24.42423254550394, "grad_norm": 32.9847297668457, "learning_rate": 5.160105217010084e-06, "loss": 11.9631, "step": 56200 }, { "epoch": 24.44596577017115, "grad_norm": 28.538616180419922, "learning_rate": 5.155721174923279e-06, "loss": 11.9914, "step": 56250 }, { "epoch": 24.467698994838358, "grad_norm": 10.636951446533203, "learning_rate": 5.151337132836476e-06, "loss": 11.9533, "step": 56300 }, { "epoch": 24.48943221950557, "grad_norm": 18.541378021240234, "learning_rate": 5.1469530907496715e-06, "loss": 11.9635, "step": 56350 }, { "epoch": 24.51116544417278, "grad_norm": 15.477215766906738, "learning_rate": 5.142569048662868e-06, "loss": 11.973, "step": 56400 }, { "epoch": 24.53289866883999, "grad_norm": 8.257668495178223, "learning_rate": 5.138185006576063e-06, "loss": 11.9541, "step": 56450 }, { "epoch": 24.5546318935072, "grad_norm": 12.362825393676758, "learning_rate": 5.133800964489259e-06, "loss": 11.9543, "step": 56500 }, { "epoch": 24.576365118174408, "grad_norm": 18.897563934326172, "learning_rate": 5.1294169224024556e-06, "loss": 11.9828, "step": 56550 }, { "epoch": 24.59809834284162, "grad_norm": 22.83639907836914, "learning_rate": 5.125032880315651e-06, "loss": 11.9907, "step": 56600 }, { "epoch": 24.61983156750883, "grad_norm": 26.016014099121094, "learning_rate": 5.120648838228848e-06, "loss": 11.9798, "step": 56650 }, { "epoch": 24.641564792176037, "grad_norm": 7.745444297790527, "learning_rate": 5.1162647961420434e-06, "loss": 12.0051, "step": 56700 }, { "epoch": 24.66329801684325, "grad_norm": 14.89815616607666, "learning_rate": 5.11188075405524e-06, "loss": 11.9648, "step": 56750 }, { "epoch": 24.68503124151046, "grad_norm": 13.663446426391602, "learning_rate": 5.107496711968435e-06, "loss": 11.9961, "step": 56800 }, { "epoch": 24.70676446617767, "grad_norm": 15.474350929260254, "learning_rate": 5.103112669881631e-06, "loss": 11.9687, "step": 56850 }, { "epoch": 24.72849769084488, "grad_norm": 32.1036376953125, "learning_rate": 5.0987286277948275e-06, "loss": 12.0102, "step": 56900 }, { "epoch": 24.750230915512088, "grad_norm": 21.14737892150879, "learning_rate": 5.094344585708023e-06, "loss": 11.9577, "step": 56950 }, { "epoch": 24.7719641401793, "grad_norm": 26.35091781616211, "learning_rate": 5.08996054362122e-06, "loss": 11.9979, "step": 57000 }, { "epoch": 24.79369736484651, "grad_norm": 40.08930587768555, "learning_rate": 5.085576501534415e-06, "loss": 11.956, "step": 57050 }, { "epoch": 24.81543058951372, "grad_norm": 21.480506896972656, "learning_rate": 5.081192459447612e-06, "loss": 11.9701, "step": 57100 }, { "epoch": 24.83716381418093, "grad_norm": 12.940244674682617, "learning_rate": 5.076808417360807e-06, "loss": 11.9642, "step": 57150 }, { "epoch": 24.858897038848138, "grad_norm": 14.284876823425293, "learning_rate": 5.072424375274003e-06, "loss": 11.9604, "step": 57200 }, { "epoch": 24.88063026351535, "grad_norm": 9.244315147399902, "learning_rate": 5.068040333187199e-06, "loss": 11.9762, "step": 57250 }, { "epoch": 24.90236348818256, "grad_norm": 21.19985580444336, "learning_rate": 5.063656291100395e-06, "loss": 11.9824, "step": 57300 }, { "epoch": 24.92409671284977, "grad_norm": 20.60128402709961, "learning_rate": 5.059272249013591e-06, "loss": 11.9888, "step": 57350 }, { "epoch": 24.94582993751698, "grad_norm": 22.071367263793945, "learning_rate": 5.0548882069267865e-06, "loss": 11.9722, "step": 57400 }, { "epoch": 24.967563162184188, "grad_norm": 19.631771087646484, "learning_rate": 5.0505041648399836e-06, "loss": 11.9691, "step": 57450 }, { "epoch": 24.9892963868514, "grad_norm": 11.300741195678711, "learning_rate": 5.046120122753179e-06, "loss": 11.9764, "step": 57500 }, { "epoch": 25.010866612333604, "grad_norm": 28.297489166259766, "learning_rate": 5.041736080666375e-06, "loss": 11.8502, "step": 57550 }, { "epoch": 25.032599837000816, "grad_norm": 19.681974411010742, "learning_rate": 5.0373520385795706e-06, "loss": 11.9632, "step": 57600 }, { "epoch": 25.054333061668025, "grad_norm": 9.978123664855957, "learning_rate": 5.032967996492767e-06, "loss": 11.9736, "step": 57650 }, { "epoch": 25.076066286335234, "grad_norm": 22.59627342224121, "learning_rate": 5.028583954405962e-06, "loss": 11.9202, "step": 57700 }, { "epoch": 25.097799511002446, "grad_norm": 15.177567481994629, "learning_rate": 5.024199912319158e-06, "loss": 11.9432, "step": 57750 }, { "epoch": 25.119532735669654, "grad_norm": 11.103377342224121, "learning_rate": 5.0198158702323555e-06, "loss": 11.933, "step": 57800 }, { "epoch": 25.141265960336867, "grad_norm": 15.902565956115723, "learning_rate": 5.015431828145551e-06, "loss": 11.9292, "step": 57850 }, { "epoch": 25.162999185004075, "grad_norm": 21.157047271728516, "learning_rate": 5.011047786058747e-06, "loss": 11.9355, "step": 57900 }, { "epoch": 25.184732409671284, "grad_norm": 15.4396333694458, "learning_rate": 5.0066637439719425e-06, "loss": 11.9417, "step": 57950 }, { "epoch": 25.206465634338496, "grad_norm": 17.689163208007812, "learning_rate": 5.002279701885139e-06, "loss": 11.9411, "step": 58000 }, { "epoch": 25.228198859005705, "grad_norm": 20.323307037353516, "learning_rate": 4.997895659798335e-06, "loss": 11.9537, "step": 58050 }, { "epoch": 25.249932083672913, "grad_norm": 11.056938171386719, "learning_rate": 4.99351161771153e-06, "loss": 11.9289, "step": 58100 }, { "epoch": 25.271665308340125, "grad_norm": 13.280766487121582, "learning_rate": 4.989127575624727e-06, "loss": 11.9525, "step": 58150 }, { "epoch": 25.293398533007334, "grad_norm": 19.21057891845703, "learning_rate": 4.984743533537922e-06, "loss": 11.9329, "step": 58200 }, { "epoch": 25.315131757674546, "grad_norm": 11.26260757446289, "learning_rate": 4.980359491451118e-06, "loss": 11.9324, "step": 58250 }, { "epoch": 25.336864982341755, "grad_norm": 23.691085815429688, "learning_rate": 4.9759754493643145e-06, "loss": 11.9377, "step": 58300 }, { "epoch": 25.358598207008963, "grad_norm": 14.544368743896484, "learning_rate": 4.97159140727751e-06, "loss": 11.9555, "step": 58350 }, { "epoch": 25.380331431676176, "grad_norm": 30.192901611328125, "learning_rate": 4.967207365190706e-06, "loss": 11.9389, "step": 58400 }, { "epoch": 25.402064656343384, "grad_norm": 13.255487442016602, "learning_rate": 4.962823323103902e-06, "loss": 11.9655, "step": 58450 }, { "epoch": 25.423797881010596, "grad_norm": 21.28059959411621, "learning_rate": 4.9584392810170985e-06, "loss": 11.9339, "step": 58500 }, { "epoch": 25.445531105677805, "grad_norm": 19.402381896972656, "learning_rate": 4.954055238930294e-06, "loss": 11.9438, "step": 58550 }, { "epoch": 25.467264330345014, "grad_norm": 23.586254119873047, "learning_rate": 4.94967119684349e-06, "loss": 11.9565, "step": 58600 }, { "epoch": 25.488997555012226, "grad_norm": 12.589113235473633, "learning_rate": 4.945287154756686e-06, "loss": 11.9255, "step": 58650 }, { "epoch": 25.510730779679434, "grad_norm": 13.459474563598633, "learning_rate": 4.940903112669882e-06, "loss": 11.9577, "step": 58700 }, { "epoch": 25.532464004346647, "grad_norm": 44.4463005065918, "learning_rate": 4.936519070583078e-06, "loss": 11.9748, "step": 58750 }, { "epoch": 25.554197229013855, "grad_norm": 17.335121154785156, "learning_rate": 4.932135028496273e-06, "loss": 11.9485, "step": 58800 }, { "epoch": 25.575930453681064, "grad_norm": 13.910146713256836, "learning_rate": 4.92775098640947e-06, "loss": 11.9251, "step": 58850 }, { "epoch": 25.597663678348276, "grad_norm": 12.966668128967285, "learning_rate": 4.923366944322666e-06, "loss": 11.9461, "step": 58900 }, { "epoch": 25.619396903015485, "grad_norm": 11.38027572631836, "learning_rate": 4.918982902235862e-06, "loss": 11.9485, "step": 58950 }, { "epoch": 25.641130127682693, "grad_norm": 19.2831974029541, "learning_rate": 4.914598860149058e-06, "loss": 11.9539, "step": 59000 }, { "epoch": 25.662863352349905, "grad_norm": 14.93049144744873, "learning_rate": 4.910214818062254e-06, "loss": 11.9358, "step": 59050 }, { "epoch": 25.684596577017114, "grad_norm": 20.345487594604492, "learning_rate": 4.90583077597545e-06, "loss": 11.963, "step": 59100 }, { "epoch": 25.706329801684326, "grad_norm": 22.333740234375, "learning_rate": 4.901446733888645e-06, "loss": 11.9593, "step": 59150 }, { "epoch": 25.728063026351535, "grad_norm": 15.723165512084961, "learning_rate": 4.897062691801842e-06, "loss": 11.9462, "step": 59200 }, { "epoch": 25.749796251018743, "grad_norm": 23.927995681762695, "learning_rate": 4.892678649715038e-06, "loss": 11.9395, "step": 59250 }, { "epoch": 25.771529475685956, "grad_norm": 9.985795974731445, "learning_rate": 4.888294607628233e-06, "loss": 11.9668, "step": 59300 }, { "epoch": 25.793262700353164, "grad_norm": 13.037304878234863, "learning_rate": 4.8839105655414294e-06, "loss": 11.9362, "step": 59350 }, { "epoch": 25.814995925020376, "grad_norm": 14.396384239196777, "learning_rate": 4.879526523454626e-06, "loss": 11.9613, "step": 59400 }, { "epoch": 25.836729149687585, "grad_norm": 12.580947875976562, "learning_rate": 4.875142481367822e-06, "loss": 11.947, "step": 59450 }, { "epoch": 25.858462374354794, "grad_norm": 9.566840171813965, "learning_rate": 4.870758439281017e-06, "loss": 11.9588, "step": 59500 }, { "epoch": 25.880195599022006, "grad_norm": 14.287603378295898, "learning_rate": 4.8663743971942135e-06, "loss": 11.9367, "step": 59550 }, { "epoch": 25.901928823689214, "grad_norm": 22.067798614501953, "learning_rate": 4.86199035510741e-06, "loss": 11.9608, "step": 59600 }, { "epoch": 25.923662048356427, "grad_norm": 18.1433162689209, "learning_rate": 4.857606313020605e-06, "loss": 11.9365, "step": 59650 }, { "epoch": 25.945395273023635, "grad_norm": 19.52138900756836, "learning_rate": 4.853222270933801e-06, "loss": 11.9533, "step": 59700 }, { "epoch": 25.967128497690844, "grad_norm": 8.619915008544922, "learning_rate": 4.848838228846997e-06, "loss": 11.9343, "step": 59750 }, { "epoch": 25.988861722358056, "grad_norm": 23.551292419433594, "learning_rate": 4.844454186760194e-06, "loss": 11.9369, "step": 59800 }, { "epoch": 26.01043194784026, "grad_norm": 11.714635848999023, "learning_rate": 4.840070144673389e-06, "loss": 11.8362, "step": 59850 }, { "epoch": 26.032165172507472, "grad_norm": 12.336874961853027, "learning_rate": 4.8356861025865855e-06, "loss": 11.8948, "step": 59900 }, { "epoch": 26.05389839717468, "grad_norm": 20.45733642578125, "learning_rate": 4.831302060499781e-06, "loss": 11.9095, "step": 59950 }, { "epoch": 26.07563162184189, "grad_norm": 19.363704681396484, "learning_rate": 4.826918018412977e-06, "loss": 11.9093, "step": 60000 }, { "epoch": 26.07563162184189, "eval_cer": 0.07668522335921395, "eval_loss": 2.3816096782684326, "eval_runtime": 394.5974, "eval_samples_per_second": 13.7, "eval_steps_per_second": 3.426, "eval_wer": 0.2290783482493327, "step": 60000 }, { "epoch": 26.0973648465091, "grad_norm": 10.920241355895996, "learning_rate": 4.822533976326173e-06, "loss": 11.8988, "step": 60050 }, { "epoch": 26.11909807117631, "grad_norm": 8.169657707214355, "learning_rate": 4.818149934239369e-06, "loss": 11.9302, "step": 60100 }, { "epoch": 26.140831295843522, "grad_norm": 21.631534576416016, "learning_rate": 4.813765892152565e-06, "loss": 11.9295, "step": 60150 }, { "epoch": 26.16256452051073, "grad_norm": 15.736180305480957, "learning_rate": 4.80938185006576e-06, "loss": 11.9402, "step": 60200 }, { "epoch": 26.18429774517794, "grad_norm": 8.994476318359375, "learning_rate": 4.804997807978957e-06, "loss": 11.9254, "step": 60250 }, { "epoch": 26.206030969845152, "grad_norm": 15.551674842834473, "learning_rate": 4.800613765892153e-06, "loss": 11.9274, "step": 60300 }, { "epoch": 26.22776419451236, "grad_norm": 8.010394096374512, "learning_rate": 4.796229723805349e-06, "loss": 11.9204, "step": 60350 }, { "epoch": 26.24949741917957, "grad_norm": 8.433065414428711, "learning_rate": 4.791845681718545e-06, "loss": 11.9123, "step": 60400 }, { "epoch": 26.27123064384678, "grad_norm": 11.69290542602539, "learning_rate": 4.787461639631741e-06, "loss": 11.9423, "step": 60450 }, { "epoch": 26.29296386851399, "grad_norm": 11.806631088256836, "learning_rate": 4.783077597544937e-06, "loss": 11.9229, "step": 60500 }, { "epoch": 26.314697093181202, "grad_norm": 9.421358108520508, "learning_rate": 4.778693555458132e-06, "loss": 11.9254, "step": 60550 }, { "epoch": 26.33643031784841, "grad_norm": 15.151471138000488, "learning_rate": 4.7743095133713285e-06, "loss": 11.9324, "step": 60600 }, { "epoch": 26.35816354251562, "grad_norm": 7.523982524871826, "learning_rate": 4.769925471284525e-06, "loss": 11.9094, "step": 60650 }, { "epoch": 26.37989676718283, "grad_norm": 7.315085411071777, "learning_rate": 4.765541429197721e-06, "loss": 11.9156, "step": 60700 }, { "epoch": 26.40162999185004, "grad_norm": 37.69257354736328, "learning_rate": 4.761157387110917e-06, "loss": 11.9491, "step": 60750 }, { "epoch": 26.423363216517252, "grad_norm": 12.536825180053711, "learning_rate": 4.756773345024113e-06, "loss": 11.937, "step": 60800 }, { "epoch": 26.44509644118446, "grad_norm": 19.952590942382812, "learning_rate": 4.752389302937309e-06, "loss": 11.9031, "step": 60850 }, { "epoch": 26.46682966585167, "grad_norm": 9.468097686767578, "learning_rate": 4.748005260850504e-06, "loss": 11.93, "step": 60900 }, { "epoch": 26.48856289051888, "grad_norm": 9.063526153564453, "learning_rate": 4.7436212187637005e-06, "loss": 11.9282, "step": 60950 }, { "epoch": 26.51029611518609, "grad_norm": 23.76058006286621, "learning_rate": 4.739237176676897e-06, "loss": 11.9431, "step": 61000 }, { "epoch": 26.532029339853302, "grad_norm": 16.783021926879883, "learning_rate": 4.734853134590092e-06, "loss": 11.9221, "step": 61050 }, { "epoch": 26.55376256452051, "grad_norm": 20.15511131286621, "learning_rate": 4.730469092503288e-06, "loss": 11.9392, "step": 61100 }, { "epoch": 26.57549578918772, "grad_norm": 14.874903678894043, "learning_rate": 4.7260850504164845e-06, "loss": 11.953, "step": 61150 }, { "epoch": 26.597229013854932, "grad_norm": 7.126718044281006, "learning_rate": 4.721701008329681e-06, "loss": 11.9446, "step": 61200 }, { "epoch": 26.61896223852214, "grad_norm": 9.697017669677734, "learning_rate": 4.717316966242876e-06, "loss": 11.9047, "step": 61250 }, { "epoch": 26.64069546318935, "grad_norm": 16.13836097717285, "learning_rate": 4.712932924156072e-06, "loss": 11.9156, "step": 61300 }, { "epoch": 26.66242868785656, "grad_norm": 10.770340919494629, "learning_rate": 4.708548882069268e-06, "loss": 11.9602, "step": 61350 }, { "epoch": 26.68416191252377, "grad_norm": 20.800886154174805, "learning_rate": 4.704164839982464e-06, "loss": 11.9496, "step": 61400 }, { "epoch": 26.705895137190982, "grad_norm": 14.415149688720703, "learning_rate": 4.69978079789566e-06, "loss": 11.8953, "step": 61450 }, { "epoch": 26.72762836185819, "grad_norm": 16.533891677856445, "learning_rate": 4.695396755808856e-06, "loss": 11.9318, "step": 61500 }, { "epoch": 26.7493615865254, "grad_norm": 12.036311149597168, "learning_rate": 4.691012713722053e-06, "loss": 11.9219, "step": 61550 }, { "epoch": 26.77109481119261, "grad_norm": 9.894879341125488, "learning_rate": 4.686628671635248e-06, "loss": 11.925, "step": 61600 }, { "epoch": 26.79282803585982, "grad_norm": 13.89318561553955, "learning_rate": 4.682244629548444e-06, "loss": 11.9642, "step": 61650 }, { "epoch": 26.814561260527032, "grad_norm": 7.87830114364624, "learning_rate": 4.67786058746164e-06, "loss": 11.9357, "step": 61700 }, { "epoch": 26.83629448519424, "grad_norm": 12.9856595993042, "learning_rate": 4.673476545374836e-06, "loss": 11.9305, "step": 61750 }, { "epoch": 26.85802770986145, "grad_norm": 9.654988288879395, "learning_rate": 4.669092503288032e-06, "loss": 11.9184, "step": 61800 }, { "epoch": 26.87976093452866, "grad_norm": 58.50657653808594, "learning_rate": 4.664708461201228e-06, "loss": 11.9274, "step": 61850 }, { "epoch": 26.90149415919587, "grad_norm": 9.662385940551758, "learning_rate": 4.660324419114424e-06, "loss": 11.9326, "step": 61900 }, { "epoch": 26.92322738386308, "grad_norm": 11.249975204467773, "learning_rate": 4.655940377027619e-06, "loss": 11.9301, "step": 61950 }, { "epoch": 26.94496060853029, "grad_norm": 14.355755805969238, "learning_rate": 4.651556334940816e-06, "loss": 11.925, "step": 62000 }, { "epoch": 26.9666938331975, "grad_norm": 55.27675247192383, "learning_rate": 4.647172292854012e-06, "loss": 11.9072, "step": 62050 }, { "epoch": 26.988427057864712, "grad_norm": 9.871424674987793, "learning_rate": 4.642788250767208e-06, "loss": 11.9371, "step": 62100 }, { "epoch": 27.009997283346916, "grad_norm": 11.582411766052246, "learning_rate": 4.638404208680404e-06, "loss": 11.8118, "step": 62150 }, { "epoch": 27.031730508014128, "grad_norm": 7.072801113128662, "learning_rate": 4.6340201665935995e-06, "loss": 11.8716, "step": 62200 }, { "epoch": 27.053463732681337, "grad_norm": 12.715493202209473, "learning_rate": 4.629636124506796e-06, "loss": 11.9066, "step": 62250 }, { "epoch": 27.075196957348545, "grad_norm": 13.285543441772461, "learning_rate": 4.625252082419991e-06, "loss": 11.9065, "step": 62300 }, { "epoch": 27.096930182015758, "grad_norm": 14.948770523071289, "learning_rate": 4.620868040333187e-06, "loss": 11.8932, "step": 62350 }, { "epoch": 27.118663406682966, "grad_norm": 7.0187296867370605, "learning_rate": 4.616483998246384e-06, "loss": 11.9063, "step": 62400 }, { "epoch": 27.14039663135018, "grad_norm": 11.898140907287598, "learning_rate": 4.61209995615958e-06, "loss": 11.8861, "step": 62450 }, { "epoch": 27.162129856017387, "grad_norm": 7.729825496673584, "learning_rate": 4.607715914072775e-06, "loss": 11.9102, "step": 62500 }, { "epoch": 27.183863080684596, "grad_norm": 9.05493450164795, "learning_rate": 4.6033318719859715e-06, "loss": 11.901, "step": 62550 }, { "epoch": 27.205596305351808, "grad_norm": 48.41245651245117, "learning_rate": 4.598947829899168e-06, "loss": 11.9217, "step": 62600 }, { "epoch": 27.227329530019016, "grad_norm": 8.19921875, "learning_rate": 4.594563787812363e-06, "loss": 11.9103, "step": 62650 }, { "epoch": 27.249062754686225, "grad_norm": 7.067399024963379, "learning_rate": 4.590179745725559e-06, "loss": 11.9102, "step": 62700 }, { "epoch": 27.270795979353437, "grad_norm": 10.219547271728516, "learning_rate": 4.5857957036387556e-06, "loss": 11.9086, "step": 62750 }, { "epoch": 27.292529204020646, "grad_norm": 11.2730073928833, "learning_rate": 4.581411661551951e-06, "loss": 11.8907, "step": 62800 }, { "epoch": 27.314262428687858, "grad_norm": 23.644775390625, "learning_rate": 4.577027619465147e-06, "loss": 11.9194, "step": 62850 }, { "epoch": 27.335995653355067, "grad_norm": 13.088956832885742, "learning_rate": 4.572643577378343e-06, "loss": 11.9178, "step": 62900 }, { "epoch": 27.357728878022275, "grad_norm": 12.945446968078613, "learning_rate": 4.56825953529154e-06, "loss": 11.9127, "step": 62950 }, { "epoch": 27.379462102689487, "grad_norm": 7.951735019683838, "learning_rate": 4.563875493204735e-06, "loss": 11.9237, "step": 63000 }, { "epoch": 27.401195327356696, "grad_norm": 13.66278076171875, "learning_rate": 4.559491451117931e-06, "loss": 11.8985, "step": 63050 }, { "epoch": 27.422928552023908, "grad_norm": 6.567673683166504, "learning_rate": 4.555107409031127e-06, "loss": 11.9311, "step": 63100 }, { "epoch": 27.444661776691117, "grad_norm": 11.139328956604004, "learning_rate": 4.550723366944323e-06, "loss": 11.9207, "step": 63150 }, { "epoch": 27.466395001358325, "grad_norm": 18.506877899169922, "learning_rate": 4.546339324857519e-06, "loss": 11.9053, "step": 63200 }, { "epoch": 27.488128226025538, "grad_norm": 16.45941925048828, "learning_rate": 4.5419552827707145e-06, "loss": 11.9132, "step": 63250 }, { "epoch": 27.509861450692746, "grad_norm": 13.74703311920166, "learning_rate": 4.537571240683912e-06, "loss": 11.9032, "step": 63300 }, { "epoch": 27.531594675359955, "grad_norm": 5.686723232269287, "learning_rate": 4.533187198597107e-06, "loss": 11.9135, "step": 63350 }, { "epoch": 27.553327900027167, "grad_norm": 47.760013580322266, "learning_rate": 4.528803156510303e-06, "loss": 11.9263, "step": 63400 }, { "epoch": 27.575061124694376, "grad_norm": 13.832674026489258, "learning_rate": 4.524419114423499e-06, "loss": 11.9114, "step": 63450 }, { "epoch": 27.596794349361588, "grad_norm": 22.621736526489258, "learning_rate": 4.520035072336695e-06, "loss": 11.9134, "step": 63500 }, { "epoch": 27.618527574028796, "grad_norm": 13.379792213439941, "learning_rate": 4.515651030249891e-06, "loss": 11.9058, "step": 63550 }, { "epoch": 27.640260798696005, "grad_norm": 12.987919807434082, "learning_rate": 4.5112669881630865e-06, "loss": 11.9083, "step": 63600 }, { "epoch": 27.661994023363217, "grad_norm": 16.87094497680664, "learning_rate": 4.506882946076283e-06, "loss": 11.908, "step": 63650 }, { "epoch": 27.683727248030426, "grad_norm": 9.978212356567383, "learning_rate": 4.502498903989478e-06, "loss": 11.8919, "step": 63700 }, { "epoch": 27.705460472697638, "grad_norm": 13.08248519897461, "learning_rate": 4.498114861902675e-06, "loss": 11.9, "step": 63750 }, { "epoch": 27.727193697364847, "grad_norm": 14.08407974243164, "learning_rate": 4.4937308198158706e-06, "loss": 11.9366, "step": 63800 }, { "epoch": 27.748926922032055, "grad_norm": 11.779139518737793, "learning_rate": 4.489346777729067e-06, "loss": 11.9216, "step": 63850 }, { "epoch": 27.770660146699267, "grad_norm": 7.019837856292725, "learning_rate": 4.484962735642262e-06, "loss": 11.9144, "step": 63900 }, { "epoch": 27.792393371366476, "grad_norm": 8.715902328491211, "learning_rate": 4.480578693555458e-06, "loss": 11.9335, "step": 63950 }, { "epoch": 27.814126596033688, "grad_norm": 17.31736183166504, "learning_rate": 4.476194651468655e-06, "loss": 11.9164, "step": 64000 }, { "epoch": 27.835859820700897, "grad_norm": 7.397292613983154, "learning_rate": 4.47181060938185e-06, "loss": 11.8935, "step": 64050 }, { "epoch": 27.857593045368105, "grad_norm": 15.1404447555542, "learning_rate": 4.467426567295046e-06, "loss": 11.9156, "step": 64100 }, { "epoch": 27.879326270035317, "grad_norm": 16.563631057739258, "learning_rate": 4.4630425252082425e-06, "loss": 11.8858, "step": 64150 }, { "epoch": 27.901059494702526, "grad_norm": 10.400628089904785, "learning_rate": 4.458658483121439e-06, "loss": 11.9083, "step": 64200 }, { "epoch": 27.92279271936974, "grad_norm": 8.129082679748535, "learning_rate": 4.454274441034634e-06, "loss": 11.9361, "step": 64250 }, { "epoch": 27.944525944036947, "grad_norm": 22.946596145629883, "learning_rate": 4.44989039894783e-06, "loss": 11.8997, "step": 64300 }, { "epoch": 27.966259168704156, "grad_norm": 17.139440536499023, "learning_rate": 4.445506356861027e-06, "loss": 11.9067, "step": 64350 }, { "epoch": 27.987992393371368, "grad_norm": 8.700691223144531, "learning_rate": 4.441122314774222e-06, "loss": 11.8941, "step": 64400 }, { "epoch": 28.009562618853572, "grad_norm": 9.634552001953125, "learning_rate": 4.436738272687418e-06, "loss": 11.7983, "step": 64450 }, { "epoch": 28.031295843520784, "grad_norm": 12.564841270446777, "learning_rate": 4.432354230600614e-06, "loss": 11.8731, "step": 64500 }, { "epoch": 28.053029068187993, "grad_norm": 10.420557022094727, "learning_rate": 4.42797018851381e-06, "loss": 11.896, "step": 64550 }, { "epoch": 28.0747622928552, "grad_norm": 13.071510314941406, "learning_rate": 4.423586146427006e-06, "loss": 11.8855, "step": 64600 }, { "epoch": 28.096495517522413, "grad_norm": 11.409537315368652, "learning_rate": 4.419202104340202e-06, "loss": 11.8987, "step": 64650 }, { "epoch": 28.118228742189622, "grad_norm": 17.64859390258789, "learning_rate": 4.4148180622533985e-06, "loss": 11.8742, "step": 64700 }, { "epoch": 28.13996196685683, "grad_norm": 8.101343154907227, "learning_rate": 4.410434020166594e-06, "loss": 11.8781, "step": 64750 }, { "epoch": 28.161695191524043, "grad_norm": 11.35251522064209, "learning_rate": 4.40604997807979e-06, "loss": 11.8891, "step": 64800 }, { "epoch": 28.18342841619125, "grad_norm": 19.521108627319336, "learning_rate": 4.4016659359929855e-06, "loss": 11.882, "step": 64850 }, { "epoch": 28.205161640858464, "grad_norm": 14.904671669006348, "learning_rate": 4.397281893906182e-06, "loss": 11.8987, "step": 64900 }, { "epoch": 28.226894865525672, "grad_norm": 11.82111644744873, "learning_rate": 4.392897851819378e-06, "loss": 11.8718, "step": 64950 }, { "epoch": 28.24862809019288, "grad_norm": 7.986074924468994, "learning_rate": 4.388513809732573e-06, "loss": 11.8931, "step": 65000 }, { "epoch": 28.270361314860093, "grad_norm": 10.135086059570312, "learning_rate": 4.38412976764577e-06, "loss": 11.8845, "step": 65050 }, { "epoch": 28.2920945395273, "grad_norm": 9.275798797607422, "learning_rate": 4.379745725558966e-06, "loss": 11.8647, "step": 65100 }, { "epoch": 28.313827764194514, "grad_norm": 7.864231586456299, "learning_rate": 4.375361683472162e-06, "loss": 11.9019, "step": 65150 }, { "epoch": 28.335560988861722, "grad_norm": 37.51991653442383, "learning_rate": 4.3709776413853575e-06, "loss": 11.8779, "step": 65200 }, { "epoch": 28.35729421352893, "grad_norm": 7.752624034881592, "learning_rate": 4.366593599298554e-06, "loss": 11.9023, "step": 65250 }, { "epoch": 28.379027438196143, "grad_norm": 12.627674102783203, "learning_rate": 4.36220955721175e-06, "loss": 11.8921, "step": 65300 }, { "epoch": 28.40076066286335, "grad_norm": 26.206846237182617, "learning_rate": 4.357825515124945e-06, "loss": 11.891, "step": 65350 }, { "epoch": 28.422493887530564, "grad_norm": 18.58912467956543, "learning_rate": 4.3534414730381416e-06, "loss": 11.908, "step": 65400 }, { "epoch": 28.444227112197773, "grad_norm": 16.89732551574707, "learning_rate": 4.349057430951337e-06, "loss": 11.8899, "step": 65450 }, { "epoch": 28.46596033686498, "grad_norm": 7.8719964027404785, "learning_rate": 4.344673388864534e-06, "loss": 11.8946, "step": 65500 }, { "epoch": 28.487693561532193, "grad_norm": 11.639144897460938, "learning_rate": 4.3402893467777294e-06, "loss": 11.9214, "step": 65550 }, { "epoch": 28.509426786199402, "grad_norm": 29.2702579498291, "learning_rate": 4.335905304690926e-06, "loss": 11.9217, "step": 65600 }, { "epoch": 28.531160010866614, "grad_norm": 48.321807861328125, "learning_rate": 4.331521262604121e-06, "loss": 11.889, "step": 65650 }, { "epoch": 28.552893235533823, "grad_norm": 12.334220886230469, "learning_rate": 4.327137220517317e-06, "loss": 11.8814, "step": 65700 }, { "epoch": 28.57462646020103, "grad_norm": 13.60355281829834, "learning_rate": 4.3227531784305135e-06, "loss": 11.9137, "step": 65750 }, { "epoch": 28.596359684868244, "grad_norm": 12.374007225036621, "learning_rate": 4.318369136343709e-06, "loss": 11.9145, "step": 65800 }, { "epoch": 28.618092909535452, "grad_norm": 15.23318862915039, "learning_rate": 4.313985094256905e-06, "loss": 11.8971, "step": 65850 }, { "epoch": 28.63982613420266, "grad_norm": 8.697155952453613, "learning_rate": 4.3096010521701005e-06, "loss": 11.8899, "step": 65900 }, { "epoch": 28.661559358869873, "grad_norm": 6.101230621337891, "learning_rate": 4.305217010083298e-06, "loss": 11.8835, "step": 65950 }, { "epoch": 28.68329258353708, "grad_norm": 30.645008087158203, "learning_rate": 4.300832967996493e-06, "loss": 11.9118, "step": 66000 }, { "epoch": 28.705025808204294, "grad_norm": 10.432790756225586, "learning_rate": 4.296448925909689e-06, "loss": 11.898, "step": 66050 }, { "epoch": 28.726759032871502, "grad_norm": 22.726320266723633, "learning_rate": 4.2920648838228855e-06, "loss": 11.8829, "step": 66100 }, { "epoch": 28.74849225753871, "grad_norm": 15.002222061157227, "learning_rate": 4.287680841736081e-06, "loss": 11.8922, "step": 66150 }, { "epoch": 28.770225482205923, "grad_norm": 16.7822208404541, "learning_rate": 4.283296799649277e-06, "loss": 11.9015, "step": 66200 }, { "epoch": 28.79195870687313, "grad_norm": 10.86782455444336, "learning_rate": 4.2789127575624725e-06, "loss": 11.9095, "step": 66250 }, { "epoch": 28.813691931540344, "grad_norm": 14.24905776977539, "learning_rate": 4.274528715475669e-06, "loss": 11.8791, "step": 66300 }, { "epoch": 28.835425156207553, "grad_norm": 8.511114120483398, "learning_rate": 4.270144673388865e-06, "loss": 11.8846, "step": 66350 }, { "epoch": 28.85715838087476, "grad_norm": 10.261749267578125, "learning_rate": 4.265760631302061e-06, "loss": 11.9029, "step": 66400 }, { "epoch": 28.878891605541973, "grad_norm": 48.72242736816406, "learning_rate": 4.261376589215257e-06, "loss": 11.9049, "step": 66450 }, { "epoch": 28.900624830209182, "grad_norm": 10.668495178222656, "learning_rate": 4.256992547128453e-06, "loss": 11.8856, "step": 66500 }, { "epoch": 28.92235805487639, "grad_norm": 7.709607124328613, "learning_rate": 4.252608505041649e-06, "loss": 11.8888, "step": 66550 }, { "epoch": 28.944091279543603, "grad_norm": 30.70176124572754, "learning_rate": 4.248224462954844e-06, "loss": 11.9187, "step": 66600 }, { "epoch": 28.96582450421081, "grad_norm": 13.879278182983398, "learning_rate": 4.243840420868041e-06, "loss": 11.8825, "step": 66650 }, { "epoch": 28.987557728878024, "grad_norm": 7.8939714431762695, "learning_rate": 4.239456378781237e-06, "loss": 11.9042, "step": 66700 }, { "epoch": 29.009127954360228, "grad_norm": 16.196550369262695, "learning_rate": 4.235072336694432e-06, "loss": 11.8091, "step": 66750 }, { "epoch": 29.03086117902744, "grad_norm": 10.502305030822754, "learning_rate": 4.2306882946076285e-06, "loss": 11.8647, "step": 66800 }, { "epoch": 29.05259440369465, "grad_norm": 28.054792404174805, "learning_rate": 4.226304252520825e-06, "loss": 11.8775, "step": 66850 }, { "epoch": 29.074327628361857, "grad_norm": 5.852464199066162, "learning_rate": 4.221920210434021e-06, "loss": 11.8717, "step": 66900 }, { "epoch": 29.09606085302907, "grad_norm": 10.438371658325195, "learning_rate": 4.217536168347216e-06, "loss": 11.8783, "step": 66950 }, { "epoch": 29.117794077696278, "grad_norm": 5.391887664794922, "learning_rate": 4.213152126260413e-06, "loss": 11.8597, "step": 67000 }, { "epoch": 29.139527302363486, "grad_norm": 15.71295166015625, "learning_rate": 4.208768084173608e-06, "loss": 11.8726, "step": 67050 }, { "epoch": 29.1612605270307, "grad_norm": 15.637112617492676, "learning_rate": 4.204384042086804e-06, "loss": 11.8549, "step": 67100 }, { "epoch": 29.182993751697907, "grad_norm": 16.201160430908203, "learning_rate": 4.2000000000000004e-06, "loss": 11.8751, "step": 67150 }, { "epoch": 29.20472697636512, "grad_norm": 18.363697052001953, "learning_rate": 4.195615957913196e-06, "loss": 11.9021, "step": 67200 }, { "epoch": 29.226460201032328, "grad_norm": 15.013435363769531, "learning_rate": 4.191231915826393e-06, "loss": 11.8747, "step": 67250 }, { "epoch": 29.248193425699537, "grad_norm": 14.785465240478516, "learning_rate": 4.186847873739588e-06, "loss": 11.8775, "step": 67300 }, { "epoch": 29.26992665036675, "grad_norm": 13.100189208984375, "learning_rate": 4.1824638316527845e-06, "loss": 11.8587, "step": 67350 }, { "epoch": 29.291659875033957, "grad_norm": 9.864031791687012, "learning_rate": 4.17807978956598e-06, "loss": 11.9118, "step": 67400 }, { "epoch": 29.31339309970117, "grad_norm": 19.341495513916016, "learning_rate": 4.173695747479176e-06, "loss": 11.8819, "step": 67450 }, { "epoch": 29.335126324368378, "grad_norm": 7.35308837890625, "learning_rate": 4.169311705392372e-06, "loss": 11.8731, "step": 67500 }, { "epoch": 29.356859549035587, "grad_norm": 8.811240196228027, "learning_rate": 4.164927663305568e-06, "loss": 11.8819, "step": 67550 }, { "epoch": 29.3785927737028, "grad_norm": 9.851766586303711, "learning_rate": 4.160543621218764e-06, "loss": 11.8942, "step": 67600 }, { "epoch": 29.400325998370008, "grad_norm": 14.708338737487793, "learning_rate": 4.156159579131959e-06, "loss": 11.8899, "step": 67650 }, { "epoch": 29.42205922303722, "grad_norm": 11.063777923583984, "learning_rate": 4.1517755370451565e-06, "loss": 11.8602, "step": 67700 }, { "epoch": 29.44379244770443, "grad_norm": 11.282812118530273, "learning_rate": 4.147391494958352e-06, "loss": 11.8651, "step": 67750 }, { "epoch": 29.465525672371637, "grad_norm": 258.5189514160156, "learning_rate": 4.143007452871548e-06, "loss": 11.8813, "step": 67800 }, { "epoch": 29.48725889703885, "grad_norm": 17.533771514892578, "learning_rate": 4.138623410784744e-06, "loss": 11.8777, "step": 67850 }, { "epoch": 29.508992121706058, "grad_norm": 9.061328887939453, "learning_rate": 4.13423936869794e-06, "loss": 11.863, "step": 67900 }, { "epoch": 29.530725346373266, "grad_norm": 14.129364013671875, "learning_rate": 4.129855326611136e-06, "loss": 11.8837, "step": 67950 }, { "epoch": 29.55245857104048, "grad_norm": 21.77886390686035, "learning_rate": 4.125471284524331e-06, "loss": 11.8897, "step": 68000 }, { "epoch": 29.574191795707687, "grad_norm": 8.441765785217285, "learning_rate": 4.1210872424375276e-06, "loss": 11.9048, "step": 68050 }, { "epoch": 29.5959250203749, "grad_norm": 11.595650672912598, "learning_rate": 4.116703200350724e-06, "loss": 11.8899, "step": 68100 }, { "epoch": 29.617658245042108, "grad_norm": 16.048147201538086, "learning_rate": 4.11231915826392e-06, "loss": 11.8787, "step": 68150 }, { "epoch": 29.639391469709317, "grad_norm": 9.9227294921875, "learning_rate": 4.1079351161771154e-06, "loss": 11.8748, "step": 68200 }, { "epoch": 29.66112469437653, "grad_norm": 9.97187614440918, "learning_rate": 4.103551074090312e-06, "loss": 11.8759, "step": 68250 }, { "epoch": 29.682857919043737, "grad_norm": 18.43181610107422, "learning_rate": 4.099167032003508e-06, "loss": 11.8683, "step": 68300 }, { "epoch": 29.70459114371095, "grad_norm": 18.20121192932129, "learning_rate": 4.094782989916703e-06, "loss": 11.8865, "step": 68350 }, { "epoch": 29.726324368378158, "grad_norm": 6.934305667877197, "learning_rate": 4.0903989478298995e-06, "loss": 11.8502, "step": 68400 }, { "epoch": 29.748057593045367, "grad_norm": 12.715697288513184, "learning_rate": 4.086014905743096e-06, "loss": 11.8733, "step": 68450 }, { "epoch": 29.76979081771258, "grad_norm": 9.016664505004883, "learning_rate": 4.081630863656291e-06, "loss": 11.8775, "step": 68500 }, { "epoch": 29.791524042379788, "grad_norm": 7.763296127319336, "learning_rate": 4.077246821569487e-06, "loss": 11.8733, "step": 68550 }, { "epoch": 29.813257267047, "grad_norm": 10.350701332092285, "learning_rate": 4.072862779482684e-06, "loss": 11.8632, "step": 68600 }, { "epoch": 29.83499049171421, "grad_norm": 6.480827331542969, "learning_rate": 4.06847873739588e-06, "loss": 11.8788, "step": 68650 }, { "epoch": 29.856723716381417, "grad_norm": 20.947677612304688, "learning_rate": 4.064094695309075e-06, "loss": 11.8773, "step": 68700 }, { "epoch": 29.87845694104863, "grad_norm": 10.931136131286621, "learning_rate": 4.0597106532222715e-06, "loss": 11.8612, "step": 68750 }, { "epoch": 29.900190165715838, "grad_norm": 10.79286003112793, "learning_rate": 4.055326611135467e-06, "loss": 11.8702, "step": 68800 }, { "epoch": 29.921923390383046, "grad_norm": 45.66188049316406, "learning_rate": 4.050942569048663e-06, "loss": 11.8613, "step": 68850 }, { "epoch": 29.94365661505026, "grad_norm": 6.688445091247559, "learning_rate": 4.046558526961859e-06, "loss": 11.8739, "step": 68900 }, { "epoch": 29.965389839717467, "grad_norm": 14.173410415649414, "learning_rate": 4.042174484875055e-06, "loss": 11.8595, "step": 68950 }, { "epoch": 29.98712306438468, "grad_norm": 13.653775215148926, "learning_rate": 4.037790442788251e-06, "loss": 11.8927, "step": 69000 }, { "epoch": 30.008693289866883, "grad_norm": 9.138008117675781, "learning_rate": 4.033406400701447e-06, "loss": 11.7683, "step": 69050 }, { "epoch": 30.030426514534096, "grad_norm": 16.62093162536621, "learning_rate": 4.029022358614643e-06, "loss": 11.8934, "step": 69100 }, { "epoch": 30.052159739201304, "grad_norm": 7.672760486602783, "learning_rate": 4.024638316527839e-06, "loss": 11.8631, "step": 69150 }, { "epoch": 30.073892963868513, "grad_norm": 8.038310050964355, "learning_rate": 4.020254274441035e-06, "loss": 11.8599, "step": 69200 }, { "epoch": 30.095626188535725, "grad_norm": 10.817283630371094, "learning_rate": 4.015870232354231e-06, "loss": 11.8627, "step": 69250 }, { "epoch": 30.117359413202934, "grad_norm": 6.556225299835205, "learning_rate": 4.011486190267427e-06, "loss": 11.8671, "step": 69300 }, { "epoch": 30.139092637870142, "grad_norm": 16.242650985717773, "learning_rate": 4.007102148180623e-06, "loss": 11.875, "step": 69350 }, { "epoch": 30.160825862537354, "grad_norm": 5.174230575561523, "learning_rate": 4.002718106093818e-06, "loss": 11.8433, "step": 69400 }, { "epoch": 30.182559087204563, "grad_norm": 7.197856426239014, "learning_rate": 3.998334064007015e-06, "loss": 11.8622, "step": 69450 }, { "epoch": 30.204292311871775, "grad_norm": 21.63473892211914, "learning_rate": 3.993950021920211e-06, "loss": 11.8656, "step": 69500 }, { "epoch": 30.226025536538984, "grad_norm": 17.78504753112793, "learning_rate": 3.989565979833407e-06, "loss": 11.8521, "step": 69550 }, { "epoch": 30.247758761206192, "grad_norm": 18.68705940246582, "learning_rate": 3.985181937746602e-06, "loss": 11.8453, "step": 69600 }, { "epoch": 30.269491985873405, "grad_norm": 7.354127407073975, "learning_rate": 3.980797895659799e-06, "loss": 11.8665, "step": 69650 }, { "epoch": 30.291225210540613, "grad_norm": 7.651024341583252, "learning_rate": 3.976413853572995e-06, "loss": 11.853, "step": 69700 }, { "epoch": 30.312958435207825, "grad_norm": 8.84490966796875, "learning_rate": 3.97202981148619e-06, "loss": 11.8501, "step": 69750 }, { "epoch": 30.334691659875034, "grad_norm": 8.941247940063477, "learning_rate": 3.9676457693993865e-06, "loss": 11.8615, "step": 69800 }, { "epoch": 30.356424884542243, "grad_norm": 13.154361724853516, "learning_rate": 3.963261727312583e-06, "loss": 11.8584, "step": 69850 }, { "epoch": 30.378158109209455, "grad_norm": 13.795583724975586, "learning_rate": 3.958877685225779e-06, "loss": 11.8604, "step": 69900 }, { "epoch": 30.399891333876663, "grad_norm": 8.268631935119629, "learning_rate": 3.954493643138974e-06, "loss": 11.859, "step": 69950 }, { "epoch": 30.421624558543876, "grad_norm": 28.959548950195312, "learning_rate": 3.9501096010521705e-06, "loss": 11.8702, "step": 70000 }, { "epoch": 30.421624558543876, "eval_cer": 0.0757471023169121, "eval_loss": 2.39117431640625, "eval_runtime": 396.909, "eval_samples_per_second": 13.62, "eval_steps_per_second": 3.406, "eval_wer": 0.22849740932642487, "step": 70000 }, { "epoch": 30.443357783211084, "grad_norm": 15.879110336303711, "learning_rate": 3.945725558965367e-06, "loss": 11.8687, "step": 70050 }, { "epoch": 30.465091007878293, "grad_norm": 14.088164329528809, "learning_rate": 3.941341516878562e-06, "loss": 11.85, "step": 70100 }, { "epoch": 30.486824232545505, "grad_norm": 5.0238752365112305, "learning_rate": 3.936957474791758e-06, "loss": 11.8533, "step": 70150 }, { "epoch": 30.508557457212714, "grad_norm": 11.336899757385254, "learning_rate": 3.932573432704954e-06, "loss": 11.8699, "step": 70200 }, { "epoch": 30.530290681879922, "grad_norm": 17.313730239868164, "learning_rate": 3.92818939061815e-06, "loss": 11.8562, "step": 70250 }, { "epoch": 30.552023906547134, "grad_norm": 28.565584182739258, "learning_rate": 3.923805348531346e-06, "loss": 11.8547, "step": 70300 }, { "epoch": 30.573757131214343, "grad_norm": 6.773772239685059, "learning_rate": 3.9194213064445425e-06, "loss": 11.8538, "step": 70350 }, { "epoch": 30.595490355881555, "grad_norm": 15.116411209106445, "learning_rate": 3.915037264357739e-06, "loss": 11.8638, "step": 70400 }, { "epoch": 30.617223580548764, "grad_norm": 9.379572868347168, "learning_rate": 3.910653222270934e-06, "loss": 11.8757, "step": 70450 }, { "epoch": 30.638956805215972, "grad_norm": 12.259918212890625, "learning_rate": 3.90626918018413e-06, "loss": 11.876, "step": 70500 }, { "epoch": 30.660690029883185, "grad_norm": 12.57608699798584, "learning_rate": 3.901885138097326e-06, "loss": 11.8507, "step": 70550 }, { "epoch": 30.682423254550393, "grad_norm": 8.661283493041992, "learning_rate": 3.897501096010522e-06, "loss": 11.8491, "step": 70600 }, { "epoch": 30.704156479217605, "grad_norm": 9.84383773803711, "learning_rate": 3.893117053923718e-06, "loss": 11.8489, "step": 70650 }, { "epoch": 30.725889703884814, "grad_norm": 9.917572975158691, "learning_rate": 3.888733011836914e-06, "loss": 11.8785, "step": 70700 }, { "epoch": 30.747622928552023, "grad_norm": 7.059745788574219, "learning_rate": 3.88434896975011e-06, "loss": 11.8737, "step": 70750 }, { "epoch": 30.769356153219235, "grad_norm": 20.44463348388672, "learning_rate": 3.879964927663306e-06, "loss": 11.8699, "step": 70800 }, { "epoch": 30.791089377886443, "grad_norm": 6.311903476715088, "learning_rate": 3.875580885576502e-06, "loss": 11.8542, "step": 70850 }, { "epoch": 30.812822602553656, "grad_norm": 6.262167930603027, "learning_rate": 3.871196843489698e-06, "loss": 11.8626, "step": 70900 }, { "epoch": 30.834555827220864, "grad_norm": 8.859283447265625, "learning_rate": 3.866812801402894e-06, "loss": 11.8909, "step": 70950 }, { "epoch": 30.856289051888073, "grad_norm": 6.593499660491943, "learning_rate": 3.86242875931609e-06, "loss": 11.8474, "step": 71000 }, { "epoch": 30.878022276555285, "grad_norm": 16.074264526367188, "learning_rate": 3.8580447172292855e-06, "loss": 11.8634, "step": 71050 }, { "epoch": 30.899755501222494, "grad_norm": 16.934633255004883, "learning_rate": 3.853660675142482e-06, "loss": 11.8481, "step": 71100 }, { "epoch": 30.921488725889702, "grad_norm": 11.176169395446777, "learning_rate": 3.849276633055677e-06, "loss": 11.8678, "step": 71150 }, { "epoch": 30.943221950556914, "grad_norm": 13.823466300964355, "learning_rate": 3.844892590968873e-06, "loss": 11.8525, "step": 71200 }, { "epoch": 30.964955175224123, "grad_norm": 12.757974624633789, "learning_rate": 3.84050854888207e-06, "loss": 11.8596, "step": 71250 }, { "epoch": 30.986688399891335, "grad_norm": 6.2555108070373535, "learning_rate": 3.836124506795266e-06, "loss": 11.8729, "step": 71300 }, { "epoch": 31.00825862537354, "grad_norm": 7.998335361480713, "learning_rate": 3.831740464708461e-06, "loss": 11.7557, "step": 71350 }, { "epoch": 31.02999185004075, "grad_norm": 7.063460826873779, "learning_rate": 3.8273564226216575e-06, "loss": 11.8673, "step": 71400 }, { "epoch": 31.05172507470796, "grad_norm": 7.559152126312256, "learning_rate": 3.822972380534854e-06, "loss": 11.8614, "step": 71450 }, { "epoch": 31.07345829937517, "grad_norm": 9.765264511108398, "learning_rate": 3.818588338448049e-06, "loss": 11.8243, "step": 71500 }, { "epoch": 31.09519152404238, "grad_norm": 8.741211891174316, "learning_rate": 3.8142042963612453e-06, "loss": 11.8631, "step": 71550 }, { "epoch": 31.11692474870959, "grad_norm": 10.110342025756836, "learning_rate": 3.809820254274441e-06, "loss": 11.8624, "step": 71600 }, { "epoch": 31.138657973376798, "grad_norm": 7.525726318359375, "learning_rate": 3.805436212187637e-06, "loss": 11.8511, "step": 71650 }, { "epoch": 31.16039119804401, "grad_norm": 6.264368057250977, "learning_rate": 3.8010521701008336e-06, "loss": 11.8447, "step": 71700 }, { "epoch": 31.18212442271122, "grad_norm": 6.522670745849609, "learning_rate": 3.7966681280140294e-06, "loss": 11.8619, "step": 71750 }, { "epoch": 31.20385764737843, "grad_norm": 17.985116958618164, "learning_rate": 3.7922840859272252e-06, "loss": 11.8723, "step": 71800 }, { "epoch": 31.22559087204564, "grad_norm": 10.094488143920898, "learning_rate": 3.787900043840421e-06, "loss": 11.8429, "step": 71850 }, { "epoch": 31.24732409671285, "grad_norm": 12.937264442443848, "learning_rate": 3.7835160017536173e-06, "loss": 11.8569, "step": 71900 }, { "epoch": 31.26905732138006, "grad_norm": 20.594358444213867, "learning_rate": 3.779131959666813e-06, "loss": 11.8438, "step": 71950 }, { "epoch": 31.29079054604727, "grad_norm": 10.052034378051758, "learning_rate": 3.774747917580009e-06, "loss": 11.8419, "step": 72000 }, { "epoch": 31.31252377071448, "grad_norm": 8.929048538208008, "learning_rate": 3.7703638754932047e-06, "loss": 11.8299, "step": 72050 }, { "epoch": 31.33425699538169, "grad_norm": 9.807400703430176, "learning_rate": 3.7659798334064014e-06, "loss": 11.8243, "step": 72100 }, { "epoch": 31.3559902200489, "grad_norm": 17.955623626708984, "learning_rate": 3.761595791319597e-06, "loss": 11.8255, "step": 72150 }, { "epoch": 31.37772344471611, "grad_norm": 19.642745971679688, "learning_rate": 3.757211749232793e-06, "loss": 11.8498, "step": 72200 }, { "epoch": 31.39945666938332, "grad_norm": 8.74807357788086, "learning_rate": 3.752827707145989e-06, "loss": 11.8492, "step": 72250 }, { "epoch": 31.42118989405053, "grad_norm": 8.516878128051758, "learning_rate": 3.748443665059185e-06, "loss": 11.8481, "step": 72300 }, { "epoch": 31.44292311871774, "grad_norm": 26.898788452148438, "learning_rate": 3.744059622972381e-06, "loss": 11.8371, "step": 72350 }, { "epoch": 31.46465634338495, "grad_norm": 6.748674392700195, "learning_rate": 3.7396755808855766e-06, "loss": 11.8486, "step": 72400 }, { "epoch": 31.48638956805216, "grad_norm": 10.551872253417969, "learning_rate": 3.7352915387987725e-06, "loss": 11.8533, "step": 72450 }, { "epoch": 31.50812279271937, "grad_norm": 14.1845703125, "learning_rate": 3.7309074967119687e-06, "loss": 11.8414, "step": 72500 }, { "epoch": 31.529856017386578, "grad_norm": 16.51775360107422, "learning_rate": 3.726523454625165e-06, "loss": 11.8535, "step": 72550 }, { "epoch": 31.55158924205379, "grad_norm": 24.120222091674805, "learning_rate": 3.7221394125383607e-06, "loss": 11.8557, "step": 72600 }, { "epoch": 31.573322466721, "grad_norm": 6.063103199005127, "learning_rate": 3.717755370451557e-06, "loss": 11.8375, "step": 72650 }, { "epoch": 31.59505569138821, "grad_norm": 11.34897232055664, "learning_rate": 3.7133713283647528e-06, "loss": 11.8438, "step": 72700 }, { "epoch": 31.61678891605542, "grad_norm": 9.746992111206055, "learning_rate": 3.7089872862779486e-06, "loss": 11.8471, "step": 72750 }, { "epoch": 31.63852214072263, "grad_norm": 8.114310264587402, "learning_rate": 3.7046032441911444e-06, "loss": 11.8412, "step": 72800 }, { "epoch": 31.66025536538984, "grad_norm": 8.393730163574219, "learning_rate": 3.70021920210434e-06, "loss": 11.837, "step": 72850 }, { "epoch": 31.68198859005705, "grad_norm": 8.245162963867188, "learning_rate": 3.6958351600175364e-06, "loss": 11.8553, "step": 72900 }, { "epoch": 31.70372181472426, "grad_norm": 7.575582981109619, "learning_rate": 3.6914511179307323e-06, "loss": 11.845, "step": 72950 }, { "epoch": 31.72545503939147, "grad_norm": 7.178465366363525, "learning_rate": 3.6870670758439285e-06, "loss": 11.8327, "step": 73000 }, { "epoch": 31.74718826405868, "grad_norm": 8.260749816894531, "learning_rate": 3.6826830337571247e-06, "loss": 11.8478, "step": 73050 }, { "epoch": 31.76892148872589, "grad_norm": 45.3736457824707, "learning_rate": 3.6782989916703205e-06, "loss": 11.8445, "step": 73100 }, { "epoch": 31.7906547133931, "grad_norm": 15.68336296081543, "learning_rate": 3.6739149495835163e-06, "loss": 11.8551, "step": 73150 }, { "epoch": 31.81238793806031, "grad_norm": 5.821103572845459, "learning_rate": 3.669530907496712e-06, "loss": 11.8517, "step": 73200 }, { "epoch": 31.83412116272752, "grad_norm": 12.418885231018066, "learning_rate": 3.665146865409908e-06, "loss": 11.8517, "step": 73250 }, { "epoch": 31.85585438739473, "grad_norm": 8.705698013305664, "learning_rate": 3.660762823323104e-06, "loss": 11.8424, "step": 73300 }, { "epoch": 31.87758761206194, "grad_norm": 9.667759895324707, "learning_rate": 3.6563787812363e-06, "loss": 11.8561, "step": 73350 }, { "epoch": 31.89932083672915, "grad_norm": 14.76951789855957, "learning_rate": 3.651994739149496e-06, "loss": 11.8605, "step": 73400 }, { "epoch": 31.921054061396358, "grad_norm": 14.691853523254395, "learning_rate": 3.6476106970626925e-06, "loss": 11.8595, "step": 73450 }, { "epoch": 31.94278728606357, "grad_norm": 7.9246721267700195, "learning_rate": 3.6432266549758883e-06, "loss": 11.8473, "step": 73500 }, { "epoch": 31.96452051073078, "grad_norm": 5.882972240447998, "learning_rate": 3.638842612889084e-06, "loss": 11.8418, "step": 73550 }, { "epoch": 31.98625373539799, "grad_norm": 6.664644718170166, "learning_rate": 3.63445857080228e-06, "loss": 11.8388, "step": 73600 }, { "epoch": 32.007823960880195, "grad_norm": 7.938138961791992, "learning_rate": 3.630074528715476e-06, "loss": 11.7538, "step": 73650 }, { "epoch": 32.029557185547404, "grad_norm": 8.011933326721191, "learning_rate": 3.625690486628672e-06, "loss": 11.8182, "step": 73700 }, { "epoch": 32.05129041021461, "grad_norm": 11.604764938354492, "learning_rate": 3.6213064445418678e-06, "loss": 11.8178, "step": 73750 }, { "epoch": 32.07302363488183, "grad_norm": 13.241369247436523, "learning_rate": 3.6169224024550636e-06, "loss": 11.8383, "step": 73800 }, { "epoch": 32.09475685954904, "grad_norm": 5.11595344543457, "learning_rate": 3.6125383603682594e-06, "loss": 11.8519, "step": 73850 }, { "epoch": 32.116490084216245, "grad_norm": 17.570140838623047, "learning_rate": 3.608154318281456e-06, "loss": 11.8329, "step": 73900 }, { "epoch": 32.138223308883454, "grad_norm": 10.764384269714355, "learning_rate": 3.603770276194652e-06, "loss": 11.8312, "step": 73950 }, { "epoch": 32.15995653355066, "grad_norm": 21.758943557739258, "learning_rate": 3.5993862341078477e-06, "loss": 11.8513, "step": 74000 }, { "epoch": 32.18168975821788, "grad_norm": 6.227720260620117, "learning_rate": 3.595002192021044e-06, "loss": 11.8468, "step": 74050 }, { "epoch": 32.20342298288509, "grad_norm": 6.502994537353516, "learning_rate": 3.5906181499342397e-06, "loss": 11.8287, "step": 74100 }, { "epoch": 32.225156207552295, "grad_norm": 8.124176025390625, "learning_rate": 3.5862341078474355e-06, "loss": 11.8244, "step": 74150 }, { "epoch": 32.246889432219504, "grad_norm": 17.224422454833984, "learning_rate": 3.5818500657606313e-06, "loss": 11.8529, "step": 74200 }, { "epoch": 32.26862265688671, "grad_norm": 16.075273513793945, "learning_rate": 3.577466023673827e-06, "loss": 11.8337, "step": 74250 }, { "epoch": 32.29035588155393, "grad_norm": 10.724888801574707, "learning_rate": 3.573081981587024e-06, "loss": 11.8234, "step": 74300 }, { "epoch": 32.31208910622114, "grad_norm": 13.913077354431152, "learning_rate": 3.5686979395002196e-06, "loss": 11.824, "step": 74350 }, { "epoch": 32.333822330888346, "grad_norm": 5.98539924621582, "learning_rate": 3.5643138974134154e-06, "loss": 11.8469, "step": 74400 }, { "epoch": 32.355555555555554, "grad_norm": 16.95889663696289, "learning_rate": 3.5599298553266117e-06, "loss": 11.8407, "step": 74450 }, { "epoch": 32.37728878022276, "grad_norm": 11.7858304977417, "learning_rate": 3.5555458132398075e-06, "loss": 11.8458, "step": 74500 }, { "epoch": 32.39902200488998, "grad_norm": 12.35476303100586, "learning_rate": 3.5511617711530033e-06, "loss": 11.8322, "step": 74550 }, { "epoch": 32.42075522955719, "grad_norm": 8.592928886413574, "learning_rate": 3.546777729066199e-06, "loss": 11.8464, "step": 74600 }, { "epoch": 32.442488454224396, "grad_norm": 15.99875259399414, "learning_rate": 3.5423936869793953e-06, "loss": 11.8412, "step": 74650 }, { "epoch": 32.464221678891604, "grad_norm": 6.029876232147217, "learning_rate": 3.538009644892591e-06, "loss": 11.8529, "step": 74700 }, { "epoch": 32.48595490355881, "grad_norm": 25.144210815429688, "learning_rate": 3.5336256028057874e-06, "loss": 11.8538, "step": 74750 }, { "epoch": 32.50768812822603, "grad_norm": 4.607775688171387, "learning_rate": 3.529241560718983e-06, "loss": 11.8652, "step": 74800 }, { "epoch": 32.52942135289324, "grad_norm": 9.385605812072754, "learning_rate": 3.5248575186321794e-06, "loss": 11.8256, "step": 74850 }, { "epoch": 32.551154577560446, "grad_norm": 8.230783462524414, "learning_rate": 3.5204734765453752e-06, "loss": 11.8065, "step": 74900 }, { "epoch": 32.572887802227655, "grad_norm": 38.624691009521484, "learning_rate": 3.516089434458571e-06, "loss": 11.8167, "step": 74950 }, { "epoch": 32.59462102689486, "grad_norm": 17.61267852783203, "learning_rate": 3.511705392371767e-06, "loss": 11.8357, "step": 75000 }, { "epoch": 32.61635425156208, "grad_norm": 6.209005355834961, "learning_rate": 3.507321350284963e-06, "loss": 11.8375, "step": 75050 }, { "epoch": 32.63808747622929, "grad_norm": 14.121482849121094, "learning_rate": 3.502937308198159e-06, "loss": 11.8476, "step": 75100 }, { "epoch": 32.659820700896496, "grad_norm": 28.74132537841797, "learning_rate": 3.4985532661113547e-06, "loss": 11.8317, "step": 75150 }, { "epoch": 32.681553925563705, "grad_norm": 6.806987762451172, "learning_rate": 3.4941692240245514e-06, "loss": 11.8527, "step": 75200 }, { "epoch": 32.70328715023091, "grad_norm": 7.174561500549316, "learning_rate": 3.489785181937747e-06, "loss": 11.8188, "step": 75250 }, { "epoch": 32.72502037489812, "grad_norm": 13.119464874267578, "learning_rate": 3.485401139850943e-06, "loss": 11.8392, "step": 75300 }, { "epoch": 32.74675359956534, "grad_norm": 8.41006851196289, "learning_rate": 3.4810170977641388e-06, "loss": 11.8283, "step": 75350 }, { "epoch": 32.768486824232546, "grad_norm": 10.47354507446289, "learning_rate": 3.4766330556773346e-06, "loss": 11.8295, "step": 75400 }, { "epoch": 32.790220048899755, "grad_norm": 7.730106353759766, "learning_rate": 3.472249013590531e-06, "loss": 11.8672, "step": 75450 }, { "epoch": 32.811953273566964, "grad_norm": 6.337311744689941, "learning_rate": 3.4678649715037266e-06, "loss": 11.8289, "step": 75500 }, { "epoch": 32.83368649823417, "grad_norm": 9.5441255569458, "learning_rate": 3.4634809294169225e-06, "loss": 11.8215, "step": 75550 }, { "epoch": 32.85541972290139, "grad_norm": 8.01675796508789, "learning_rate": 3.4590968873301183e-06, "loss": 11.8303, "step": 75600 }, { "epoch": 32.8771529475686, "grad_norm": 10.308701515197754, "learning_rate": 3.454712845243315e-06, "loss": 11.8337, "step": 75650 }, { "epoch": 32.898886172235805, "grad_norm": 8.78437614440918, "learning_rate": 3.4503288031565107e-06, "loss": 11.8283, "step": 75700 }, { "epoch": 32.920619396903014, "grad_norm": 12.1674222946167, "learning_rate": 3.4459447610697065e-06, "loss": 11.8378, "step": 75750 }, { "epoch": 32.94235262157022, "grad_norm": 11.723808288574219, "learning_rate": 3.4415607189829024e-06, "loss": 11.8242, "step": 75800 }, { "epoch": 32.96408584623744, "grad_norm": 18.23768424987793, "learning_rate": 3.4371766768960986e-06, "loss": 11.8577, "step": 75850 }, { "epoch": 32.98581907090465, "grad_norm": 23.5877742767334, "learning_rate": 3.4327926348092944e-06, "loss": 11.8222, "step": 75900 }, { "epoch": 33.007389296386854, "grad_norm": 6.948608875274658, "learning_rate": 3.42840859272249e-06, "loss": 11.7388, "step": 75950 }, { "epoch": 33.02912252105406, "grad_norm": 4.4768476486206055, "learning_rate": 3.424024550635686e-06, "loss": 11.8317, "step": 76000 }, { "epoch": 33.05085574572127, "grad_norm": 7.8470282554626465, "learning_rate": 3.4196405085488823e-06, "loss": 11.8416, "step": 76050 }, { "epoch": 33.07258897038848, "grad_norm": 7.3259053230285645, "learning_rate": 3.4152564664620785e-06, "loss": 11.8322, "step": 76100 }, { "epoch": 33.09432219505569, "grad_norm": 16.797231674194336, "learning_rate": 3.4108724243752743e-06, "loss": 11.8436, "step": 76150 }, { "epoch": 33.116055419722905, "grad_norm": 4.982487201690674, "learning_rate": 3.4064883822884705e-06, "loss": 11.799, "step": 76200 }, { "epoch": 33.13778864439011, "grad_norm": 8.252666473388672, "learning_rate": 3.4021043402016663e-06, "loss": 11.8154, "step": 76250 }, { "epoch": 33.15952186905732, "grad_norm": 9.021413803100586, "learning_rate": 3.397720298114862e-06, "loss": 11.8364, "step": 76300 }, { "epoch": 33.18125509372453, "grad_norm": 4.675612926483154, "learning_rate": 3.393336256028058e-06, "loss": 11.8131, "step": 76350 }, { "epoch": 33.20298831839174, "grad_norm": 8.468708992004395, "learning_rate": 3.3889522139412538e-06, "loss": 11.8201, "step": 76400 }, { "epoch": 33.224721543058955, "grad_norm": 21.99992561340332, "learning_rate": 3.38456817185445e-06, "loss": 11.8318, "step": 76450 }, { "epoch": 33.24645476772616, "grad_norm": 5.2964301109313965, "learning_rate": 3.3801841297676462e-06, "loss": 11.8196, "step": 76500 }, { "epoch": 33.26818799239337, "grad_norm": 12.34626579284668, "learning_rate": 3.375800087680842e-06, "loss": 11.8333, "step": 76550 }, { "epoch": 33.28992121706058, "grad_norm": 12.113372802734375, "learning_rate": 3.3714160455940383e-06, "loss": 11.8187, "step": 76600 }, { "epoch": 33.31165444172779, "grad_norm": 15.364481925964355, "learning_rate": 3.367032003507234e-06, "loss": 11.8173, "step": 76650 }, { "epoch": 33.333387666395, "grad_norm": 4.8235063552856445, "learning_rate": 3.36264796142043e-06, "loss": 11.8306, "step": 76700 }, { "epoch": 33.355120891062214, "grad_norm": 23.78803253173828, "learning_rate": 3.3582639193336257e-06, "loss": 11.8027, "step": 76750 }, { "epoch": 33.37685411572942, "grad_norm": 9.344151496887207, "learning_rate": 3.3538798772468215e-06, "loss": 11.8113, "step": 76800 }, { "epoch": 33.39858734039663, "grad_norm": 8.895915985107422, "learning_rate": 3.3494958351600178e-06, "loss": 11.8214, "step": 76850 }, { "epoch": 33.42032056506384, "grad_norm": 33.99968719482422, "learning_rate": 3.3451117930732136e-06, "loss": 11.8217, "step": 76900 }, { "epoch": 33.44205378973105, "grad_norm": 9.92707633972168, "learning_rate": 3.34072775098641e-06, "loss": 11.8012, "step": 76950 }, { "epoch": 33.463787014398264, "grad_norm": 12.355010986328125, "learning_rate": 3.336343708899606e-06, "loss": 11.8093, "step": 77000 }, { "epoch": 33.48552023906547, "grad_norm": 12.512097358703613, "learning_rate": 3.331959666812802e-06, "loss": 11.8357, "step": 77050 }, { "epoch": 33.50725346373268, "grad_norm": 4.472128391265869, "learning_rate": 3.3275756247259977e-06, "loss": 11.8175, "step": 77100 }, { "epoch": 33.52898668839989, "grad_norm": 12.460317611694336, "learning_rate": 3.3231915826391935e-06, "loss": 11.8219, "step": 77150 }, { "epoch": 33.5507199130671, "grad_norm": 10.255359649658203, "learning_rate": 3.3188075405523897e-06, "loss": 11.8135, "step": 77200 }, { "epoch": 33.572453137734314, "grad_norm": 9.60875415802002, "learning_rate": 3.3144234984655855e-06, "loss": 11.8244, "step": 77250 }, { "epoch": 33.59418636240152, "grad_norm": 7.315709590911865, "learning_rate": 3.3100394563787813e-06, "loss": 11.8137, "step": 77300 }, { "epoch": 33.61591958706873, "grad_norm": 16.642723083496094, "learning_rate": 3.305655414291977e-06, "loss": 11.8368, "step": 77350 }, { "epoch": 33.63765281173594, "grad_norm": 4.400660991668701, "learning_rate": 3.301271372205174e-06, "loss": 11.8195, "step": 77400 }, { "epoch": 33.65938603640315, "grad_norm": 8.862713813781738, "learning_rate": 3.2968873301183696e-06, "loss": 11.8168, "step": 77450 }, { "epoch": 33.681119261070364, "grad_norm": 10.427742004394531, "learning_rate": 3.2925032880315654e-06, "loss": 11.8033, "step": 77500 }, { "epoch": 33.70285248573757, "grad_norm": 6.926135540008545, "learning_rate": 3.2881192459447612e-06, "loss": 11.828, "step": 77550 }, { "epoch": 33.72458571040478, "grad_norm": 5.068178176879883, "learning_rate": 3.2837352038579575e-06, "loss": 11.8273, "step": 77600 }, { "epoch": 33.74631893507199, "grad_norm": 6.944793224334717, "learning_rate": 3.2793511617711533e-06, "loss": 11.813, "step": 77650 }, { "epoch": 33.7680521597392, "grad_norm": 36.322383880615234, "learning_rate": 3.274967119684349e-06, "loss": 11.8141, "step": 77700 }, { "epoch": 33.789785384406414, "grad_norm": 6.488020420074463, "learning_rate": 3.270583077597545e-06, "loss": 11.8322, "step": 77750 }, { "epoch": 33.81151860907362, "grad_norm": 9.435515403747559, "learning_rate": 3.2661990355107407e-06, "loss": 11.8242, "step": 77800 }, { "epoch": 33.83325183374083, "grad_norm": 4.060996055603027, "learning_rate": 3.2618149934239374e-06, "loss": 11.816, "step": 77850 }, { "epoch": 33.85498505840804, "grad_norm": 13.589747428894043, "learning_rate": 3.257430951337133e-06, "loss": 11.8091, "step": 77900 }, { "epoch": 33.87671828307525, "grad_norm": 11.052616119384766, "learning_rate": 3.253046909250329e-06, "loss": 11.8391, "step": 77950 }, { "epoch": 33.898451507742465, "grad_norm": 10.746622085571289, "learning_rate": 3.2486628671635252e-06, "loss": 11.8432, "step": 78000 }, { "epoch": 33.92018473240967, "grad_norm": 10.50125503540039, "learning_rate": 3.244278825076721e-06, "loss": 11.8408, "step": 78050 }, { "epoch": 33.94191795707688, "grad_norm": 6.73277473449707, "learning_rate": 3.239894782989917e-06, "loss": 11.8296, "step": 78100 }, { "epoch": 33.96365118174409, "grad_norm": 10.480985641479492, "learning_rate": 3.2355107409031126e-06, "loss": 11.8362, "step": 78150 }, { "epoch": 33.9853844064113, "grad_norm": 7.480873107910156, "learning_rate": 3.231126698816309e-06, "loss": 11.8262, "step": 78200 }, { "epoch": 34.00695463189351, "grad_norm": 9.988080024719238, "learning_rate": 3.2267426567295047e-06, "loss": 11.735, "step": 78250 }, { "epoch": 34.028687856560715, "grad_norm": 10.169675827026367, "learning_rate": 3.222358614642701e-06, "loss": 11.8251, "step": 78300 }, { "epoch": 34.050421081227924, "grad_norm": 13.815673828125, "learning_rate": 3.217974572555897e-06, "loss": 11.8091, "step": 78350 }, { "epoch": 34.07215430589514, "grad_norm": 10.704404830932617, "learning_rate": 3.213590530469093e-06, "loss": 11.8009, "step": 78400 }, { "epoch": 34.09388753056235, "grad_norm": 9.71978759765625, "learning_rate": 3.2092064883822888e-06, "loss": 11.8105, "step": 78450 }, { "epoch": 34.11562075522956, "grad_norm": 18.075393676757812, "learning_rate": 3.2048224462954846e-06, "loss": 11.8083, "step": 78500 }, { "epoch": 34.137353979896766, "grad_norm": 10.046432495117188, "learning_rate": 3.2004384042086804e-06, "loss": 11.7901, "step": 78550 }, { "epoch": 34.159087204563974, "grad_norm": 11.01378345489502, "learning_rate": 3.1960543621218766e-06, "loss": 11.7937, "step": 78600 }, { "epoch": 34.18082042923119, "grad_norm": 20.022729873657227, "learning_rate": 3.1916703200350724e-06, "loss": 11.8135, "step": 78650 }, { "epoch": 34.2025536538984, "grad_norm": 6.636748790740967, "learning_rate": 3.1872862779482687e-06, "loss": 11.8014, "step": 78700 }, { "epoch": 34.22428687856561, "grad_norm": 13.776731491088867, "learning_rate": 3.182902235861465e-06, "loss": 11.8353, "step": 78750 }, { "epoch": 34.246020103232816, "grad_norm": 4.75822114944458, "learning_rate": 3.1785181937746607e-06, "loss": 11.7999, "step": 78800 }, { "epoch": 34.267753327900024, "grad_norm": 11.153389930725098, "learning_rate": 3.1741341516878565e-06, "loss": 11.8159, "step": 78850 }, { "epoch": 34.28948655256724, "grad_norm": 13.353851318359375, "learning_rate": 3.1697501096010523e-06, "loss": 11.807, "step": 78900 }, { "epoch": 34.31121977723445, "grad_norm": 5.565258026123047, "learning_rate": 3.165366067514248e-06, "loss": 11.8092, "step": 78950 }, { "epoch": 34.33295300190166, "grad_norm": 16.32341194152832, "learning_rate": 3.1609820254274444e-06, "loss": 11.8032, "step": 79000 }, { "epoch": 34.354686226568866, "grad_norm": 8.501863479614258, "learning_rate": 3.15659798334064e-06, "loss": 11.8121, "step": 79050 }, { "epoch": 34.376419451236075, "grad_norm": 5.864038467407227, "learning_rate": 3.152213941253836e-06, "loss": 11.8086, "step": 79100 }, { "epoch": 34.39815267590329, "grad_norm": 12.040675163269043, "learning_rate": 3.1478298991670327e-06, "loss": 11.8134, "step": 79150 }, { "epoch": 34.4198859005705, "grad_norm": 24.84530258178711, "learning_rate": 3.1434458570802285e-06, "loss": 11.8149, "step": 79200 }, { "epoch": 34.44161912523771, "grad_norm": 11.11407470703125, "learning_rate": 3.1390618149934243e-06, "loss": 11.8105, "step": 79250 }, { "epoch": 34.463352349904916, "grad_norm": 15.913960456848145, "learning_rate": 3.13467777290662e-06, "loss": 11.8004, "step": 79300 }, { "epoch": 34.485085574572125, "grad_norm": 7.755058288574219, "learning_rate": 3.1302937308198163e-06, "loss": 11.8044, "step": 79350 }, { "epoch": 34.50681879923934, "grad_norm": 5.433537006378174, "learning_rate": 3.125909688733012e-06, "loss": 11.8161, "step": 79400 }, { "epoch": 34.52855202390655, "grad_norm": 6.0616912841796875, "learning_rate": 3.121525646646208e-06, "loss": 11.806, "step": 79450 }, { "epoch": 34.55028524857376, "grad_norm": 8.498095512390137, "learning_rate": 3.1171416045594038e-06, "loss": 11.8407, "step": 79500 }, { "epoch": 34.572018473240966, "grad_norm": 24.549198150634766, "learning_rate": 3.1127575624725996e-06, "loss": 11.8168, "step": 79550 }, { "epoch": 34.593751697908175, "grad_norm": 11.136092185974121, "learning_rate": 3.1083735203857962e-06, "loss": 11.8184, "step": 79600 }, { "epoch": 34.61548492257539, "grad_norm": 8.212324142456055, "learning_rate": 3.103989478298992e-06, "loss": 11.802, "step": 79650 }, { "epoch": 34.6372181472426, "grad_norm": 4.912588596343994, "learning_rate": 3.099605436212188e-06, "loss": 11.8164, "step": 79700 }, { "epoch": 34.65895137190981, "grad_norm": 5.911812782287598, "learning_rate": 3.095221394125384e-06, "loss": 11.8035, "step": 79750 }, { "epoch": 34.68068459657702, "grad_norm": 13.612801551818848, "learning_rate": 3.09083735203858e-06, "loss": 11.8221, "step": 79800 }, { "epoch": 34.702417821244225, "grad_norm": 7.984292030334473, "learning_rate": 3.0864533099517757e-06, "loss": 11.8073, "step": 79850 }, { "epoch": 34.724151045911434, "grad_norm": 12.358894348144531, "learning_rate": 3.0820692678649715e-06, "loss": 11.793, "step": 79900 }, { "epoch": 34.74588427057865, "grad_norm": 9.695011138916016, "learning_rate": 3.0776852257781673e-06, "loss": 11.8138, "step": 79950 }, { "epoch": 34.76761749524586, "grad_norm": 13.982564926147461, "learning_rate": 3.0733011836913636e-06, "loss": 11.8171, "step": 80000 }, { "epoch": 34.76761749524586, "eval_cer": 0.0757471023169121, "eval_loss": 2.4033260345458984, "eval_runtime": 399.2668, "eval_samples_per_second": 13.54, "eval_steps_per_second": 3.386, "eval_wer": 0.22807348092322186, "step": 80000 }, { "epoch": 34.78935071991307, "grad_norm": 12.179760932922363, "learning_rate": 3.06891714160456e-06, "loss": 11.8053, "step": 80050 }, { "epoch": 34.811083944580275, "grad_norm": 11.413451194763184, "learning_rate": 3.0645330995177556e-06, "loss": 11.8123, "step": 80100 }, { "epoch": 34.832817169247484, "grad_norm": 4.437108993530273, "learning_rate": 3.060149057430952e-06, "loss": 11.8026, "step": 80150 }, { "epoch": 34.8545503939147, "grad_norm": 73.1333236694336, "learning_rate": 3.0557650153441477e-06, "loss": 11.8005, "step": 80200 }, { "epoch": 34.87628361858191, "grad_norm": 8.468038558959961, "learning_rate": 3.0513809732573435e-06, "loss": 11.8035, "step": 80250 }, { "epoch": 34.89801684324912, "grad_norm": 6.311350345611572, "learning_rate": 3.0469969311705393e-06, "loss": 11.8177, "step": 80300 }, { "epoch": 34.919750067916326, "grad_norm": 6.435243606567383, "learning_rate": 3.0426128890837355e-06, "loss": 11.8085, "step": 80350 }, { "epoch": 34.941483292583534, "grad_norm": 23.506589889526367, "learning_rate": 3.0382288469969313e-06, "loss": 11.8198, "step": 80400 }, { "epoch": 34.96321651725075, "grad_norm": 14.792353630065918, "learning_rate": 3.033844804910127e-06, "loss": 11.8236, "step": 80450 }, { "epoch": 34.98494974191796, "grad_norm": 9.948033332824707, "learning_rate": 3.0294607628233234e-06, "loss": 11.7999, "step": 80500 }, { "epoch": 35.006519967400166, "grad_norm": 10.65179443359375, "learning_rate": 3.0250767207365196e-06, "loss": 11.7357, "step": 80550 }, { "epoch": 35.028253192067375, "grad_norm": 17.818878173828125, "learning_rate": 3.0206926786497154e-06, "loss": 11.8027, "step": 80600 }, { "epoch": 35.04998641673458, "grad_norm": 12.1105318069458, "learning_rate": 3.0163086365629112e-06, "loss": 11.8061, "step": 80650 }, { "epoch": 35.07171964140179, "grad_norm": 9.265717506408691, "learning_rate": 3.011924594476107e-06, "loss": 11.8093, "step": 80700 }, { "epoch": 35.093452866069, "grad_norm": 7.630993366241455, "learning_rate": 3.0075405523893033e-06, "loss": 11.8209, "step": 80750 }, { "epoch": 35.115186090736216, "grad_norm": 9.022866249084473, "learning_rate": 3.003156510302499e-06, "loss": 11.8025, "step": 80800 }, { "epoch": 35.136919315403425, "grad_norm": 7.761841297149658, "learning_rate": 2.998772468215695e-06, "loss": 11.8045, "step": 80850 }, { "epoch": 35.158652540070634, "grad_norm": 5.690446853637695, "learning_rate": 2.9943884261288915e-06, "loss": 11.7887, "step": 80900 }, { "epoch": 35.18038576473784, "grad_norm": 8.062559127807617, "learning_rate": 2.9900043840420874e-06, "loss": 11.8076, "step": 80950 }, { "epoch": 35.20211898940505, "grad_norm": 11.13079833984375, "learning_rate": 2.985620341955283e-06, "loss": 11.791, "step": 81000 }, { "epoch": 35.22385221407227, "grad_norm": 9.493050575256348, "learning_rate": 2.981236299868479e-06, "loss": 11.7975, "step": 81050 }, { "epoch": 35.245585438739475, "grad_norm": 5.601952075958252, "learning_rate": 2.9768522577816748e-06, "loss": 11.8132, "step": 81100 }, { "epoch": 35.267318663406684, "grad_norm": 16.74399757385254, "learning_rate": 2.972468215694871e-06, "loss": 11.8072, "step": 81150 }, { "epoch": 35.28905188807389, "grad_norm": 14.785292625427246, "learning_rate": 2.968084173608067e-06, "loss": 11.7861, "step": 81200 }, { "epoch": 35.3107851127411, "grad_norm": 4.938207149505615, "learning_rate": 2.9637001315212626e-06, "loss": 11.7976, "step": 81250 }, { "epoch": 35.33251833740831, "grad_norm": 7.194094181060791, "learning_rate": 2.9593160894344585e-06, "loss": 11.8092, "step": 81300 }, { "epoch": 35.354251562075525, "grad_norm": 8.084842681884766, "learning_rate": 2.954932047347655e-06, "loss": 11.8066, "step": 81350 }, { "epoch": 35.375984786742734, "grad_norm": 13.50389289855957, "learning_rate": 2.950548005260851e-06, "loss": 11.8166, "step": 81400 }, { "epoch": 35.39771801140994, "grad_norm": 34.29204559326172, "learning_rate": 2.9461639631740467e-06, "loss": 11.7957, "step": 81450 }, { "epoch": 35.41945123607715, "grad_norm": 115.18916320800781, "learning_rate": 2.9417799210872425e-06, "loss": 11.7966, "step": 81500 }, { "epoch": 35.44118446074436, "grad_norm": 6.231071949005127, "learning_rate": 2.9373958790004388e-06, "loss": 11.8096, "step": 81550 }, { "epoch": 35.462917685411576, "grad_norm": 12.070874214172363, "learning_rate": 2.9330118369136346e-06, "loss": 11.7912, "step": 81600 }, { "epoch": 35.484650910078784, "grad_norm": 11.014456748962402, "learning_rate": 2.9286277948268304e-06, "loss": 11.7999, "step": 81650 }, { "epoch": 35.50638413474599, "grad_norm": 7.878298759460449, "learning_rate": 2.924243752740026e-06, "loss": 11.7924, "step": 81700 }, { "epoch": 35.5281173594132, "grad_norm": 9.946538925170898, "learning_rate": 2.9198597106532224e-06, "loss": 11.7876, "step": 81750 }, { "epoch": 35.54985058408041, "grad_norm": 9.082895278930664, "learning_rate": 2.9154756685664187e-06, "loss": 11.7947, "step": 81800 }, { "epoch": 35.571583808747626, "grad_norm": 8.261942863464355, "learning_rate": 2.9110916264796145e-06, "loss": 11.8, "step": 81850 }, { "epoch": 35.593317033414834, "grad_norm": 8.274785041809082, "learning_rate": 2.9067075843928107e-06, "loss": 11.7947, "step": 81900 }, { "epoch": 35.61505025808204, "grad_norm": 12.555307388305664, "learning_rate": 2.9023235423060065e-06, "loss": 11.8046, "step": 81950 }, { "epoch": 35.63678348274925, "grad_norm": 16.864561080932617, "learning_rate": 2.8979395002192023e-06, "loss": 11.8078, "step": 82000 }, { "epoch": 35.65851670741646, "grad_norm": 7.3884782791137695, "learning_rate": 2.893555458132398e-06, "loss": 11.7997, "step": 82050 }, { "epoch": 35.680249932083676, "grad_norm": 5.330382823944092, "learning_rate": 2.889171416045594e-06, "loss": 11.7931, "step": 82100 }, { "epoch": 35.701983156750885, "grad_norm": 135.67330932617188, "learning_rate": 2.88478737395879e-06, "loss": 11.8205, "step": 82150 }, { "epoch": 35.72371638141809, "grad_norm": 11.841288566589355, "learning_rate": 2.880403331871986e-06, "loss": 11.8093, "step": 82200 }, { "epoch": 35.7454496060853, "grad_norm": 7.48586368560791, "learning_rate": 2.8760192897851822e-06, "loss": 11.7827, "step": 82250 }, { "epoch": 35.76718283075251, "grad_norm": 8.122782707214355, "learning_rate": 2.8716352476983785e-06, "loss": 11.8002, "step": 82300 }, { "epoch": 35.788916055419726, "grad_norm": 11.976639747619629, "learning_rate": 2.8672512056115743e-06, "loss": 11.7877, "step": 82350 }, { "epoch": 35.810649280086935, "grad_norm": 4.260416507720947, "learning_rate": 2.86286716352477e-06, "loss": 11.8016, "step": 82400 }, { "epoch": 35.83238250475414, "grad_norm": 6.422642230987549, "learning_rate": 2.858483121437966e-06, "loss": 11.7963, "step": 82450 }, { "epoch": 35.85411572942135, "grad_norm": 17.52088165283203, "learning_rate": 2.8540990793511617e-06, "loss": 11.8099, "step": 82500 }, { "epoch": 35.87584895408856, "grad_norm": 18.16527557373047, "learning_rate": 2.849715037264358e-06, "loss": 11.8045, "step": 82550 }, { "epoch": 35.897582178755776, "grad_norm": 21.54142189025879, "learning_rate": 2.8453309951775538e-06, "loss": 11.8147, "step": 82600 }, { "epoch": 35.919315403422985, "grad_norm": 10.738289833068848, "learning_rate": 2.8409469530907496e-06, "loss": 11.7977, "step": 82650 }, { "epoch": 35.94104862809019, "grad_norm": 7.5517144203186035, "learning_rate": 2.8365629110039462e-06, "loss": 11.8223, "step": 82700 }, { "epoch": 35.9627818527574, "grad_norm": 17.005064010620117, "learning_rate": 2.832178868917142e-06, "loss": 11.7941, "step": 82750 }, { "epoch": 35.98451507742461, "grad_norm": 20.802410125732422, "learning_rate": 2.827794826830338e-06, "loss": 11.8144, "step": 82800 }, { "epoch": 36.00608530290682, "grad_norm": 4.643016815185547, "learning_rate": 2.8234107847435337e-06, "loss": 11.7041, "step": 82850 }, { "epoch": 36.02781852757403, "grad_norm": 5.0188398361206055, "learning_rate": 2.81902674265673e-06, "loss": 11.7857, "step": 82900 }, { "epoch": 36.049551752241236, "grad_norm": 43.052833557128906, "learning_rate": 2.8146427005699257e-06, "loss": 11.7965, "step": 82950 }, { "epoch": 36.07128497690845, "grad_norm": 5.6486382484436035, "learning_rate": 2.8102586584831215e-06, "loss": 11.7801, "step": 83000 }, { "epoch": 36.09301820157566, "grad_norm": 9.257708549499512, "learning_rate": 2.8058746163963173e-06, "loss": 11.7884, "step": 83050 }, { "epoch": 36.11475142624287, "grad_norm": 9.969672203063965, "learning_rate": 2.801490574309514e-06, "loss": 11.7893, "step": 83100 }, { "epoch": 36.13648465091008, "grad_norm": 4.864919185638428, "learning_rate": 2.79710653222271e-06, "loss": 11.7891, "step": 83150 }, { "epoch": 36.158217875577286, "grad_norm": 15.945795059204102, "learning_rate": 2.7927224901359056e-06, "loss": 11.8076, "step": 83200 }, { "epoch": 36.1799511002445, "grad_norm": 8.471965789794922, "learning_rate": 2.7883384480491014e-06, "loss": 11.8079, "step": 83250 }, { "epoch": 36.20168432491171, "grad_norm": 3.7765846252441406, "learning_rate": 2.7839544059622976e-06, "loss": 11.7893, "step": 83300 }, { "epoch": 36.22341754957892, "grad_norm": 32.80738067626953, "learning_rate": 2.7795703638754935e-06, "loss": 11.8117, "step": 83350 }, { "epoch": 36.24515077424613, "grad_norm": 11.759632110595703, "learning_rate": 2.7751863217886893e-06, "loss": 11.7887, "step": 83400 }, { "epoch": 36.266883998913336, "grad_norm": 9.582806587219238, "learning_rate": 2.770802279701885e-06, "loss": 11.7988, "step": 83450 }, { "epoch": 36.28861722358055, "grad_norm": 13.065892219543457, "learning_rate": 2.7664182376150813e-06, "loss": 11.7974, "step": 83500 }, { "epoch": 36.31035044824776, "grad_norm": 25.009721755981445, "learning_rate": 2.7620341955282775e-06, "loss": 11.7938, "step": 83550 }, { "epoch": 36.33208367291497, "grad_norm": 7.72334098815918, "learning_rate": 2.7576501534414734e-06, "loss": 11.7859, "step": 83600 }, { "epoch": 36.35381689758218, "grad_norm": 8.665655136108398, "learning_rate": 2.753266111354669e-06, "loss": 11.7859, "step": 83650 }, { "epoch": 36.375550122249386, "grad_norm": 19.630573272705078, "learning_rate": 2.7488820692678654e-06, "loss": 11.7842, "step": 83700 }, { "epoch": 36.3972833469166, "grad_norm": 13.641834259033203, "learning_rate": 2.7444980271810612e-06, "loss": 11.8031, "step": 83750 }, { "epoch": 36.41901657158381, "grad_norm": 5.9598917961120605, "learning_rate": 2.740113985094257e-06, "loss": 11.7813, "step": 83800 }, { "epoch": 36.44074979625102, "grad_norm": 8.549332618713379, "learning_rate": 2.735729943007453e-06, "loss": 11.7959, "step": 83850 }, { "epoch": 36.46248302091823, "grad_norm": 4.3795857429504395, "learning_rate": 2.731345900920649e-06, "loss": 11.7963, "step": 83900 }, { "epoch": 36.484216245585436, "grad_norm": 7.300856113433838, "learning_rate": 2.726961858833845e-06, "loss": 11.7902, "step": 83950 }, { "epoch": 36.50594947025265, "grad_norm": 7.026275157928467, "learning_rate": 2.722577816747041e-06, "loss": 11.7988, "step": 84000 }, { "epoch": 36.52768269491986, "grad_norm": 12.537973403930664, "learning_rate": 2.7181937746602373e-06, "loss": 11.7891, "step": 84050 }, { "epoch": 36.54941591958707, "grad_norm": 6.903670787811279, "learning_rate": 2.713809732573433e-06, "loss": 11.7984, "step": 84100 }, { "epoch": 36.57114914425428, "grad_norm": 11.342251777648926, "learning_rate": 2.709425690486629e-06, "loss": 11.7915, "step": 84150 }, { "epoch": 36.59288236892149, "grad_norm": 10.707886695861816, "learning_rate": 2.7050416483998248e-06, "loss": 11.7955, "step": 84200 }, { "epoch": 36.614615593588695, "grad_norm": 7.921166896820068, "learning_rate": 2.7006576063130206e-06, "loss": 11.7868, "step": 84250 }, { "epoch": 36.63634881825591, "grad_norm": 9.0649995803833, "learning_rate": 2.696273564226217e-06, "loss": 11.7949, "step": 84300 }, { "epoch": 36.65808204292312, "grad_norm": 13.355379104614258, "learning_rate": 2.6918895221394126e-06, "loss": 11.7763, "step": 84350 }, { "epoch": 36.67981526759033, "grad_norm": 12.289958953857422, "learning_rate": 2.6875054800526084e-06, "loss": 11.7861, "step": 84400 }, { "epoch": 36.70154849225754, "grad_norm": 4.684927940368652, "learning_rate": 2.683121437965805e-06, "loss": 11.7803, "step": 84450 }, { "epoch": 36.723281716924745, "grad_norm": 7.917582035064697, "learning_rate": 2.678737395879001e-06, "loss": 11.799, "step": 84500 }, { "epoch": 36.74501494159196, "grad_norm": 5.413401126861572, "learning_rate": 2.6743533537921967e-06, "loss": 11.7873, "step": 84550 }, { "epoch": 36.76674816625917, "grad_norm": 14.283327102661133, "learning_rate": 2.6699693117053925e-06, "loss": 11.7867, "step": 84600 }, { "epoch": 36.78848139092638, "grad_norm": 8.163966178894043, "learning_rate": 2.6655852696185883e-06, "loss": 11.8112, "step": 84650 }, { "epoch": 36.81021461559359, "grad_norm": 7.235820770263672, "learning_rate": 2.6612012275317846e-06, "loss": 11.781, "step": 84700 }, { "epoch": 36.831947840260796, "grad_norm": 4.746747016906738, "learning_rate": 2.6568171854449804e-06, "loss": 11.7941, "step": 84750 }, { "epoch": 36.85368106492801, "grad_norm": 4.514492511749268, "learning_rate": 2.652433143358176e-06, "loss": 11.7947, "step": 84800 }, { "epoch": 36.87541428959522, "grad_norm": 10.46290111541748, "learning_rate": 2.648049101271372e-06, "loss": 11.7826, "step": 84850 }, { "epoch": 36.89714751426243, "grad_norm": 8.848064422607422, "learning_rate": 2.6436650591845687e-06, "loss": 11.7802, "step": 84900 }, { "epoch": 36.91888073892964, "grad_norm": 6.194151401519775, "learning_rate": 2.6392810170977645e-06, "loss": 11.7857, "step": 84950 }, { "epoch": 36.940613963596846, "grad_norm": 8.114167213439941, "learning_rate": 2.6348969750109603e-06, "loss": 11.7934, "step": 85000 }, { "epoch": 36.96234718826406, "grad_norm": 11.507193565368652, "learning_rate": 2.6305129329241565e-06, "loss": 11.7928, "step": 85050 }, { "epoch": 36.98408041293127, "grad_norm": 12.524467468261719, "learning_rate": 2.6261288908373523e-06, "loss": 11.7915, "step": 85100 }, { "epoch": 37.00565063841348, "grad_norm": 12.674412727355957, "learning_rate": 2.621744848750548e-06, "loss": 11.6996, "step": 85150 }, { "epoch": 37.027383863080686, "grad_norm": 4.092529773712158, "learning_rate": 2.617360806663744e-06, "loss": 11.784, "step": 85200 }, { "epoch": 37.049117087747895, "grad_norm": 4.772137641906738, "learning_rate": 2.6129767645769398e-06, "loss": 11.7883, "step": 85250 }, { "epoch": 37.070850312415104, "grad_norm": 9.263222694396973, "learning_rate": 2.608592722490136e-06, "loss": 11.7655, "step": 85300 }, { "epoch": 37.09258353708231, "grad_norm": 11.615614891052246, "learning_rate": 2.6042086804033322e-06, "loss": 11.7987, "step": 85350 }, { "epoch": 37.11431676174953, "grad_norm": 3.8077142238616943, "learning_rate": 2.599824638316528e-06, "loss": 11.7786, "step": 85400 }, { "epoch": 37.13604998641674, "grad_norm": 5.6897993087768555, "learning_rate": 2.5954405962297243e-06, "loss": 11.797, "step": 85450 }, { "epoch": 37.157783211083945, "grad_norm": 5.3308305740356445, "learning_rate": 2.59105655414292e-06, "loss": 11.7697, "step": 85500 }, { "epoch": 37.179516435751154, "grad_norm": 8.419775009155273, "learning_rate": 2.586672512056116e-06, "loss": 11.7771, "step": 85550 }, { "epoch": 37.20124966041836, "grad_norm": 4.629072189331055, "learning_rate": 2.5822884699693117e-06, "loss": 11.7899, "step": 85600 }, { "epoch": 37.22298288508557, "grad_norm": 16.38741683959961, "learning_rate": 2.5779044278825075e-06, "loss": 11.7799, "step": 85650 }, { "epoch": 37.24471610975279, "grad_norm": 39.32244110107422, "learning_rate": 2.5735203857957038e-06, "loss": 11.791, "step": 85700 }, { "epoch": 37.266449334419995, "grad_norm": 14.12863826751709, "learning_rate": 2.5691363437089e-06, "loss": 11.7906, "step": 85750 }, { "epoch": 37.288182559087204, "grad_norm": 7.304044723510742, "learning_rate": 2.564752301622096e-06, "loss": 11.7865, "step": 85800 }, { "epoch": 37.30991578375441, "grad_norm": 11.347620964050293, "learning_rate": 2.560368259535292e-06, "loss": 11.8021, "step": 85850 }, { "epoch": 37.33164900842162, "grad_norm": 9.358373641967773, "learning_rate": 2.555984217448488e-06, "loss": 11.7979, "step": 85900 }, { "epoch": 37.35338223308884, "grad_norm": 3.5885915756225586, "learning_rate": 2.5516001753616837e-06, "loss": 11.7782, "step": 85950 }, { "epoch": 37.375115457756046, "grad_norm": 9.129725456237793, "learning_rate": 2.5472161332748795e-06, "loss": 11.783, "step": 86000 }, { "epoch": 37.396848682423254, "grad_norm": 8.22261905670166, "learning_rate": 2.5428320911880757e-06, "loss": 11.7652, "step": 86050 }, { "epoch": 37.41858190709046, "grad_norm": 6.796608924865723, "learning_rate": 2.5384480491012715e-06, "loss": 11.7897, "step": 86100 }, { "epoch": 37.44031513175767, "grad_norm": 3.8288304805755615, "learning_rate": 2.5340640070144673e-06, "loss": 11.7803, "step": 86150 }, { "epoch": 37.46204835642489, "grad_norm": 4.8984599113464355, "learning_rate": 2.529679964927664e-06, "loss": 11.7998, "step": 86200 }, { "epoch": 37.483781581092096, "grad_norm": 11.828535079956055, "learning_rate": 2.52529592284086e-06, "loss": 11.7756, "step": 86250 }, { "epoch": 37.505514805759304, "grad_norm": 7.632526397705078, "learning_rate": 2.5209118807540556e-06, "loss": 11.7816, "step": 86300 }, { "epoch": 37.52724803042651, "grad_norm": 13.461671829223633, "learning_rate": 2.5165278386672514e-06, "loss": 11.7924, "step": 86350 }, { "epoch": 37.54898125509372, "grad_norm": 5.831872940063477, "learning_rate": 2.5121437965804472e-06, "loss": 11.767, "step": 86400 }, { "epoch": 37.57071447976094, "grad_norm": 15.29990005493164, "learning_rate": 2.5077597544936435e-06, "loss": 11.7853, "step": 86450 }, { "epoch": 37.592447704428146, "grad_norm": 18.313222885131836, "learning_rate": 2.5033757124068393e-06, "loss": 11.7759, "step": 86500 }, { "epoch": 37.614180929095355, "grad_norm": 7.100087642669678, "learning_rate": 2.4989916703200355e-06, "loss": 11.7861, "step": 86550 }, { "epoch": 37.63591415376256, "grad_norm": 10.970837593078613, "learning_rate": 2.4946076282332313e-06, "loss": 11.7859, "step": 86600 }, { "epoch": 37.65764737842977, "grad_norm": 17.918962478637695, "learning_rate": 2.490223586146427e-06, "loss": 11.781, "step": 86650 }, { "epoch": 37.67938060309699, "grad_norm": 23.407426834106445, "learning_rate": 2.485839544059623e-06, "loss": 11.7763, "step": 86700 }, { "epoch": 37.701113827764196, "grad_norm": 6.149535655975342, "learning_rate": 2.481455501972819e-06, "loss": 11.7819, "step": 86750 }, { "epoch": 37.722847052431405, "grad_norm": 5.372469425201416, "learning_rate": 2.477071459886015e-06, "loss": 11.7762, "step": 86800 }, { "epoch": 37.74458027709861, "grad_norm": 21.797292709350586, "learning_rate": 2.472687417799211e-06, "loss": 11.7822, "step": 86850 }, { "epoch": 37.76631350176582, "grad_norm": 17.68259048461914, "learning_rate": 2.468303375712407e-06, "loss": 11.7753, "step": 86900 }, { "epoch": 37.78804672643304, "grad_norm": 10.163092613220215, "learning_rate": 2.463919333625603e-06, "loss": 11.7662, "step": 86950 }, { "epoch": 37.809779951100246, "grad_norm": 6.926383972167969, "learning_rate": 2.459535291538799e-06, "loss": 11.7897, "step": 87000 }, { "epoch": 37.831513175767455, "grad_norm": 8.474647521972656, "learning_rate": 2.455151249451995e-06, "loss": 11.78, "step": 87050 }, { "epoch": 37.853246400434664, "grad_norm": 16.252666473388672, "learning_rate": 2.4507672073651907e-06, "loss": 11.7683, "step": 87100 }, { "epoch": 37.87497962510187, "grad_norm": 9.422881126403809, "learning_rate": 2.446383165278387e-06, "loss": 11.78, "step": 87150 }, { "epoch": 37.89671284976909, "grad_norm": 13.145493507385254, "learning_rate": 2.441999123191583e-06, "loss": 11.7806, "step": 87200 }, { "epoch": 37.9184460744363, "grad_norm": 9.204483985900879, "learning_rate": 2.437615081104779e-06, "loss": 11.7722, "step": 87250 }, { "epoch": 37.940179299103505, "grad_norm": 9.611700057983398, "learning_rate": 2.4332310390179748e-06, "loss": 11.7957, "step": 87300 }, { "epoch": 37.961912523770714, "grad_norm": 11.884017944335938, "learning_rate": 2.4288469969311706e-06, "loss": 11.7755, "step": 87350 }, { "epoch": 37.98364574843792, "grad_norm": 5.692808151245117, "learning_rate": 2.424462954844367e-06, "loss": 11.7766, "step": 87400 }, { "epoch": 38.00521597392013, "grad_norm": 66.74461364746094, "learning_rate": 2.4200789127575626e-06, "loss": 11.7051, "step": 87450 }, { "epoch": 38.02694919858734, "grad_norm": 5.268041610717773, "learning_rate": 2.4156948706707584e-06, "loss": 11.7609, "step": 87500 }, { "epoch": 38.04868242325455, "grad_norm": 8.159131050109863, "learning_rate": 2.4113108285839547e-06, "loss": 11.773, "step": 87550 }, { "epoch": 38.07041564792176, "grad_norm": 8.749338150024414, "learning_rate": 2.4069267864971505e-06, "loss": 11.7782, "step": 87600 }, { "epoch": 38.09214887258897, "grad_norm": 9.121374130249023, "learning_rate": 2.4025427444103467e-06, "loss": 11.7703, "step": 87650 }, { "epoch": 38.11388209725618, "grad_norm": 10.743656158447266, "learning_rate": 2.3981587023235425e-06, "loss": 11.7798, "step": 87700 }, { "epoch": 38.13561532192339, "grad_norm": 5.2683000564575195, "learning_rate": 2.3937746602367383e-06, "loss": 11.7791, "step": 87750 }, { "epoch": 38.1573485465906, "grad_norm": 9.600702285766602, "learning_rate": 2.389390618149934e-06, "loss": 11.7658, "step": 87800 }, { "epoch": 38.17908177125781, "grad_norm": 10.094902992248535, "learning_rate": 2.3850065760631304e-06, "loss": 11.7891, "step": 87850 }, { "epoch": 38.20081499592502, "grad_norm": 8.227887153625488, "learning_rate": 2.3806225339763266e-06, "loss": 11.7706, "step": 87900 }, { "epoch": 38.22254822059223, "grad_norm": 7.997677803039551, "learning_rate": 2.3762384918895224e-06, "loss": 11.772, "step": 87950 }, { "epoch": 38.24428144525944, "grad_norm": 6.51764440536499, "learning_rate": 2.3718544498027182e-06, "loss": 11.7739, "step": 88000 }, { "epoch": 38.26601466992665, "grad_norm": 4.949069499969482, "learning_rate": 2.367470407715914e-06, "loss": 11.778, "step": 88050 }, { "epoch": 38.28774789459386, "grad_norm": 4.438246250152588, "learning_rate": 2.3630863656291103e-06, "loss": 11.7742, "step": 88100 }, { "epoch": 38.30948111926107, "grad_norm": 11.066926956176758, "learning_rate": 2.358702323542306e-06, "loss": 11.7817, "step": 88150 }, { "epoch": 38.33121434392828, "grad_norm": 6.765926837921143, "learning_rate": 2.3543182814555023e-06, "loss": 11.7767, "step": 88200 }, { "epoch": 38.35294756859549, "grad_norm": 6.704973220825195, "learning_rate": 2.349934239368698e-06, "loss": 11.7671, "step": 88250 }, { "epoch": 38.3746807932627, "grad_norm": 10.575370788574219, "learning_rate": 2.3455501972818944e-06, "loss": 11.771, "step": 88300 }, { "epoch": 38.396414017929914, "grad_norm": 3.860527992248535, "learning_rate": 2.34116615519509e-06, "loss": 11.7685, "step": 88350 }, { "epoch": 38.41814724259712, "grad_norm": 10.35341739654541, "learning_rate": 2.336782113108286e-06, "loss": 11.7715, "step": 88400 }, { "epoch": 38.43988046726433, "grad_norm": 7.268162727355957, "learning_rate": 2.332398071021482e-06, "loss": 11.7878, "step": 88450 }, { "epoch": 38.46161369193154, "grad_norm": 6.3647871017456055, "learning_rate": 2.328014028934678e-06, "loss": 11.7675, "step": 88500 }, { "epoch": 38.48334691659875, "grad_norm": 27.453014373779297, "learning_rate": 2.323629986847874e-06, "loss": 11.7635, "step": 88550 }, { "epoch": 38.505080141265964, "grad_norm": 12.105439186096191, "learning_rate": 2.31924594476107e-06, "loss": 11.7825, "step": 88600 }, { "epoch": 38.52681336593317, "grad_norm": 12.992817878723145, "learning_rate": 2.314861902674266e-06, "loss": 11.76, "step": 88650 }, { "epoch": 38.54854659060038, "grad_norm": 8.603561401367188, "learning_rate": 2.3104778605874617e-06, "loss": 11.7655, "step": 88700 }, { "epoch": 38.57027981526759, "grad_norm": 4.036582946777344, "learning_rate": 2.306093818500658e-06, "loss": 11.7882, "step": 88750 }, { "epoch": 38.5920130399348, "grad_norm": 5.422863483428955, "learning_rate": 2.3017097764138538e-06, "loss": 11.7683, "step": 88800 }, { "epoch": 38.61374626460201, "grad_norm": 5.842752933502197, "learning_rate": 2.2973257343270496e-06, "loss": 11.7695, "step": 88850 }, { "epoch": 38.63547948926922, "grad_norm": 9.653190612792969, "learning_rate": 2.292941692240246e-06, "loss": 11.7873, "step": 88900 }, { "epoch": 38.65721271393643, "grad_norm": 5.730354309082031, "learning_rate": 2.2885576501534416e-06, "loss": 11.768, "step": 88950 }, { "epoch": 38.67894593860364, "grad_norm": 17.826345443725586, "learning_rate": 2.284173608066638e-06, "loss": 11.776, "step": 89000 }, { "epoch": 38.70067916327085, "grad_norm": 9.027566909790039, "learning_rate": 2.2797895659798336e-06, "loss": 11.755, "step": 89050 }, { "epoch": 38.72241238793806, "grad_norm": 4.53999662399292, "learning_rate": 2.2754055238930295e-06, "loss": 11.765, "step": 89100 }, { "epoch": 38.74414561260527, "grad_norm": 8.569631576538086, "learning_rate": 2.2710214818062253e-06, "loss": 11.7777, "step": 89150 }, { "epoch": 38.76587883727248, "grad_norm": 24.965681076049805, "learning_rate": 2.2666374397194215e-06, "loss": 11.7738, "step": 89200 }, { "epoch": 38.78761206193969, "grad_norm": 7.554117202758789, "learning_rate": 2.2622533976326173e-06, "loss": 11.7801, "step": 89250 }, { "epoch": 38.8093452866069, "grad_norm": 16.02465057373047, "learning_rate": 2.2578693555458135e-06, "loss": 11.785, "step": 89300 }, { "epoch": 38.83107851127411, "grad_norm": 9.892585754394531, "learning_rate": 2.2534853134590094e-06, "loss": 11.7812, "step": 89350 }, { "epoch": 38.85281173594132, "grad_norm": 20.471792221069336, "learning_rate": 2.2491012713722056e-06, "loss": 11.774, "step": 89400 }, { "epoch": 38.87454496060853, "grad_norm": 15.924908638000488, "learning_rate": 2.2447172292854014e-06, "loss": 11.7735, "step": 89450 }, { "epoch": 38.89627818527574, "grad_norm": 9.257697105407715, "learning_rate": 2.2403331871985972e-06, "loss": 11.784, "step": 89500 }, { "epoch": 38.91801140994295, "grad_norm": 8.59609317779541, "learning_rate": 2.235949145111793e-06, "loss": 11.7924, "step": 89550 }, { "epoch": 38.93974463461016, "grad_norm": 5.643759727478027, "learning_rate": 2.2315651030249893e-06, "loss": 11.7727, "step": 89600 }, { "epoch": 38.96147785927737, "grad_norm": 3.887133836746216, "learning_rate": 2.227181060938185e-06, "loss": 11.7728, "step": 89650 }, { "epoch": 38.98321108394458, "grad_norm": 15.085949897766113, "learning_rate": 2.2227970188513813e-06, "loss": 11.7837, "step": 89700 }, { "epoch": 39.00478130942679, "grad_norm": 4.734740257263184, "learning_rate": 2.218412976764577e-06, "loss": 11.6785, "step": 89750 }, { "epoch": 39.026514534094, "grad_norm": 7.277717590332031, "learning_rate": 2.214028934677773e-06, "loss": 11.7636, "step": 89800 }, { "epoch": 39.04824775876121, "grad_norm": 12.622576713562012, "learning_rate": 2.209644892590969e-06, "loss": 11.7632, "step": 89850 }, { "epoch": 39.069980983428415, "grad_norm": 8.72470760345459, "learning_rate": 2.205260850504165e-06, "loss": 11.7806, "step": 89900 }, { "epoch": 39.091714208095624, "grad_norm": 8.548195838928223, "learning_rate": 2.2008768084173608e-06, "loss": 11.7642, "step": 89950 }, { "epoch": 39.11344743276284, "grad_norm": 5.267911911010742, "learning_rate": 2.196492766330557e-06, "loss": 11.7606, "step": 90000 }, { "epoch": 39.11344743276284, "eval_cer": 0.07434609260242184, "eval_loss": 2.4127438068389893, "eval_runtime": 397.8168, "eval_samples_per_second": 13.589, "eval_steps_per_second": 3.399, "eval_wer": 0.22556131260794474, "step": 90000 }, { "epoch": 39.13518065743005, "grad_norm": 13.38624095916748, "learning_rate": 2.1921087242437532e-06, "loss": 11.7632, "step": 90050 }, { "epoch": 39.15691388209726, "grad_norm": 13.824868202209473, "learning_rate": 2.187724682156949e-06, "loss": 11.7722, "step": 90100 }, { "epoch": 39.178647106764465, "grad_norm": 10.600975036621094, "learning_rate": 2.183340640070145e-06, "loss": 11.7692, "step": 90150 }, { "epoch": 39.200380331431674, "grad_norm": 10.806214332580566, "learning_rate": 2.1789565979833407e-06, "loss": 11.753, "step": 90200 }, { "epoch": 39.22211355609888, "grad_norm": 7.800635814666748, "learning_rate": 2.1745725558965365e-06, "loss": 11.7616, "step": 90250 }, { "epoch": 39.2438467807661, "grad_norm": 7.492881774902344, "learning_rate": 2.1701885138097327e-06, "loss": 11.7439, "step": 90300 }, { "epoch": 39.26558000543331, "grad_norm": 12.061040878295898, "learning_rate": 2.1658044717229285e-06, "loss": 11.7567, "step": 90350 }, { "epoch": 39.287313230100516, "grad_norm": 6.541141510009766, "learning_rate": 2.1614204296361248e-06, "loss": 11.775, "step": 90400 }, { "epoch": 39.309046454767724, "grad_norm": 5.959283828735352, "learning_rate": 2.1570363875493206e-06, "loss": 11.7633, "step": 90450 }, { "epoch": 39.33077967943493, "grad_norm": 9.226263046264648, "learning_rate": 2.152652345462517e-06, "loss": 11.7746, "step": 90500 }, { "epoch": 39.35251290410215, "grad_norm": 15.486861228942871, "learning_rate": 2.1482683033757126e-06, "loss": 11.775, "step": 90550 }, { "epoch": 39.37424612876936, "grad_norm": 6.420067310333252, "learning_rate": 2.1438842612889084e-06, "loss": 11.7812, "step": 90600 }, { "epoch": 39.395979353436566, "grad_norm": 7.908544540405273, "learning_rate": 2.1395002192021042e-06, "loss": 11.7573, "step": 90650 }, { "epoch": 39.417712578103774, "grad_norm": 7.737216472625732, "learning_rate": 2.1351161771153005e-06, "loss": 11.7621, "step": 90700 }, { "epoch": 39.43944580277098, "grad_norm": 5.630201816558838, "learning_rate": 2.1307321350284967e-06, "loss": 11.7629, "step": 90750 }, { "epoch": 39.4611790274382, "grad_norm": 5.563817024230957, "learning_rate": 2.1263480929416925e-06, "loss": 11.7663, "step": 90800 }, { "epoch": 39.48291225210541, "grad_norm": 6.785152912139893, "learning_rate": 2.1219640508548883e-06, "loss": 11.7575, "step": 90850 }, { "epoch": 39.504645476772616, "grad_norm": 5.261542797088623, "learning_rate": 2.117580008768084e-06, "loss": 11.7676, "step": 90900 }, { "epoch": 39.526378701439825, "grad_norm": 8.951974868774414, "learning_rate": 2.1131959666812804e-06, "loss": 11.7626, "step": 90950 }, { "epoch": 39.54811192610703, "grad_norm": 5.925467491149902, "learning_rate": 2.108811924594476e-06, "loss": 11.7652, "step": 91000 }, { "epoch": 39.56984515077425, "grad_norm": 6.1618194580078125, "learning_rate": 2.1044278825076724e-06, "loss": 11.7526, "step": 91050 }, { "epoch": 39.59157837544146, "grad_norm": 5.625064373016357, "learning_rate": 2.1000438404208682e-06, "loss": 11.7605, "step": 91100 }, { "epoch": 39.613311600108666, "grad_norm": 12.690841674804688, "learning_rate": 2.0956597983340645e-06, "loss": 11.7649, "step": 91150 }, { "epoch": 39.635044824775875, "grad_norm": 12.550384521484375, "learning_rate": 2.0912757562472603e-06, "loss": 11.792, "step": 91200 }, { "epoch": 39.65677804944308, "grad_norm": 9.650748252868652, "learning_rate": 2.086891714160456e-06, "loss": 11.7733, "step": 91250 }, { "epoch": 39.6785112741103, "grad_norm": 8.173276901245117, "learning_rate": 2.082507672073652e-06, "loss": 11.7835, "step": 91300 }, { "epoch": 39.70024449877751, "grad_norm": 8.434061050415039, "learning_rate": 2.0781236299868477e-06, "loss": 11.7474, "step": 91350 }, { "epoch": 39.721977723444716, "grad_norm": 9.518670082092285, "learning_rate": 2.073739587900044e-06, "loss": 11.7687, "step": 91400 }, { "epoch": 39.743710948111925, "grad_norm": 6.331693649291992, "learning_rate": 2.06935554581324e-06, "loss": 11.7578, "step": 91450 }, { "epoch": 39.765444172779134, "grad_norm": 9.190653800964355, "learning_rate": 2.064971503726436e-06, "loss": 11.7756, "step": 91500 }, { "epoch": 39.78717739744635, "grad_norm": 13.398709297180176, "learning_rate": 2.060587461639632e-06, "loss": 11.7704, "step": 91550 }, { "epoch": 39.80891062211356, "grad_norm": 11.104494094848633, "learning_rate": 2.056203419552828e-06, "loss": 11.7623, "step": 91600 }, { "epoch": 39.83064384678077, "grad_norm": 5.477586269378662, "learning_rate": 2.051819377466024e-06, "loss": 11.7647, "step": 91650 }, { "epoch": 39.852377071447975, "grad_norm": 3.5749564170837402, "learning_rate": 2.0474353353792197e-06, "loss": 11.7695, "step": 91700 }, { "epoch": 39.874110296115184, "grad_norm": 5.978471755981445, "learning_rate": 2.043051293292416e-06, "loss": 11.7648, "step": 91750 }, { "epoch": 39.89584352078239, "grad_norm": 25.19099235534668, "learning_rate": 2.0386672512056117e-06, "loss": 11.7612, "step": 91800 }, { "epoch": 39.91757674544961, "grad_norm": 5.882903099060059, "learning_rate": 2.034283209118808e-06, "loss": 11.7799, "step": 91850 }, { "epoch": 39.93930997011682, "grad_norm": 27.048709869384766, "learning_rate": 2.0298991670320037e-06, "loss": 11.7737, "step": 91900 }, { "epoch": 39.961043194784025, "grad_norm": 9.073412895202637, "learning_rate": 2.0255151249451996e-06, "loss": 11.7612, "step": 91950 }, { "epoch": 39.982776419451234, "grad_norm": 13.002010345458984, "learning_rate": 2.0211310828583954e-06, "loss": 11.7782, "step": 92000 }, { "epoch": 40.00434664493344, "grad_norm": 9.383039474487305, "learning_rate": 2.0167470407715916e-06, "loss": 11.6788, "step": 92050 }, { "epoch": 40.02607986960065, "grad_norm": 9.476020812988281, "learning_rate": 2.0123629986847874e-06, "loss": 11.7597, "step": 92100 }, { "epoch": 40.04781309426786, "grad_norm": 8.409124374389648, "learning_rate": 2.0079789565979836e-06, "loss": 11.7492, "step": 92150 }, { "epoch": 40.069546318935075, "grad_norm": 6.030084609985352, "learning_rate": 2.0035949145111795e-06, "loss": 11.7751, "step": 92200 }, { "epoch": 40.09127954360228, "grad_norm": 4.96845006942749, "learning_rate": 1.9992108724243757e-06, "loss": 11.7443, "step": 92250 }, { "epoch": 40.11301276826949, "grad_norm": 11.096965789794922, "learning_rate": 1.9948268303375715e-06, "loss": 11.7718, "step": 92300 }, { "epoch": 40.1347459929367, "grad_norm": 6.45682430267334, "learning_rate": 1.9904427882507673e-06, "loss": 11.7592, "step": 92350 }, { "epoch": 40.15647921760391, "grad_norm": 7.665036678314209, "learning_rate": 1.986058746163963e-06, "loss": 11.7715, "step": 92400 }, { "epoch": 40.178212442271125, "grad_norm": 4.477396011352539, "learning_rate": 1.9816747040771594e-06, "loss": 11.7572, "step": 92450 }, { "epoch": 40.19994566693833, "grad_norm": 13.868246078491211, "learning_rate": 1.977290661990355e-06, "loss": 11.7656, "step": 92500 }, { "epoch": 40.22167889160554, "grad_norm": 7.29514217376709, "learning_rate": 1.9729066199035514e-06, "loss": 11.7525, "step": 92550 }, { "epoch": 40.24341211627275, "grad_norm": 5.126781463623047, "learning_rate": 1.968522577816747e-06, "loss": 11.7623, "step": 92600 }, { "epoch": 40.26514534093996, "grad_norm": 13.333416938781738, "learning_rate": 1.964138535729943e-06, "loss": 11.7645, "step": 92650 }, { "epoch": 40.286878565607175, "grad_norm": 4.38609504699707, "learning_rate": 1.9597544936431393e-06, "loss": 11.7471, "step": 92700 }, { "epoch": 40.308611790274384, "grad_norm": 18.64853858947754, "learning_rate": 1.955370451556335e-06, "loss": 11.7614, "step": 92750 }, { "epoch": 40.33034501494159, "grad_norm": 10.51586627960205, "learning_rate": 1.950986409469531e-06, "loss": 11.7572, "step": 92800 }, { "epoch": 40.3520782396088, "grad_norm": 10.547462463378906, "learning_rate": 1.946602367382727e-06, "loss": 11.7609, "step": 92850 }, { "epoch": 40.37381146427601, "grad_norm": 5.690642356872559, "learning_rate": 1.9422183252959233e-06, "loss": 11.7481, "step": 92900 }, { "epoch": 40.395544688943225, "grad_norm": 3.9211175441741943, "learning_rate": 1.937834283209119e-06, "loss": 11.7461, "step": 92950 }, { "epoch": 40.417277913610434, "grad_norm": 4.40631103515625, "learning_rate": 1.933450241122315e-06, "loss": 11.7587, "step": 93000 }, { "epoch": 40.43901113827764, "grad_norm": 4.007054328918457, "learning_rate": 1.9290661990355108e-06, "loss": 11.7649, "step": 93050 }, { "epoch": 40.46074436294485, "grad_norm": 9.994613647460938, "learning_rate": 1.9246821569487066e-06, "loss": 11.7632, "step": 93100 }, { "epoch": 40.48247758761206, "grad_norm": 7.985722541809082, "learning_rate": 1.920298114861903e-06, "loss": 11.7661, "step": 93150 }, { "epoch": 40.50421081227927, "grad_norm": 17.07448387145996, "learning_rate": 1.9159140727750986e-06, "loss": 11.7407, "step": 93200 }, { "epoch": 40.525944036946484, "grad_norm": 5.9471211433410645, "learning_rate": 1.911530030688295e-06, "loss": 11.7688, "step": 93250 }, { "epoch": 40.54767726161369, "grad_norm": 5.977828502655029, "learning_rate": 1.9071459886014907e-06, "loss": 11.7515, "step": 93300 }, { "epoch": 40.5694104862809, "grad_norm": 37.1627082824707, "learning_rate": 1.902761946514687e-06, "loss": 11.7671, "step": 93350 }, { "epoch": 40.59114371094811, "grad_norm": 6.538187503814697, "learning_rate": 1.8983779044278827e-06, "loss": 11.7546, "step": 93400 }, { "epoch": 40.61287693561532, "grad_norm": 7.786463260650635, "learning_rate": 1.8939938623410785e-06, "loss": 11.7539, "step": 93450 }, { "epoch": 40.634610160282534, "grad_norm": 9.64076042175293, "learning_rate": 1.8896098202542746e-06, "loss": 11.7615, "step": 93500 }, { "epoch": 40.65634338494974, "grad_norm": 6.382667064666748, "learning_rate": 1.8852257781674704e-06, "loss": 11.749, "step": 93550 }, { "epoch": 40.67807660961695, "grad_norm": 9.91306209564209, "learning_rate": 1.8808417360806666e-06, "loss": 11.7645, "step": 93600 }, { "epoch": 40.69980983428416, "grad_norm": 5.685323715209961, "learning_rate": 1.8764576939938626e-06, "loss": 11.7511, "step": 93650 }, { "epoch": 40.72154305895137, "grad_norm": 6.233023166656494, "learning_rate": 1.8720736519070584e-06, "loss": 11.7425, "step": 93700 }, { "epoch": 40.743276283618584, "grad_norm": 7.542703628540039, "learning_rate": 1.8676896098202542e-06, "loss": 11.7679, "step": 93750 }, { "epoch": 40.76500950828579, "grad_norm": 20.795351028442383, "learning_rate": 1.8633055677334505e-06, "loss": 11.7475, "step": 93800 }, { "epoch": 40.786742732953, "grad_norm": 8.967228889465332, "learning_rate": 1.8589215256466465e-06, "loss": 11.7556, "step": 93850 }, { "epoch": 40.80847595762021, "grad_norm": 9.180179595947266, "learning_rate": 1.8545374835598423e-06, "loss": 11.7549, "step": 93900 }, { "epoch": 40.83020918228742, "grad_norm": 4.092516899108887, "learning_rate": 1.8501534414730381e-06, "loss": 11.7597, "step": 93950 }, { "epoch": 40.851942406954635, "grad_norm": 4.368731498718262, "learning_rate": 1.8457693993862344e-06, "loss": 11.7655, "step": 94000 }, { "epoch": 40.87367563162184, "grad_norm": 3.8829784393310547, "learning_rate": 1.8413853572994304e-06, "loss": 11.7653, "step": 94050 }, { "epoch": 40.89540885628905, "grad_norm": 30.0755615234375, "learning_rate": 1.8370013152126262e-06, "loss": 11.7594, "step": 94100 }, { "epoch": 40.91714208095626, "grad_norm": 5.881009578704834, "learning_rate": 1.8326172731258222e-06, "loss": 11.7536, "step": 94150 }, { "epoch": 40.93887530562347, "grad_norm": 11.916665077209473, "learning_rate": 1.828233231039018e-06, "loss": 11.746, "step": 94200 }, { "epoch": 40.960608530290685, "grad_norm": 4.125995635986328, "learning_rate": 1.8238491889522143e-06, "loss": 11.7549, "step": 94250 }, { "epoch": 40.98234175495789, "grad_norm": 9.298914909362793, "learning_rate": 1.81946514686541e-06, "loss": 11.746, "step": 94300 }, { "epoch": 41.0039119804401, "grad_norm": 6.543211460113525, "learning_rate": 1.815081104778606e-06, "loss": 11.6774, "step": 94350 }, { "epoch": 41.02564520510731, "grad_norm": 10.013541221618652, "learning_rate": 1.810697062691802e-06, "loss": 11.747, "step": 94400 }, { "epoch": 41.04737842977452, "grad_norm": 26.30128288269043, "learning_rate": 1.8063130206049981e-06, "loss": 11.7447, "step": 94450 }, { "epoch": 41.06911165444173, "grad_norm": 6.381859302520752, "learning_rate": 1.801928978518194e-06, "loss": 11.7397, "step": 94500 }, { "epoch": 41.090844879108936, "grad_norm": 6.652487754821777, "learning_rate": 1.79754493643139e-06, "loss": 11.7431, "step": 94550 }, { "epoch": 41.11257810377615, "grad_norm": 5.449718952178955, "learning_rate": 1.7931608943445858e-06, "loss": 11.7549, "step": 94600 }, { "epoch": 41.13431132844336, "grad_norm": 3.2079617977142334, "learning_rate": 1.7887768522577818e-06, "loss": 11.7596, "step": 94650 }, { "epoch": 41.15604455311057, "grad_norm": 9.676004409790039, "learning_rate": 1.7843928101709778e-06, "loss": 11.7529, "step": 94700 }, { "epoch": 41.17777777777778, "grad_norm": 9.563153266906738, "learning_rate": 1.7800087680841738e-06, "loss": 11.7469, "step": 94750 }, { "epoch": 41.199511002444986, "grad_norm": 8.786322593688965, "learning_rate": 1.7756247259973697e-06, "loss": 11.7364, "step": 94800 }, { "epoch": 41.221244227112194, "grad_norm": 8.319864273071289, "learning_rate": 1.7712406839105657e-06, "loss": 11.7545, "step": 94850 }, { "epoch": 41.24297745177941, "grad_norm": 11.104187965393066, "learning_rate": 1.7668566418237617e-06, "loss": 11.7425, "step": 94900 }, { "epoch": 41.26471067644662, "grad_norm": 5.2200026512146, "learning_rate": 1.7624725997369577e-06, "loss": 11.7511, "step": 94950 }, { "epoch": 41.28644390111383, "grad_norm": 5.0673933029174805, "learning_rate": 1.7580885576501535e-06, "loss": 11.7597, "step": 95000 }, { "epoch": 41.308177125781036, "grad_norm": 6.790633678436279, "learning_rate": 1.7537045155633495e-06, "loss": 11.7566, "step": 95050 }, { "epoch": 41.329910350448245, "grad_norm": 5.625386714935303, "learning_rate": 1.7493204734765456e-06, "loss": 11.7483, "step": 95100 }, { "epoch": 41.35164357511546, "grad_norm": 5.591269493103027, "learning_rate": 1.7449364313897416e-06, "loss": 11.7497, "step": 95150 }, { "epoch": 41.37337679978267, "grad_norm": 5.52367639541626, "learning_rate": 1.7405523893029374e-06, "loss": 11.7637, "step": 95200 }, { "epoch": 41.39511002444988, "grad_norm": 3.352064371109009, "learning_rate": 1.7361683472161334e-06, "loss": 11.7539, "step": 95250 }, { "epoch": 41.416843249117086, "grad_norm": 10.966846466064453, "learning_rate": 1.7317843051293292e-06, "loss": 11.7587, "step": 95300 }, { "epoch": 41.438576473784295, "grad_norm": 4.151219844818115, "learning_rate": 1.7274002630425255e-06, "loss": 11.7389, "step": 95350 }, { "epoch": 41.46030969845151, "grad_norm": 7.952151775360107, "learning_rate": 1.7230162209557213e-06, "loss": 11.7477, "step": 95400 }, { "epoch": 41.48204292311872, "grad_norm": 7.96391487121582, "learning_rate": 1.7186321788689173e-06, "loss": 11.7551, "step": 95450 }, { "epoch": 41.50377614778593, "grad_norm": 5.120693206787109, "learning_rate": 1.7142481367821131e-06, "loss": 11.7426, "step": 95500 }, { "epoch": 41.525509372453136, "grad_norm": 4.3286027908325195, "learning_rate": 1.7098640946953093e-06, "loss": 11.7461, "step": 95550 }, { "epoch": 41.547242597120345, "grad_norm": 7.304409027099609, "learning_rate": 1.7054800526085052e-06, "loss": 11.7565, "step": 95600 }, { "epoch": 41.56897582178756, "grad_norm": 5.609922885894775, "learning_rate": 1.7010960105217012e-06, "loss": 11.7654, "step": 95650 }, { "epoch": 41.59070904645477, "grad_norm": 7.889837265014648, "learning_rate": 1.696711968434897e-06, "loss": 11.7447, "step": 95700 }, { "epoch": 41.61244227112198, "grad_norm": 6.620103359222412, "learning_rate": 1.692327926348093e-06, "loss": 11.7406, "step": 95750 }, { "epoch": 41.63417549578919, "grad_norm": 5.3006463050842285, "learning_rate": 1.687943884261289e-06, "loss": 11.7421, "step": 95800 }, { "epoch": 41.655908720456395, "grad_norm": 6.590972423553467, "learning_rate": 1.683559842174485e-06, "loss": 11.7446, "step": 95850 }, { "epoch": 41.67764194512361, "grad_norm": 9.739943504333496, "learning_rate": 1.6791758000876809e-06, "loss": 11.7506, "step": 95900 }, { "epoch": 41.69937516979082, "grad_norm": 5.318305969238281, "learning_rate": 1.6747917580008769e-06, "loss": 11.7503, "step": 95950 }, { "epoch": 41.72110839445803, "grad_norm": 4.048628807067871, "learning_rate": 1.6704077159140731e-06, "loss": 11.7579, "step": 96000 }, { "epoch": 41.74284161912524, "grad_norm": 6.817996025085449, "learning_rate": 1.666023673827269e-06, "loss": 11.7427, "step": 96050 }, { "epoch": 41.764574843792445, "grad_norm": 10.243191719055176, "learning_rate": 1.6616396317404647e-06, "loss": 11.7662, "step": 96100 }, { "epoch": 41.78630806845966, "grad_norm": 17.077388763427734, "learning_rate": 1.6572555896536608e-06, "loss": 11.7471, "step": 96150 }, { "epoch": 41.80804129312687, "grad_norm": 10.154135704040527, "learning_rate": 1.652871547566857e-06, "loss": 11.7526, "step": 96200 }, { "epoch": 41.82977451779408, "grad_norm": 15.618302345275879, "learning_rate": 1.6484875054800528e-06, "loss": 11.7451, "step": 96250 }, { "epoch": 41.85150774246129, "grad_norm": 5.547327041625977, "learning_rate": 1.6441034633932486e-06, "loss": 11.759, "step": 96300 }, { "epoch": 41.873240967128496, "grad_norm": 4.5382304191589355, "learning_rate": 1.6397194213064446e-06, "loss": 11.7377, "step": 96350 }, { "epoch": 41.89497419179571, "grad_norm": 4.469516277313232, "learning_rate": 1.6353353792196405e-06, "loss": 11.7555, "step": 96400 }, { "epoch": 41.91670741646292, "grad_norm": 4.71604061126709, "learning_rate": 1.6309513371328367e-06, "loss": 11.745, "step": 96450 }, { "epoch": 41.93844064113013, "grad_norm": 4.897347450256348, "learning_rate": 1.6265672950460327e-06, "loss": 11.7572, "step": 96500 }, { "epoch": 41.96017386579734, "grad_norm": 19.85100746154785, "learning_rate": 1.6221832529592285e-06, "loss": 11.7499, "step": 96550 }, { "epoch": 41.981907090464546, "grad_norm": 5.2028117179870605, "learning_rate": 1.6177992108724243e-06, "loss": 11.7622, "step": 96600 }, { "epoch": 42.00347731594675, "grad_norm": 12.004639625549316, "learning_rate": 1.6134151687856206e-06, "loss": 11.6536, "step": 96650 }, { "epoch": 42.02521054061396, "grad_norm": 5.1376848220825195, "learning_rate": 1.6090311266988166e-06, "loss": 11.7351, "step": 96700 }, { "epoch": 42.04694376528117, "grad_norm": 5.914182662963867, "learning_rate": 1.6046470846120124e-06, "loss": 11.7339, "step": 96750 }, { "epoch": 42.068676989948386, "grad_norm": 7.787753105163574, "learning_rate": 1.6002630425252082e-06, "loss": 11.7402, "step": 96800 }, { "epoch": 42.090410214615595, "grad_norm": 6.375885009765625, "learning_rate": 1.5958790004384042e-06, "loss": 11.7447, "step": 96850 }, { "epoch": 42.112143439282804, "grad_norm": 24.318653106689453, "learning_rate": 1.5914949583516005e-06, "loss": 11.7561, "step": 96900 }, { "epoch": 42.13387666395001, "grad_norm": 3.9973437786102295, "learning_rate": 1.5871109162647963e-06, "loss": 11.7402, "step": 96950 }, { "epoch": 42.15560988861722, "grad_norm": 7.005822658538818, "learning_rate": 1.5827268741779923e-06, "loss": 11.739, "step": 97000 }, { "epoch": 42.17734311328444, "grad_norm": 7.374983787536621, "learning_rate": 1.5783428320911881e-06, "loss": 11.7416, "step": 97050 }, { "epoch": 42.199076337951645, "grad_norm": 7.227538108825684, "learning_rate": 1.5739587900043843e-06, "loss": 11.7344, "step": 97100 }, { "epoch": 42.220809562618854, "grad_norm": 16.485198974609375, "learning_rate": 1.5695747479175802e-06, "loss": 11.7412, "step": 97150 }, { "epoch": 42.24254278728606, "grad_norm": 8.366280555725098, "learning_rate": 1.5651907058307762e-06, "loss": 11.7441, "step": 97200 }, { "epoch": 42.26427601195327, "grad_norm": 4.64595365524292, "learning_rate": 1.560806663743972e-06, "loss": 11.7412, "step": 97250 }, { "epoch": 42.28600923662049, "grad_norm": 8.422093391418457, "learning_rate": 1.5564226216571678e-06, "loss": 11.7501, "step": 97300 }, { "epoch": 42.307742461287695, "grad_norm": 7.655717849731445, "learning_rate": 1.552038579570364e-06, "loss": 11.7379, "step": 97350 }, { "epoch": 42.329475685954904, "grad_norm": 5.5334272384643555, "learning_rate": 1.54765453748356e-06, "loss": 11.7498, "step": 97400 }, { "epoch": 42.35120891062211, "grad_norm": 23.848054885864258, "learning_rate": 1.5432704953967559e-06, "loss": 11.7518, "step": 97450 }, { "epoch": 42.37294213528932, "grad_norm": 12.059696197509766, "learning_rate": 1.5388864533099519e-06, "loss": 11.7456, "step": 97500 }, { "epoch": 42.39467535995654, "grad_norm": 5.756223678588867, "learning_rate": 1.534502411223148e-06, "loss": 11.7399, "step": 97550 }, { "epoch": 42.416408584623746, "grad_norm": 3.3854639530181885, "learning_rate": 1.530118369136344e-06, "loss": 11.7399, "step": 97600 }, { "epoch": 42.438141809290954, "grad_norm": 33.99950408935547, "learning_rate": 1.5257343270495397e-06, "loss": 11.7448, "step": 97650 }, { "epoch": 42.45987503395816, "grad_norm": 12.245558738708496, "learning_rate": 1.5213502849627358e-06, "loss": 11.7508, "step": 97700 }, { "epoch": 42.48160825862537, "grad_norm": 13.875435829162598, "learning_rate": 1.5169662428759318e-06, "loss": 11.7354, "step": 97750 }, { "epoch": 42.50334148329259, "grad_norm": 4.234302997589111, "learning_rate": 1.5125822007891278e-06, "loss": 11.7334, "step": 97800 }, { "epoch": 42.525074707959796, "grad_norm": 6.025676727294922, "learning_rate": 1.5081981587023236e-06, "loss": 11.7598, "step": 97850 }, { "epoch": 42.546807932627004, "grad_norm": 6.693691253662109, "learning_rate": 1.5038141166155196e-06, "loss": 11.7353, "step": 97900 }, { "epoch": 42.56854115729421, "grad_norm": 15.162924766540527, "learning_rate": 1.4994300745287155e-06, "loss": 11.7358, "step": 97950 }, { "epoch": 42.59027438196142, "grad_norm": 14.031683921813965, "learning_rate": 1.4950460324419117e-06, "loss": 11.7534, "step": 98000 }, { "epoch": 42.61200760662863, "grad_norm": 5.764448642730713, "learning_rate": 1.4906619903551075e-06, "loss": 11.7335, "step": 98050 }, { "epoch": 42.633740831295846, "grad_norm": 4.945387840270996, "learning_rate": 1.4862779482683035e-06, "loss": 11.7459, "step": 98100 }, { "epoch": 42.655474055963055, "grad_norm": 5.07288932800293, "learning_rate": 1.4818939061814993e-06, "loss": 11.7434, "step": 98150 }, { "epoch": 42.67720728063026, "grad_norm": 5.957432270050049, "learning_rate": 1.4775098640946956e-06, "loss": 11.7353, "step": 98200 }, { "epoch": 42.69894050529747, "grad_norm": 11.405596733093262, "learning_rate": 1.4731258220078914e-06, "loss": 11.7419, "step": 98250 }, { "epoch": 42.72067372996468, "grad_norm": 5.61083984375, "learning_rate": 1.4687417799210874e-06, "loss": 11.7435, "step": 98300 }, { "epoch": 42.742406954631896, "grad_norm": 5.795932292938232, "learning_rate": 1.4643577378342832e-06, "loss": 11.7536, "step": 98350 }, { "epoch": 42.764140179299105, "grad_norm": 14.968274116516113, "learning_rate": 1.4599736957474792e-06, "loss": 11.7532, "step": 98400 }, { "epoch": 42.78587340396631, "grad_norm": 4.567286491394043, "learning_rate": 1.4555896536606753e-06, "loss": 11.7412, "step": 98450 }, { "epoch": 42.80760662863352, "grad_norm": 3.640240430831909, "learning_rate": 1.4512056115738713e-06, "loss": 11.7526, "step": 98500 }, { "epoch": 42.82933985330073, "grad_norm": 7.847878456115723, "learning_rate": 1.446821569487067e-06, "loss": 11.7427, "step": 98550 }, { "epoch": 42.851073077967946, "grad_norm": 9.818808555603027, "learning_rate": 1.4424375274002631e-06, "loss": 11.7491, "step": 98600 }, { "epoch": 42.872806302635155, "grad_norm": 6.466832637786865, "learning_rate": 1.4380534853134591e-06, "loss": 11.7487, "step": 98650 }, { "epoch": 42.89453952730236, "grad_norm": 12.53541374206543, "learning_rate": 1.4336694432266552e-06, "loss": 11.7451, "step": 98700 }, { "epoch": 42.91627275196957, "grad_norm": 9.651721000671387, "learning_rate": 1.429285401139851e-06, "loss": 11.7455, "step": 98750 }, { "epoch": 42.93800597663678, "grad_norm": 7.363193511962891, "learning_rate": 1.424901359053047e-06, "loss": 11.7442, "step": 98800 }, { "epoch": 42.959739201304, "grad_norm": 6.456315040588379, "learning_rate": 1.4205173169662432e-06, "loss": 11.7378, "step": 98850 }, { "epoch": 42.981472425971205, "grad_norm": 4.924785137176514, "learning_rate": 1.416133274879439e-06, "loss": 11.7338, "step": 98900 }, { "epoch": 43.00304265145341, "grad_norm": 12.623988151550293, "learning_rate": 1.4117492327926348e-06, "loss": 11.6662, "step": 98950 }, { "epoch": 43.02477587612062, "grad_norm": 9.521653175354004, "learning_rate": 1.4073651907058309e-06, "loss": 11.7329, "step": 99000 }, { "epoch": 43.04650910078783, "grad_norm": 4.056196212768555, "learning_rate": 1.4029811486190267e-06, "loss": 11.7467, "step": 99050 }, { "epoch": 43.06824232545504, "grad_norm": 3.722097396850586, "learning_rate": 1.398597106532223e-06, "loss": 11.735, "step": 99100 }, { "epoch": 43.08997555012225, "grad_norm": 3.6029582023620605, "learning_rate": 1.3942130644454187e-06, "loss": 11.7395, "step": 99150 }, { "epoch": 43.111708774789456, "grad_norm": 4.464113712310791, "learning_rate": 1.3898290223586147e-06, "loss": 11.7417, "step": 99200 }, { "epoch": 43.13344199945667, "grad_norm": 7.3181986808776855, "learning_rate": 1.3854449802718106e-06, "loss": 11.7556, "step": 99250 }, { "epoch": 43.15517522412388, "grad_norm": 12.537137031555176, "learning_rate": 1.3810609381850068e-06, "loss": 11.7485, "step": 99300 }, { "epoch": 43.17690844879109, "grad_norm": 14.284486770629883, "learning_rate": 1.3766768960982028e-06, "loss": 11.7374, "step": 99350 }, { "epoch": 43.1986416734583, "grad_norm": 3.9522204399108887, "learning_rate": 1.3722928540113986e-06, "loss": 11.7394, "step": 99400 }, { "epoch": 43.220374898125506, "grad_norm": 3.287987232208252, "learning_rate": 1.3679088119245944e-06, "loss": 11.7358, "step": 99450 }, { "epoch": 43.24210812279272, "grad_norm": 5.351846694946289, "learning_rate": 1.3635247698377905e-06, "loss": 11.7402, "step": 99500 }, { "epoch": 43.26384134745993, "grad_norm": 13.18363094329834, "learning_rate": 1.3591407277509867e-06, "loss": 11.7343, "step": 99550 }, { "epoch": 43.28557457212714, "grad_norm": 2.8352560997009277, "learning_rate": 1.3547566856641825e-06, "loss": 11.7408, "step": 99600 }, { "epoch": 43.30730779679435, "grad_norm": 4.404722690582275, "learning_rate": 1.3503726435773783e-06, "loss": 11.7488, "step": 99650 }, { "epoch": 43.329041021461556, "grad_norm": 5.89795446395874, "learning_rate": 1.3459886014905743e-06, "loss": 11.7417, "step": 99700 }, { "epoch": 43.35077424612877, "grad_norm": 3.792579412460327, "learning_rate": 1.3416045594037706e-06, "loss": 11.742, "step": 99750 }, { "epoch": 43.37250747079598, "grad_norm": 6.295860290527344, "learning_rate": 1.3372205173169664e-06, "loss": 11.7419, "step": 99800 }, { "epoch": 43.39424069546319, "grad_norm": 4.617618560791016, "learning_rate": 1.3328364752301624e-06, "loss": 11.7388, "step": 99850 }, { "epoch": 43.4159739201304, "grad_norm": 3.46701717376709, "learning_rate": 1.3284524331433582e-06, "loss": 11.739, "step": 99900 }, { "epoch": 43.437707144797606, "grad_norm": 5.266394138336182, "learning_rate": 1.3240683910565544e-06, "loss": 11.7336, "step": 99950 }, { "epoch": 43.45944036946482, "grad_norm": 4.314075946807861, "learning_rate": 1.3196843489697503e-06, "loss": 11.7505, "step": 100000 }, { "epoch": 43.45944036946482, "eval_cer": 0.07396960981570859, "eval_loss": 2.4215219020843506, "eval_runtime": 398.4535, "eval_samples_per_second": 13.567, "eval_steps_per_second": 3.393, "eval_wer": 0.22507457999685979, "step": 100000 }, { "epoch": 43.48117359413203, "grad_norm": 4.137760639190674, "learning_rate": 1.3153003068829463e-06, "loss": 11.7369, "step": 100050 }, { "epoch": 43.50290681879924, "grad_norm": 16.71613311767578, "learning_rate": 1.310916264796142e-06, "loss": 11.7308, "step": 100100 }, { "epoch": 43.52464004346645, "grad_norm": 3.2521612644195557, "learning_rate": 1.306532222709338e-06, "loss": 11.7266, "step": 100150 }, { "epoch": 43.54637326813366, "grad_norm": 5.732780456542969, "learning_rate": 1.3021481806225341e-06, "loss": 11.7389, "step": 100200 }, { "epoch": 43.56810649280087, "grad_norm": 6.189255714416504, "learning_rate": 1.2977641385357302e-06, "loss": 11.7341, "step": 100250 }, { "epoch": 43.58983971746808, "grad_norm": 4.808958053588867, "learning_rate": 1.293380096448926e-06, "loss": 11.739, "step": 100300 }, { "epoch": 43.61157294213529, "grad_norm": 13.771133422851562, "learning_rate": 1.288996054362122e-06, "loss": 11.734, "step": 100350 }, { "epoch": 43.6333061668025, "grad_norm": 5.9765400886535645, "learning_rate": 1.284612012275318e-06, "loss": 11.7398, "step": 100400 }, { "epoch": 43.65503939146971, "grad_norm": 4.513848304748535, "learning_rate": 1.280227970188514e-06, "loss": 11.7321, "step": 100450 }, { "epoch": 43.67677261613692, "grad_norm": 3.8837485313415527, "learning_rate": 1.2758439281017098e-06, "loss": 11.7257, "step": 100500 }, { "epoch": 43.69850584080413, "grad_norm": 5.340435028076172, "learning_rate": 1.2714598860149059e-06, "loss": 11.7429, "step": 100550 }, { "epoch": 43.72023906547134, "grad_norm": 5.877197742462158, "learning_rate": 1.2670758439281017e-06, "loss": 11.728, "step": 100600 }, { "epoch": 43.74197229013855, "grad_norm": 11.274967193603516, "learning_rate": 1.262691801841298e-06, "loss": 11.7314, "step": 100650 }, { "epoch": 43.76370551480576, "grad_norm": 7.218222141265869, "learning_rate": 1.2583077597544937e-06, "loss": 11.726, "step": 100700 }, { "epoch": 43.78543873947297, "grad_norm": 3.588624954223633, "learning_rate": 1.2539237176676897e-06, "loss": 11.7394, "step": 100750 }, { "epoch": 43.80717196414018, "grad_norm": 10.603730201721191, "learning_rate": 1.2495396755808858e-06, "loss": 11.7442, "step": 100800 }, { "epoch": 43.82890518880739, "grad_norm": 4.9444379806518555, "learning_rate": 1.2451556334940816e-06, "loss": 11.7318, "step": 100850 }, { "epoch": 43.8506384134746, "grad_norm": 16.76546287536621, "learning_rate": 1.2407715914072776e-06, "loss": 11.731, "step": 100900 }, { "epoch": 43.87237163814181, "grad_norm": 3.7398102283477783, "learning_rate": 1.2363875493204736e-06, "loss": 11.7348, "step": 100950 }, { "epoch": 43.894104862809016, "grad_norm": 4.954889297485352, "learning_rate": 1.2320035072336696e-06, "loss": 11.7333, "step": 101000 }, { "epoch": 43.91583808747623, "grad_norm": 3.635148286819458, "learning_rate": 1.2276194651468654e-06, "loss": 11.7388, "step": 101050 }, { "epoch": 43.93757131214344, "grad_norm": 9.620790481567383, "learning_rate": 1.2232354230600615e-06, "loss": 11.7254, "step": 101100 }, { "epoch": 43.95930453681065, "grad_norm": 4.13824462890625, "learning_rate": 1.2188513809732575e-06, "loss": 11.7366, "step": 101150 }, { "epoch": 43.98103776147786, "grad_norm": 6.983582973480225, "learning_rate": 1.2144673388864533e-06, "loss": 11.7287, "step": 101200 }, { "epoch": 44.002607986960065, "grad_norm": 4.2465691566467285, "learning_rate": 1.2100832967996493e-06, "loss": 11.649, "step": 101250 }, { "epoch": 44.024341211627274, "grad_norm": 17.737117767333984, "learning_rate": 1.2056992547128453e-06, "loss": 11.7302, "step": 101300 }, { "epoch": 44.04607443629448, "grad_norm": 4.739099502563477, "learning_rate": 1.2013152126260414e-06, "loss": 11.75, "step": 101350 }, { "epoch": 44.0678076609617, "grad_norm": 3.695364236831665, "learning_rate": 1.1969311705392372e-06, "loss": 11.7399, "step": 101400 }, { "epoch": 44.08954088562891, "grad_norm": 6.371554851531982, "learning_rate": 1.1925471284524332e-06, "loss": 11.7314, "step": 101450 }, { "epoch": 44.111274110296115, "grad_norm": 24.45339584350586, "learning_rate": 1.1881630863656292e-06, "loss": 11.7215, "step": 101500 }, { "epoch": 44.133007334963324, "grad_norm": 5.760914325714111, "learning_rate": 1.1837790442788252e-06, "loss": 11.7376, "step": 101550 }, { "epoch": 44.15474055963053, "grad_norm": 4.051183700561523, "learning_rate": 1.179395002192021e-06, "loss": 11.7265, "step": 101600 }, { "epoch": 44.17647378429775, "grad_norm": 5.363418102264404, "learning_rate": 1.175010960105217e-06, "loss": 11.7367, "step": 101650 }, { "epoch": 44.19820700896496, "grad_norm": 3.471618890762329, "learning_rate": 1.170626918018413e-06, "loss": 11.7384, "step": 101700 }, { "epoch": 44.219940233632165, "grad_norm": 7.958500385284424, "learning_rate": 1.166242875931609e-06, "loss": 11.7332, "step": 101750 }, { "epoch": 44.241673458299374, "grad_norm": 25.589679718017578, "learning_rate": 1.161858833844805e-06, "loss": 11.7298, "step": 101800 }, { "epoch": 44.26340668296658, "grad_norm": 11.511533737182617, "learning_rate": 1.157474791758001e-06, "loss": 11.7272, "step": 101850 }, { "epoch": 44.2851399076338, "grad_norm": 4.467309474945068, "learning_rate": 1.153090749671197e-06, "loss": 11.7184, "step": 101900 }, { "epoch": 44.30687313230101, "grad_norm": 4.863615989685059, "learning_rate": 1.1487067075843928e-06, "loss": 11.7395, "step": 101950 }, { "epoch": 44.328606356968216, "grad_norm": 6.230271816253662, "learning_rate": 1.1443226654975888e-06, "loss": 11.7373, "step": 102000 }, { "epoch": 44.350339581635424, "grad_norm": 4.7530517578125, "learning_rate": 1.1399386234107848e-06, "loss": 11.7472, "step": 102050 }, { "epoch": 44.37207280630263, "grad_norm": 22.007238388061523, "learning_rate": 1.1355545813239809e-06, "loss": 11.7212, "step": 102100 }, { "epoch": 44.39380603096985, "grad_norm": 6.660682678222656, "learning_rate": 1.1311705392371767e-06, "loss": 11.7316, "step": 102150 }, { "epoch": 44.41553925563706, "grad_norm": 5.25778865814209, "learning_rate": 1.126786497150373e-06, "loss": 11.7305, "step": 102200 }, { "epoch": 44.437272480304266, "grad_norm": 5.097360134124756, "learning_rate": 1.1224024550635687e-06, "loss": 11.7314, "step": 102250 }, { "epoch": 44.459005704971474, "grad_norm": 4.272281169891357, "learning_rate": 1.1180184129767645e-06, "loss": 11.7419, "step": 102300 }, { "epoch": 44.48073892963868, "grad_norm": 6.060675144195557, "learning_rate": 1.1136343708899605e-06, "loss": 11.7367, "step": 102350 }, { "epoch": 44.50247215430589, "grad_norm": 5.883248329162598, "learning_rate": 1.1092503288031566e-06, "loss": 11.7368, "step": 102400 }, { "epoch": 44.52420537897311, "grad_norm": 6.329914093017578, "learning_rate": 1.1048662867163526e-06, "loss": 11.7303, "step": 102450 }, { "epoch": 44.545938603640316, "grad_norm": 6.62354850769043, "learning_rate": 1.1004822446295484e-06, "loss": 11.7375, "step": 102500 }, { "epoch": 44.567671828307525, "grad_norm": 10.634700775146484, "learning_rate": 1.0960982025427446e-06, "loss": 11.7295, "step": 102550 }, { "epoch": 44.58940505297473, "grad_norm": 2.787297487258911, "learning_rate": 1.0917141604559404e-06, "loss": 11.7311, "step": 102600 }, { "epoch": 44.61113827764194, "grad_norm": 4.915313720703125, "learning_rate": 1.0873301183691365e-06, "loss": 11.7368, "step": 102650 }, { "epoch": 44.63287150230916, "grad_norm": 13.359769821166992, "learning_rate": 1.0829460762823325e-06, "loss": 11.7417, "step": 102700 }, { "epoch": 44.654604726976366, "grad_norm": 3.9991888999938965, "learning_rate": 1.0785620341955285e-06, "loss": 11.7392, "step": 102750 }, { "epoch": 44.676337951643575, "grad_norm": 3.797086238861084, "learning_rate": 1.0741779921087243e-06, "loss": 11.7242, "step": 102800 }, { "epoch": 44.69807117631078, "grad_norm": 6.608884811401367, "learning_rate": 1.0697939500219203e-06, "loss": 11.7231, "step": 102850 }, { "epoch": 44.71980440097799, "grad_norm": 10.230695724487305, "learning_rate": 1.0654099079351164e-06, "loss": 11.7325, "step": 102900 }, { "epoch": 44.74153762564521, "grad_norm": 4.2929301261901855, "learning_rate": 1.0610258658483122e-06, "loss": 11.7303, "step": 102950 }, { "epoch": 44.763270850312416, "grad_norm": 5.952197074890137, "learning_rate": 1.0566418237615082e-06, "loss": 11.7291, "step": 103000 }, { "epoch": 44.785004074979625, "grad_norm": 10.304634094238281, "learning_rate": 1.0522577816747042e-06, "loss": 11.7452, "step": 103050 }, { "epoch": 44.806737299646834, "grad_norm": 4.195600509643555, "learning_rate": 1.0478737395879002e-06, "loss": 11.744, "step": 103100 }, { "epoch": 44.82847052431404, "grad_norm": 3.8358092308044434, "learning_rate": 1.043489697501096e-06, "loss": 11.7301, "step": 103150 }, { "epoch": 44.85020374898126, "grad_norm": 13.731127738952637, "learning_rate": 1.039105655414292e-06, "loss": 11.7318, "step": 103200 }, { "epoch": 44.87193697364847, "grad_norm": 4.197743892669678, "learning_rate": 1.034721613327488e-06, "loss": 11.7318, "step": 103250 }, { "epoch": 44.893670198315675, "grad_norm": 8.494996070861816, "learning_rate": 1.030337571240684e-06, "loss": 11.7357, "step": 103300 }, { "epoch": 44.915403422982884, "grad_norm": 3.6425983905792236, "learning_rate": 1.02595352915388e-06, "loss": 11.729, "step": 103350 }, { "epoch": 44.93713664765009, "grad_norm": 4.333567142486572, "learning_rate": 1.021569487067076e-06, "loss": 11.7339, "step": 103400 }, { "epoch": 44.95886987231731, "grad_norm": 4.8357930183410645, "learning_rate": 1.017185444980272e-06, "loss": 11.735, "step": 103450 }, { "epoch": 44.98060309698452, "grad_norm": 8.409868240356445, "learning_rate": 1.0128014028934678e-06, "loss": 11.7336, "step": 103500 }, { "epoch": 45.002173322466724, "grad_norm": 3.9856626987457275, "learning_rate": 1.0084173608066638e-06, "loss": 11.653, "step": 103550 }, { "epoch": 45.02390654713393, "grad_norm": 4.529376029968262, "learning_rate": 1.0040333187198598e-06, "loss": 11.7279, "step": 103600 }, { "epoch": 45.04563977180114, "grad_norm": 4.276280403137207, "learning_rate": 9.996492766330559e-07, "loss": 11.7362, "step": 103650 }, { "epoch": 45.06737299646835, "grad_norm": 4.415678024291992, "learning_rate": 9.952652345462517e-07, "loss": 11.7194, "step": 103700 }, { "epoch": 45.08910622113556, "grad_norm": 4.513889789581299, "learning_rate": 9.908811924594477e-07, "loss": 11.7178, "step": 103750 }, { "epoch": 45.11083944580277, "grad_norm": 5.300011157989502, "learning_rate": 9.864971503726437e-07, "loss": 11.727, "step": 103800 }, { "epoch": 45.13257267046998, "grad_norm": 3.8258767127990723, "learning_rate": 9.821131082858395e-07, "loss": 11.7328, "step": 103850 }, { "epoch": 45.15430589513719, "grad_norm": 7.767271995544434, "learning_rate": 9.777290661990355e-07, "loss": 11.7202, "step": 103900 }, { "epoch": 45.1760391198044, "grad_norm": 3.230754852294922, "learning_rate": 9.733450241122316e-07, "loss": 11.7268, "step": 103950 }, { "epoch": 45.19777234447161, "grad_norm": 3.8269119262695312, "learning_rate": 9.689609820254276e-07, "loss": 11.7249, "step": 104000 }, { "epoch": 45.21950556913882, "grad_norm": 5.030121803283691, "learning_rate": 9.645769399386234e-07, "loss": 11.7181, "step": 104050 }, { "epoch": 45.24123879380603, "grad_norm": 4.850019931793213, "learning_rate": 9.601928978518194e-07, "loss": 11.7272, "step": 104100 }, { "epoch": 45.26297201847324, "grad_norm": 6.58116340637207, "learning_rate": 9.558088557650154e-07, "loss": 11.7237, "step": 104150 }, { "epoch": 45.28470524314045, "grad_norm": 20.67346954345703, "learning_rate": 9.514248136782115e-07, "loss": 11.7302, "step": 104200 }, { "epoch": 45.30643846780766, "grad_norm": 3.362128973007202, "learning_rate": 9.470407715914074e-07, "loss": 11.7209, "step": 104250 }, { "epoch": 45.32817169247487, "grad_norm": 7.51302433013916, "learning_rate": 9.426567295046034e-07, "loss": 11.7303, "step": 104300 }, { "epoch": 45.349904917142084, "grad_norm": 4.610814094543457, "learning_rate": 9.382726874177993e-07, "loss": 11.7177, "step": 104350 }, { "epoch": 45.37163814180929, "grad_norm": 13.158862113952637, "learning_rate": 9.338886453309952e-07, "loss": 11.7227, "step": 104400 }, { "epoch": 45.3933713664765, "grad_norm": 4.248621940612793, "learning_rate": 9.295046032441913e-07, "loss": 11.7428, "step": 104450 }, { "epoch": 45.41510459114371, "grad_norm": 3.553060531616211, "learning_rate": 9.251205611573872e-07, "loss": 11.7294, "step": 104500 }, { "epoch": 45.43683781581092, "grad_norm": 5.807036399841309, "learning_rate": 9.207365190705832e-07, "loss": 11.7284, "step": 104550 }, { "epoch": 45.458571040478134, "grad_norm": 13.629132270812988, "learning_rate": 9.163524769837791e-07, "loss": 11.7301, "step": 104600 }, { "epoch": 45.48030426514534, "grad_norm": 4.1011857986450195, "learning_rate": 9.119684348969751e-07, "loss": 11.7319, "step": 104650 }, { "epoch": 45.50203748981255, "grad_norm": 10.649341583251953, "learning_rate": 9.075843928101711e-07, "loss": 11.7333, "step": 104700 }, { "epoch": 45.52377071447976, "grad_norm": 20.217660903930664, "learning_rate": 9.032003507233671e-07, "loss": 11.7319, "step": 104750 }, { "epoch": 45.54550393914697, "grad_norm": 7.371703624725342, "learning_rate": 8.98816308636563e-07, "loss": 11.7223, "step": 104800 }, { "epoch": 45.567237163814184, "grad_norm": 6.1061110496521, "learning_rate": 8.94432266549759e-07, "loss": 11.7134, "step": 104850 }, { "epoch": 45.58897038848139, "grad_norm": 3.3697314262390137, "learning_rate": 8.900482244629549e-07, "loss": 11.7206, "step": 104900 }, { "epoch": 45.6107036131486, "grad_norm": 5.704832077026367, "learning_rate": 8.856641823761508e-07, "loss": 11.7343, "step": 104950 }, { "epoch": 45.63243683781581, "grad_norm": 5.612690448760986, "learning_rate": 8.812801402893469e-07, "loss": 11.7228, "step": 105000 }, { "epoch": 45.65417006248302, "grad_norm": 4.661070823669434, "learning_rate": 8.768960982025428e-07, "loss": 11.7344, "step": 105050 }, { "epoch": 45.675903287150234, "grad_norm": 4.922998905181885, "learning_rate": 8.725120561157388e-07, "loss": 11.7161, "step": 105100 }, { "epoch": 45.69763651181744, "grad_norm": 6.320181369781494, "learning_rate": 8.681280140289347e-07, "loss": 11.7451, "step": 105150 }, { "epoch": 45.71936973648465, "grad_norm": 6.543067455291748, "learning_rate": 8.637439719421307e-07, "loss": 11.7187, "step": 105200 }, { "epoch": 45.74110296115186, "grad_norm": 7.506560802459717, "learning_rate": 8.593599298553267e-07, "loss": 11.7368, "step": 105250 }, { "epoch": 45.76283618581907, "grad_norm": 6.871926307678223, "learning_rate": 8.549758877685227e-07, "loss": 11.7347, "step": 105300 }, { "epoch": 45.784569410486284, "grad_norm": 4.491659641265869, "learning_rate": 8.505918456817186e-07, "loss": 11.7379, "step": 105350 }, { "epoch": 45.80630263515349, "grad_norm": 21.81031036376953, "learning_rate": 8.462078035949146e-07, "loss": 11.7307, "step": 105400 }, { "epoch": 45.8280358598207, "grad_norm": 20.492307662963867, "learning_rate": 8.418237615081105e-07, "loss": 11.7268, "step": 105450 }, { "epoch": 45.84976908448791, "grad_norm": 7.620596408843994, "learning_rate": 8.374397194213065e-07, "loss": 11.7384, "step": 105500 }, { "epoch": 45.87150230915512, "grad_norm": 4.937099456787109, "learning_rate": 8.330556773345025e-07, "loss": 11.7266, "step": 105550 }, { "epoch": 45.89323553382233, "grad_norm": 3.815049409866333, "learning_rate": 8.286716352476984e-07, "loss": 11.7281, "step": 105600 }, { "epoch": 45.91496875848954, "grad_norm": 9.32738208770752, "learning_rate": 8.242875931608944e-07, "loss": 11.7424, "step": 105650 }, { "epoch": 45.93670198315675, "grad_norm": 12.112308502197266, "learning_rate": 8.199035510740903e-07, "loss": 11.729, "step": 105700 }, { "epoch": 45.95843520782396, "grad_norm": 4.76987361907959, "learning_rate": 8.155195089872864e-07, "loss": 11.7292, "step": 105750 }, { "epoch": 45.98016843249117, "grad_norm": 11.38598346710205, "learning_rate": 8.111354669004823e-07, "loss": 11.7356, "step": 105800 }, { "epoch": 46.00173865797338, "grad_norm": 18.5734806060791, "learning_rate": 8.067514248136783e-07, "loss": 11.6526, "step": 105850 }, { "epoch": 46.023471882640585, "grad_norm": 3.3094968795776367, "learning_rate": 8.023673827268742e-07, "loss": 11.7246, "step": 105900 }, { "epoch": 46.045205107307794, "grad_norm": 10.625943183898926, "learning_rate": 7.979833406400702e-07, "loss": 11.7197, "step": 105950 }, { "epoch": 46.06693833197501, "grad_norm": 11.11587142944336, "learning_rate": 7.935992985532662e-07, "loss": 11.712, "step": 106000 }, { "epoch": 46.08867155664222, "grad_norm": 5.9816083908081055, "learning_rate": 7.892152564664621e-07, "loss": 11.7291, "step": 106050 }, { "epoch": 46.11040478130943, "grad_norm": 5.810311317443848, "learning_rate": 7.848312143796581e-07, "loss": 11.72, "step": 106100 }, { "epoch": 46.132138005976635, "grad_norm": 9.10987377166748, "learning_rate": 7.80447172292854e-07, "loss": 11.7341, "step": 106150 }, { "epoch": 46.153871230643844, "grad_norm": 3.9713680744171143, "learning_rate": 7.7606313020605e-07, "loss": 11.728, "step": 106200 }, { "epoch": 46.17560445531106, "grad_norm": 10.883817672729492, "learning_rate": 7.716790881192459e-07, "loss": 11.7321, "step": 106250 }, { "epoch": 46.19733767997827, "grad_norm": 3.261399745941162, "learning_rate": 7.67295046032442e-07, "loss": 11.7197, "step": 106300 }, { "epoch": 46.21907090464548, "grad_norm": 3.867229461669922, "learning_rate": 7.629110039456379e-07, "loss": 11.7311, "step": 106350 }, { "epoch": 46.240804129312686, "grad_norm": 5.125184059143066, "learning_rate": 7.585269618588339e-07, "loss": 11.7357, "step": 106400 }, { "epoch": 46.262537353979894, "grad_norm": 3.271857500076294, "learning_rate": 7.541429197720298e-07, "loss": 11.7182, "step": 106450 }, { "epoch": 46.28427057864711, "grad_norm": 2.972466230392456, "learning_rate": 7.49758877685226e-07, "loss": 11.7244, "step": 106500 }, { "epoch": 46.30600380331432, "grad_norm": 10.320878028869629, "learning_rate": 7.453748355984218e-07, "loss": 11.7296, "step": 106550 }, { "epoch": 46.32773702798153, "grad_norm": 7.7540483474731445, "learning_rate": 7.409907935116177e-07, "loss": 11.7175, "step": 106600 }, { "epoch": 46.349470252648736, "grad_norm": 5.142116546630859, "learning_rate": 7.366067514248137e-07, "loss": 11.7324, "step": 106650 }, { "epoch": 46.371203477315944, "grad_norm": 3.158510446548462, "learning_rate": 7.322227093380096e-07, "loss": 11.7292, "step": 106700 }, { "epoch": 46.39293670198316, "grad_norm": 3.982985258102417, "learning_rate": 7.278386672512057e-07, "loss": 11.7214, "step": 106750 }, { "epoch": 46.41466992665037, "grad_norm": 3.4331562519073486, "learning_rate": 7.234546251644016e-07, "loss": 11.7263, "step": 106800 }, { "epoch": 46.43640315131758, "grad_norm": 5.8017401695251465, "learning_rate": 7.190705830775977e-07, "loss": 11.7349, "step": 106850 }, { "epoch": 46.458136375984786, "grad_norm": 4.63163948059082, "learning_rate": 7.146865409907935e-07, "loss": 11.7241, "step": 106900 }, { "epoch": 46.479869600651995, "grad_norm": 4.267096996307373, "learning_rate": 7.103024989039896e-07, "loss": 11.7189, "step": 106950 }, { "epoch": 46.5016028253192, "grad_norm": 4.0522871017456055, "learning_rate": 7.059184568171855e-07, "loss": 11.7209, "step": 107000 }, { "epoch": 46.52333604998642, "grad_norm": 5.3363142013549805, "learning_rate": 7.015344147303816e-07, "loss": 11.7084, "step": 107050 }, { "epoch": 46.54506927465363, "grad_norm": 4.059858322143555, "learning_rate": 6.971503726435775e-07, "loss": 11.7125, "step": 107100 }, { "epoch": 46.566802499320836, "grad_norm": 3.252812623977661, "learning_rate": 6.927663305567733e-07, "loss": 11.7268, "step": 107150 }, { "epoch": 46.588535723988045, "grad_norm": 4.82172966003418, "learning_rate": 6.883822884699694e-07, "loss": 11.7139, "step": 107200 }, { "epoch": 46.61026894865525, "grad_norm": 8.201459884643555, "learning_rate": 6.839982463831653e-07, "loss": 11.7298, "step": 107250 }, { "epoch": 46.63200217332247, "grad_norm": 3.159785747528076, "learning_rate": 6.796142042963614e-07, "loss": 11.7209, "step": 107300 }, { "epoch": 46.65373539798968, "grad_norm": 11.2830171585083, "learning_rate": 6.752301622095573e-07, "loss": 11.7288, "step": 107350 }, { "epoch": 46.67546862265689, "grad_norm": 4.074632167816162, "learning_rate": 6.708461201227533e-07, "loss": 11.7174, "step": 107400 }, { "epoch": 46.697201847324095, "grad_norm": 12.465502738952637, "learning_rate": 6.664620780359492e-07, "loss": 11.7186, "step": 107450 }, { "epoch": 46.718935071991304, "grad_norm": 4.864065647125244, "learning_rate": 6.620780359491452e-07, "loss": 11.7249, "step": 107500 }, { "epoch": 46.74066829665852, "grad_norm": 7.3475341796875, "learning_rate": 6.576939938623411e-07, "loss": 11.7212, "step": 107550 }, { "epoch": 46.76240152132573, "grad_norm": 6.0634565353393555, "learning_rate": 6.533099517755372e-07, "loss": 11.7342, "step": 107600 }, { "epoch": 46.78413474599294, "grad_norm": 6.251104831695557, "learning_rate": 6.489259096887331e-07, "loss": 11.7225, "step": 107650 }, { "epoch": 46.805867970660145, "grad_norm": 5.822422027587891, "learning_rate": 6.44541867601929e-07, "loss": 11.7272, "step": 107700 }, { "epoch": 46.827601195327354, "grad_norm": 8.700297355651855, "learning_rate": 6.40157825515125e-07, "loss": 11.7297, "step": 107750 }, { "epoch": 46.84933441999457, "grad_norm": 5.136385917663574, "learning_rate": 6.357737834283209e-07, "loss": 11.726, "step": 107800 }, { "epoch": 46.87106764466178, "grad_norm": 5.658658981323242, "learning_rate": 6.31389741341517e-07, "loss": 11.7237, "step": 107850 }, { "epoch": 46.89280086932899, "grad_norm": 6.1630353927612305, "learning_rate": 6.270056992547129e-07, "loss": 11.728, "step": 107900 }, { "epoch": 46.914534093996195, "grad_norm": 11.851746559143066, "learning_rate": 6.226216571679088e-07, "loss": 11.7218, "step": 107950 }, { "epoch": 46.936267318663404, "grad_norm": 3.989478588104248, "learning_rate": 6.182376150811048e-07, "loss": 11.7246, "step": 108000 }, { "epoch": 46.95800054333062, "grad_norm": 10.78637981414795, "learning_rate": 6.138535729943007e-07, "loss": 11.7238, "step": 108050 }, { "epoch": 46.97973376799783, "grad_norm": 11.566459655761719, "learning_rate": 6.094695309074968e-07, "loss": 11.7193, "step": 108100 }, { "epoch": 47.001303993480036, "grad_norm": 9.378003120422363, "learning_rate": 6.050854888206927e-07, "loss": 11.6363, "step": 108150 }, { "epoch": 47.023037218147245, "grad_norm": 5.3512091636657715, "learning_rate": 6.007014467338887e-07, "loss": 11.7135, "step": 108200 }, { "epoch": 47.04477044281445, "grad_norm": 10.854682922363281, "learning_rate": 5.963174046470846e-07, "loss": 11.7218, "step": 108250 }, { "epoch": 47.06650366748166, "grad_norm": 3.557173728942871, "learning_rate": 5.919333625602806e-07, "loss": 11.7216, "step": 108300 }, { "epoch": 47.08823689214887, "grad_norm": 4.374483585357666, "learning_rate": 5.875493204734767e-07, "loss": 11.722, "step": 108350 }, { "epoch": 47.10997011681608, "grad_norm": 4.371666431427002, "learning_rate": 5.831652783866726e-07, "loss": 11.7127, "step": 108400 }, { "epoch": 47.131703341483295, "grad_norm": 6.458693504333496, "learning_rate": 5.787812362998686e-07, "loss": 11.7278, "step": 108450 }, { "epoch": 47.1534365661505, "grad_norm": 20.349096298217773, "learning_rate": 5.743971942130644e-07, "loss": 11.7243, "step": 108500 }, { "epoch": 47.17516979081771, "grad_norm": 24.32076072692871, "learning_rate": 5.700131521262604e-07, "loss": 11.7229, "step": 108550 }, { "epoch": 47.19690301548492, "grad_norm": 3.82700252532959, "learning_rate": 5.656291100394565e-07, "loss": 11.7084, "step": 108600 }, { "epoch": 47.21863624015213, "grad_norm": 3.007939338684082, "learning_rate": 5.612450679526524e-07, "loss": 11.7192, "step": 108650 }, { "epoch": 47.240369464819345, "grad_norm": 4.3855299949646, "learning_rate": 5.568610258658484e-07, "loss": 11.7216, "step": 108700 }, { "epoch": 47.262102689486554, "grad_norm": 3.7610890865325928, "learning_rate": 5.524769837790443e-07, "loss": 11.7107, "step": 108750 }, { "epoch": 47.28383591415376, "grad_norm": 5.21887731552124, "learning_rate": 5.480929416922403e-07, "loss": 11.7165, "step": 108800 }, { "epoch": 47.30556913882097, "grad_norm": 4.387558460235596, "learning_rate": 5.437088996054362e-07, "loss": 11.7145, "step": 108850 }, { "epoch": 47.32730236348818, "grad_norm": 2.78105092048645, "learning_rate": 5.393248575186323e-07, "loss": 11.7132, "step": 108900 }, { "epoch": 47.349035588155395, "grad_norm": 6.782215595245361, "learning_rate": 5.349408154318282e-07, "loss": 11.7313, "step": 108950 }, { "epoch": 47.370768812822604, "grad_norm": 4.510542392730713, "learning_rate": 5.305567733450242e-07, "loss": 11.7191, "step": 109000 }, { "epoch": 47.39250203748981, "grad_norm": 5.735984802246094, "learning_rate": 5.261727312582201e-07, "loss": 11.7178, "step": 109050 }, { "epoch": 47.41423526215702, "grad_norm": 9.908329963684082, "learning_rate": 5.21788689171416e-07, "loss": 11.7219, "step": 109100 }, { "epoch": 47.43596848682423, "grad_norm": 5.137279987335205, "learning_rate": 5.174046470846121e-07, "loss": 11.7231, "step": 109150 }, { "epoch": 47.457701711491445, "grad_norm": 15.530988693237305, "learning_rate": 5.13020604997808e-07, "loss": 11.7219, "step": 109200 }, { "epoch": 47.479434936158654, "grad_norm": 5.565670490264893, "learning_rate": 5.08636562911004e-07, "loss": 11.7231, "step": 109250 }, { "epoch": 47.50116816082586, "grad_norm": 3.390558958053589, "learning_rate": 5.042525208241999e-07, "loss": 11.7143, "step": 109300 }, { "epoch": 47.52290138549307, "grad_norm": 3.168869972229004, "learning_rate": 4.998684787373959e-07, "loss": 11.7107, "step": 109350 }, { "epoch": 47.54463461016028, "grad_norm": 4.391485691070557, "learning_rate": 4.954844366505919e-07, "loss": 11.7171, "step": 109400 }, { "epoch": 47.566367834827496, "grad_norm": 10.428187370300293, "learning_rate": 4.911003945637879e-07, "loss": 11.7214, "step": 109450 }, { "epoch": 47.588101059494704, "grad_norm": 8.480759620666504, "learning_rate": 4.867163524769838e-07, "loss": 11.7284, "step": 109500 }, { "epoch": 47.60983428416191, "grad_norm": 8.282448768615723, "learning_rate": 4.823323103901798e-07, "loss": 11.723, "step": 109550 }, { "epoch": 47.63156750882912, "grad_norm": 3.495969295501709, "learning_rate": 4.779482683033757e-07, "loss": 11.7213, "step": 109600 }, { "epoch": 47.65330073349633, "grad_norm": 4.484890937805176, "learning_rate": 4.735642262165717e-07, "loss": 11.7149, "step": 109650 }, { "epoch": 47.675033958163546, "grad_norm": 11.390790939331055, "learning_rate": 4.6918018412976767e-07, "loss": 11.7138, "step": 109700 }, { "epoch": 47.696767182830754, "grad_norm": 3.9627888202667236, "learning_rate": 4.6479614204296364e-07, "loss": 11.722, "step": 109750 }, { "epoch": 47.71850040749796, "grad_norm": 5.796283721923828, "learning_rate": 4.604120999561596e-07, "loss": 11.7182, "step": 109800 }, { "epoch": 47.74023363216517, "grad_norm": 8.347150802612305, "learning_rate": 4.560280578693556e-07, "loss": 11.7257, "step": 109850 }, { "epoch": 47.76196685683238, "grad_norm": 18.176475524902344, "learning_rate": 4.5164401578255155e-07, "loss": 11.7289, "step": 109900 }, { "epoch": 47.78370008149959, "grad_norm": 4.672854900360107, "learning_rate": 4.472599736957475e-07, "loss": 11.7134, "step": 109950 }, { "epoch": 47.805433306166805, "grad_norm": 4.16023588180542, "learning_rate": 4.428759316089435e-07, "loss": 11.7233, "step": 110000 }, { "epoch": 47.805433306166805, "eval_cer": 0.07368261883895177, "eval_loss": 2.426945209503174, "eval_runtime": 398.6555, "eval_samples_per_second": 13.561, "eval_steps_per_second": 3.391, "eval_wer": 0.224462238970011, "step": 110000 }, { "epoch": 47.82716653083401, "grad_norm": 6.881287097930908, "learning_rate": 4.3849188952213946e-07, "loss": 11.7274, "step": 110050 }, { "epoch": 47.84889975550122, "grad_norm": 22.432987213134766, "learning_rate": 4.3410784743533543e-07, "loss": 11.7251, "step": 110100 }, { "epoch": 47.87063298016843, "grad_norm": 5.372289180755615, "learning_rate": 4.2972380534853134e-07, "loss": 11.7208, "step": 110150 }, { "epoch": 47.89236620483564, "grad_norm": 7.969860553741455, "learning_rate": 4.253397632617273e-07, "loss": 11.714, "step": 110200 }, { "epoch": 47.914099429502855, "grad_norm": 4.329230308532715, "learning_rate": 4.209557211749233e-07, "loss": 11.7203, "step": 110250 }, { "epoch": 47.93583265417006, "grad_norm": 5.077062606811523, "learning_rate": 4.1657167908811925e-07, "loss": 11.7147, "step": 110300 }, { "epoch": 47.95756587883727, "grad_norm": 10.139983177185059, "learning_rate": 4.121876370013152e-07, "loss": 11.7224, "step": 110350 }, { "epoch": 47.97929910350448, "grad_norm": 7.735840320587158, "learning_rate": 4.078035949145112e-07, "loss": 11.7244, "step": 110400 }, { "epoch": 48.00086932898669, "grad_norm": 3.7973625659942627, "learning_rate": 4.0341955282770716e-07, "loss": 11.6379, "step": 110450 }, { "epoch": 48.0226025536539, "grad_norm": 30.568002700805664, "learning_rate": 3.990355107409032e-07, "loss": 11.7251, "step": 110500 }, { "epoch": 48.044335778321106, "grad_norm": 4.356334209442139, "learning_rate": 3.9465146865409915e-07, "loss": 11.7165, "step": 110550 }, { "epoch": 48.06606900298832, "grad_norm": 2.87776780128479, "learning_rate": 3.902674265672951e-07, "loss": 11.7094, "step": 110600 }, { "epoch": 48.08780222765553, "grad_norm": 7.523682117462158, "learning_rate": 3.858833844804911e-07, "loss": 11.7341, "step": 110650 }, { "epoch": 48.10953545232274, "grad_norm": 4.565247535705566, "learning_rate": 3.8149934239368695e-07, "loss": 11.7107, "step": 110700 }, { "epoch": 48.13126867698995, "grad_norm": 8.14168643951416, "learning_rate": 3.77115300306883e-07, "loss": 11.708, "step": 110750 }, { "epoch": 48.153001901657156, "grad_norm": 9.361733436584473, "learning_rate": 3.7273125822007895e-07, "loss": 11.7357, "step": 110800 }, { "epoch": 48.17473512632437, "grad_norm": 3.413947105407715, "learning_rate": 3.683472161332749e-07, "loss": 11.7106, "step": 110850 }, { "epoch": 48.19646835099158, "grad_norm": 3.5155258178710938, "learning_rate": 3.639631740464709e-07, "loss": 11.7122, "step": 110900 }, { "epoch": 48.21820157565879, "grad_norm": 3.1602470874786377, "learning_rate": 3.5957913195966685e-07, "loss": 11.7176, "step": 110950 }, { "epoch": 48.239934800326, "grad_norm": 9.386332511901855, "learning_rate": 3.551950898728628e-07, "loss": 11.724, "step": 111000 }, { "epoch": 48.261668024993206, "grad_norm": 4.479788780212402, "learning_rate": 3.508110477860588e-07, "loss": 11.7121, "step": 111050 }, { "epoch": 48.28340124966042, "grad_norm": 5.899557590484619, "learning_rate": 3.4642700569925476e-07, "loss": 11.7212, "step": 111100 }, { "epoch": 48.30513447432763, "grad_norm": 11.247237205505371, "learning_rate": 3.4204296361245073e-07, "loss": 11.7196, "step": 111150 }, { "epoch": 48.32686769899484, "grad_norm": 5.190598487854004, "learning_rate": 3.376589215256467e-07, "loss": 11.7078, "step": 111200 }, { "epoch": 48.34860092366205, "grad_norm": 4.109508037567139, "learning_rate": 3.332748794388426e-07, "loss": 11.7155, "step": 111250 }, { "epoch": 48.370334148329256, "grad_norm": 3.0162370204925537, "learning_rate": 3.288908373520386e-07, "loss": 11.7168, "step": 111300 }, { "epoch": 48.39206737299647, "grad_norm": 11.546656608581543, "learning_rate": 3.2450679526523456e-07, "loss": 11.7252, "step": 111350 }, { "epoch": 48.41380059766368, "grad_norm": 3.388889789581299, "learning_rate": 3.201227531784305e-07, "loss": 11.7268, "step": 111400 }, { "epoch": 48.43553382233089, "grad_norm": 6.033073902130127, "learning_rate": 3.157387110916265e-07, "loss": 11.7127, "step": 111450 }, { "epoch": 48.4572670469981, "grad_norm": 4.543982028961182, "learning_rate": 3.1135466900482246e-07, "loss": 11.7272, "step": 111500 }, { "epoch": 48.479000271665306, "grad_norm": 9.906218528747559, "learning_rate": 3.0697062691801843e-07, "loss": 11.7268, "step": 111550 }, { "epoch": 48.500733496332515, "grad_norm": 7.095948696136475, "learning_rate": 3.025865848312144e-07, "loss": 11.712, "step": 111600 }, { "epoch": 48.52246672099973, "grad_norm": 9.97701644897461, "learning_rate": 2.9820254274441037e-07, "loss": 11.722, "step": 111650 }, { "epoch": 48.54419994566694, "grad_norm": 4.398223876953125, "learning_rate": 2.9381850065760634e-07, "loss": 11.7101, "step": 111700 }, { "epoch": 48.56593317033415, "grad_norm": 3.77424693107605, "learning_rate": 2.894344585708023e-07, "loss": 11.7272, "step": 111750 }, { "epoch": 48.58766639500136, "grad_norm": 17.39592933654785, "learning_rate": 2.850504164839983e-07, "loss": 11.7154, "step": 111800 }, { "epoch": 48.609399619668565, "grad_norm": 3.8219528198242188, "learning_rate": 2.8066637439719425e-07, "loss": 11.7156, "step": 111850 }, { "epoch": 48.63113284433578, "grad_norm": 9.067111015319824, "learning_rate": 2.7628233231039017e-07, "loss": 11.7133, "step": 111900 }, { "epoch": 48.65286606900299, "grad_norm": 15.224953651428223, "learning_rate": 2.7189829022358614e-07, "loss": 11.7166, "step": 111950 }, { "epoch": 48.6745992936702, "grad_norm": 4.944436073303223, "learning_rate": 2.675142481367821e-07, "loss": 11.7152, "step": 112000 }, { "epoch": 48.69633251833741, "grad_norm": 11.312178611755371, "learning_rate": 2.6313020604997813e-07, "loss": 11.7157, "step": 112050 }, { "epoch": 48.718065743004615, "grad_norm": 5.6469011306762695, "learning_rate": 2.587461639631741e-07, "loss": 11.7188, "step": 112100 }, { "epoch": 48.73979896767183, "grad_norm": 3.34533429145813, "learning_rate": 2.5436212187637007e-07, "loss": 11.7275, "step": 112150 }, { "epoch": 48.76153219233904, "grad_norm": 9.967689514160156, "learning_rate": 2.49978079789566e-07, "loss": 11.717, "step": 112200 }, { "epoch": 48.78326541700625, "grad_norm": 5.482551574707031, "learning_rate": 2.4559403770276195e-07, "loss": 11.7111, "step": 112250 }, { "epoch": 48.80499864167346, "grad_norm": 4.191429615020752, "learning_rate": 2.412099956159579e-07, "loss": 11.7112, "step": 112300 }, { "epoch": 48.826731866340666, "grad_norm": 4.112410068511963, "learning_rate": 2.3682595352915392e-07, "loss": 11.7252, "step": 112350 }, { "epoch": 48.84846509100788, "grad_norm": 4.255959510803223, "learning_rate": 2.3244191144234989e-07, "loss": 11.7149, "step": 112400 }, { "epoch": 48.87019831567509, "grad_norm": 8.20151424407959, "learning_rate": 2.2805786935554583e-07, "loss": 11.7073, "step": 112450 }, { "epoch": 48.8919315403423, "grad_norm": 4.13128137588501, "learning_rate": 2.236738272687418e-07, "loss": 11.72, "step": 112500 }, { "epoch": 48.91366476500951, "grad_norm": 6.540150165557861, "learning_rate": 2.1928978518193777e-07, "loss": 11.707, "step": 112550 }, { "epoch": 48.935397989676716, "grad_norm": 10.835039138793945, "learning_rate": 2.1490574309513374e-07, "loss": 11.7185, "step": 112600 }, { "epoch": 48.95713121434393, "grad_norm": 11.767996788024902, "learning_rate": 2.105217010083297e-07, "loss": 11.7273, "step": 112650 }, { "epoch": 48.97886443901114, "grad_norm": 7.164200305938721, "learning_rate": 2.0613765892152568e-07, "loss": 11.7255, "step": 112700 }, { "epoch": 49.00043466449335, "grad_norm": 3.889307737350464, "learning_rate": 2.0175361683472162e-07, "loss": 11.6305, "step": 112750 }, { "epoch": 49.022167889160556, "grad_norm": 3.3905019760131836, "learning_rate": 1.973695747479176e-07, "loss": 11.7146, "step": 112800 }, { "epoch": 49.043901113827765, "grad_norm": 10.843219757080078, "learning_rate": 1.9298553266111356e-07, "loss": 11.7074, "step": 112850 }, { "epoch": 49.065634338494974, "grad_norm": 6.1026082038879395, "learning_rate": 1.8860149057430953e-07, "loss": 11.7187, "step": 112900 }, { "epoch": 49.08736756316218, "grad_norm": 12.958758354187012, "learning_rate": 1.842174484875055e-07, "loss": 11.7198, "step": 112950 }, { "epoch": 49.10910078782939, "grad_norm": 7.045960426330566, "learning_rate": 1.7983340640070144e-07, "loss": 11.7171, "step": 113000 }, { "epoch": 49.13083401249661, "grad_norm": 8.215546607971191, "learning_rate": 1.754493643138974e-07, "loss": 11.7161, "step": 113050 }, { "epoch": 49.152567237163815, "grad_norm": 3.4392971992492676, "learning_rate": 1.710653222270934e-07, "loss": 11.7033, "step": 113100 }, { "epoch": 49.174300461831024, "grad_norm": 4.333184242248535, "learning_rate": 1.6668128014028937e-07, "loss": 11.7221, "step": 113150 }, { "epoch": 49.19603368649823, "grad_norm": 8.148516654968262, "learning_rate": 1.6229723805348534e-07, "loss": 11.7099, "step": 113200 }, { "epoch": 49.21776691116544, "grad_norm": 10.723722457885742, "learning_rate": 1.579131959666813e-07, "loss": 11.7253, "step": 113250 }, { "epoch": 49.23950013583266, "grad_norm": 5.778897285461426, "learning_rate": 1.5352915387987726e-07, "loss": 11.7058, "step": 113300 }, { "epoch": 49.261233360499865, "grad_norm": 5.398443698883057, "learning_rate": 1.4914511179307322e-07, "loss": 11.7219, "step": 113350 }, { "epoch": 49.282966585167074, "grad_norm": 3.614530324935913, "learning_rate": 1.447610697062692e-07, "loss": 11.7105, "step": 113400 }, { "epoch": 49.30469980983428, "grad_norm": 4.205718040466309, "learning_rate": 1.4037702761946516e-07, "loss": 11.7128, "step": 113450 }, { "epoch": 49.32643303450149, "grad_norm": 5.203486442565918, "learning_rate": 1.3599298553266113e-07, "loss": 11.7145, "step": 113500 }, { "epoch": 49.34816625916871, "grad_norm": 3.4985852241516113, "learning_rate": 1.316089434458571e-07, "loss": 11.7212, "step": 113550 }, { "epoch": 49.369899483835916, "grad_norm": 9.43883991241455, "learning_rate": 1.2722490135905305e-07, "loss": 11.7124, "step": 113600 }, { "epoch": 49.391632708503124, "grad_norm": 7.489180088043213, "learning_rate": 1.2284085927224901e-07, "loss": 11.7207, "step": 113650 }, { "epoch": 49.41336593317033, "grad_norm": 9.499123573303223, "learning_rate": 1.18456817185445e-07, "loss": 11.7147, "step": 113700 }, { "epoch": 49.43509915783754, "grad_norm": 7.789849758148193, "learning_rate": 1.1407277509864095e-07, "loss": 11.7171, "step": 113750 }, { "epoch": 49.45683238250476, "grad_norm": 9.22687816619873, "learning_rate": 1.0968873301183692e-07, "loss": 11.7071, "step": 113800 }, { "epoch": 49.478565607171966, "grad_norm": 5.999274253845215, "learning_rate": 1.0530469092503289e-07, "loss": 11.7137, "step": 113850 }, { "epoch": 49.500298831839174, "grad_norm": 9.73884391784668, "learning_rate": 1.0092064883822885e-07, "loss": 11.7201, "step": 113900 }, { "epoch": 49.52203205650638, "grad_norm": 9.630657196044922, "learning_rate": 9.653660675142482e-08, "loss": 11.7212, "step": 113950 }, { "epoch": 49.54376528117359, "grad_norm": 4.612308979034424, "learning_rate": 9.21525646646208e-08, "loss": 11.7247, "step": 114000 }, { "epoch": 49.56549850584081, "grad_norm": 2.9876091480255127, "learning_rate": 8.776852257781676e-08, "loss": 11.7146, "step": 114050 }, { "epoch": 49.587231730508016, "grad_norm": 4.555498123168945, "learning_rate": 8.338448049101273e-08, "loss": 11.7073, "step": 114100 }, { "epoch": 49.608964955175225, "grad_norm": 5.015764236450195, "learning_rate": 7.900043840420868e-08, "loss": 11.7119, "step": 114150 }, { "epoch": 49.63069817984243, "grad_norm": 5.208141326904297, "learning_rate": 7.461639631740465e-08, "loss": 11.7178, "step": 114200 }, { "epoch": 49.65243140450964, "grad_norm": 5.420982837677002, "learning_rate": 7.023235423060062e-08, "loss": 11.7187, "step": 114250 }, { "epoch": 49.67416462917686, "grad_norm": 6.694780349731445, "learning_rate": 6.584831214379659e-08, "loss": 11.7102, "step": 114300 }, { "epoch": 49.695897853844066, "grad_norm": 4.203577995300293, "learning_rate": 6.146427005699255e-08, "loss": 11.7142, "step": 114350 }, { "epoch": 49.717631078511275, "grad_norm": 3.1716277599334717, "learning_rate": 5.7080227970188515e-08, "loss": 11.7139, "step": 114400 }, { "epoch": 49.73936430317848, "grad_norm": 3.897326946258545, "learning_rate": 5.269618588338448e-08, "loss": 11.712, "step": 114450 }, { "epoch": 49.76109752784569, "grad_norm": 13.347712516784668, "learning_rate": 4.8312143796580454e-08, "loss": 11.7155, "step": 114500 }, { "epoch": 49.78283075251291, "grad_norm": 6.420513153076172, "learning_rate": 4.392810170977642e-08, "loss": 11.7286, "step": 114550 }, { "epoch": 49.804563977180116, "grad_norm": 5.6966447830200195, "learning_rate": 3.954405962297238e-08, "loss": 11.7078, "step": 114600 }, { "epoch": 49.826297201847325, "grad_norm": 5.481497287750244, "learning_rate": 3.516001753616835e-08, "loss": 11.7061, "step": 114650 }, { "epoch": 49.84803042651453, "grad_norm": 6.5728840827941895, "learning_rate": 3.077597544936432e-08, "loss": 11.7121, "step": 114700 }, { "epoch": 49.86976365118174, "grad_norm": 7.013606071472168, "learning_rate": 2.639193336256028e-08, "loss": 11.7165, "step": 114750 }, { "epoch": 49.89149687584895, "grad_norm": 8.17546272277832, "learning_rate": 2.200789127575625e-08, "loss": 11.7226, "step": 114800 }, { "epoch": 49.91323010051617, "grad_norm": 11.53906536102295, "learning_rate": 1.7623849188952217e-08, "loss": 11.7159, "step": 114850 }, { "epoch": 49.934963325183375, "grad_norm": 3.259451389312744, "learning_rate": 1.3239807102148183e-08, "loss": 11.7256, "step": 114900 }, { "epoch": 49.956696549850584, "grad_norm": 9.84170913696289, "learning_rate": 8.855765015344147e-09, "loss": 11.7094, "step": 114950 }, { "epoch": 49.97842977451779, "grad_norm": 3.992011070251465, "learning_rate": 4.471722928540115e-09, "loss": 11.7083, "step": 115000 }, { "epoch": 50.0, "grad_norm": 7.805567264556885, "learning_rate": 8.768084173608068e-11, "loss": 11.6432, "step": 115050 } ], "logging_steps": 50, "max_steps": 115050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }