diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,16251 @@ +{ + "best_global_step": 110000, + "best_metric": 0.07368261883895177, + "best_model_checkpoint": "./TrOCR_SigLIP2_Aranizer_41K_AND_COMBINED/stage2/checkpoint-110000", + "epoch": 50.0, + "eval_steps": 10000, + "global_step": 115050, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021733224667209998, + "grad_norm": 190.25732421875, + "learning_rate": 4.900000000000001e-07, + "loss": 17.665, + "step": 50 + }, + { + "epoch": 0.043466449334419996, + "grad_norm": 101.98078918457031, + "learning_rate": 9.9e-07, + "loss": 17.0, + "step": 100 + }, + { + "epoch": 0.06519967400162999, + "grad_norm": 61.98678970336914, + "learning_rate": 1.4900000000000001e-06, + "loss": 16.6227, + "step": 150 + }, + { + "epoch": 0.08693289866883999, + "grad_norm": 77.35042572021484, + "learning_rate": 1.9900000000000004e-06, + "loss": 16.5637, + "step": 200 + }, + { + "epoch": 0.10866612333604998, + "grad_norm": 91.11700439453125, + "learning_rate": 2.4900000000000003e-06, + "loss": 16.7798, + "step": 250 + }, + { + "epoch": 0.13039934800325997, + "grad_norm": 95.30789184570312, + "learning_rate": 2.99e-06, + "loss": 16.4945, + "step": 300 + }, + { + "epoch": 0.15213257267046998, + "grad_norm": 81.99232482910156, + "learning_rate": 3.49e-06, + "loss": 16.2094, + "step": 350 + }, + { + "epoch": 0.17386579733767998, + "grad_norm": 65.64993286132812, + "learning_rate": 3.990000000000001e-06, + "loss": 16.4501, + "step": 400 + }, + { + "epoch": 0.19559902200489, + "grad_norm": 98.58848571777344, + "learning_rate": 4.49e-06, + "loss": 16.386, + "step": 450 + }, + { + "epoch": 0.21733224667209997, + "grad_norm": 85.1087646484375, + "learning_rate": 4.9900000000000005e-06, + "loss": 16.453, + "step": 500 + }, + { + "epoch": 0.23906547133930997, + "grad_norm": 68.43016052246094, + "learning_rate": 5.490000000000001e-06, + "loss": 16.245, + "step": 550 + }, + { + "epoch": 0.26079869600651995, + "grad_norm": 71.09822082519531, + "learning_rate": 5.99e-06, + "loss": 16.0378, + "step": 600 + }, + { + "epoch": 0.28253192067372995, + "grad_norm": 71.08029174804688, + "learning_rate": 6.4900000000000005e-06, + "loss": 16.7085, + "step": 650 + }, + { + "epoch": 0.30426514534093996, + "grad_norm": 66.14205169677734, + "learning_rate": 6.99e-06, + "loss": 16.3454, + "step": 700 + }, + { + "epoch": 0.32599837000814996, + "grad_norm": 93.4423599243164, + "learning_rate": 7.49e-06, + "loss": 16.4424, + "step": 750 + }, + { + "epoch": 0.34773159467535997, + "grad_norm": 80.68280029296875, + "learning_rate": 7.990000000000001e-06, + "loss": 16.5934, + "step": 800 + }, + { + "epoch": 0.36946481934257, + "grad_norm": 50.99578094482422, + "learning_rate": 8.49e-06, + "loss": 16.4154, + "step": 850 + }, + { + "epoch": 0.39119804400978, + "grad_norm": 73.9505386352539, + "learning_rate": 8.99e-06, + "loss": 16.4743, + "step": 900 + }, + { + "epoch": 0.4129312686769899, + "grad_norm": 53.72673034667969, + "learning_rate": 9.49e-06, + "loss": 16.5231, + "step": 950 + }, + { + "epoch": 0.43466449334419993, + "grad_norm": 80.76425170898438, + "learning_rate": 9.990000000000001e-06, + "loss": 16.3367, + "step": 1000 + }, + { + "epoch": 0.45639771801140994, + "grad_norm": 88.06133270263672, + "learning_rate": 9.995703638754933e-06, + "loss": 16.7444, + "step": 1050 + }, + { + "epoch": 0.47813094267861994, + "grad_norm": 49.88163757324219, + "learning_rate": 9.991319596668128e-06, + "loss": 16.4815, + "step": 1100 + }, + { + "epoch": 0.49986416734582995, + "grad_norm": 66.88523864746094, + "learning_rate": 9.986935554581326e-06, + "loss": 16.467, + "step": 1150 + }, + { + "epoch": 0.5215973920130399, + "grad_norm": 65.38529968261719, + "learning_rate": 9.982551512494521e-06, + "loss": 16.4785, + "step": 1200 + }, + { + "epoch": 0.54333061668025, + "grad_norm": 54.72123336791992, + "learning_rate": 9.978167470407716e-06, + "loss": 16.5669, + "step": 1250 + }, + { + "epoch": 0.5650638413474599, + "grad_norm": 52.83816909790039, + "learning_rate": 9.973783428320912e-06, + "loss": 16.3767, + "step": 1300 + }, + { + "epoch": 0.58679706601467, + "grad_norm": 72.39130401611328, + "learning_rate": 9.969399386234109e-06, + "loss": 16.2847, + "step": 1350 + }, + { + "epoch": 0.6085302906818799, + "grad_norm": 52.192020416259766, + "learning_rate": 9.965015344147304e-06, + "loss": 16.3576, + "step": 1400 + }, + { + "epoch": 0.63026351534909, + "grad_norm": 50.59873962402344, + "learning_rate": 9.960631302060501e-06, + "loss": 15.9625, + "step": 1450 + }, + { + "epoch": 0.6519967400162999, + "grad_norm": 253.75856018066406, + "learning_rate": 9.956247259973697e-06, + "loss": 16.3934, + "step": 1500 + }, + { + "epoch": 0.6737299646835099, + "grad_norm": 43.8497314453125, + "learning_rate": 9.951863217886892e-06, + "loss": 16.686, + "step": 1550 + }, + { + "epoch": 0.6954631893507199, + "grad_norm": 48.21563720703125, + "learning_rate": 9.947479175800089e-06, + "loss": 16.4491, + "step": 1600 + }, + { + "epoch": 0.7171964140179299, + "grad_norm": 59.72194290161133, + "learning_rate": 9.943095133713284e-06, + "loss": 16.1068, + "step": 1650 + }, + { + "epoch": 0.73892963868514, + "grad_norm": 117.0466079711914, + "learning_rate": 9.93871109162648e-06, + "loss": 16.3469, + "step": 1700 + }, + { + "epoch": 0.7606628633523499, + "grad_norm": 48.334346771240234, + "learning_rate": 9.934327049539675e-06, + "loss": 16.3334, + "step": 1750 + }, + { + "epoch": 0.78239608801956, + "grad_norm": 54.45792007446289, + "learning_rate": 9.929943007452872e-06, + "loss": 16.2452, + "step": 1800 + }, + { + "epoch": 0.8041293126867699, + "grad_norm": 39.005428314208984, + "learning_rate": 9.925558965366068e-06, + "loss": 16.0999, + "step": 1850 + }, + { + "epoch": 0.8258625373539799, + "grad_norm": 67.72175598144531, + "learning_rate": 9.921174923279265e-06, + "loss": 16.2755, + "step": 1900 + }, + { + "epoch": 0.8475957620211899, + "grad_norm": 72.75259399414062, + "learning_rate": 9.91679088119246e-06, + "loss": 16.5987, + "step": 1950 + }, + { + "epoch": 0.8693289866883999, + "grad_norm": 58.86764144897461, + "learning_rate": 9.912406839105656e-06, + "loss": 16.4092, + "step": 2000 + }, + { + "epoch": 0.8910622113556099, + "grad_norm": 73.46177673339844, + "learning_rate": 9.908022797018853e-06, + "loss": 16.637, + "step": 2050 + }, + { + "epoch": 0.9127954360228199, + "grad_norm": 62.41428756713867, + "learning_rate": 9.903638754932048e-06, + "loss": 16.528, + "step": 2100 + }, + { + "epoch": 0.9345286606900299, + "grad_norm": 65.01278686523438, + "learning_rate": 9.899254712845245e-06, + "loss": 16.5003, + "step": 2150 + }, + { + "epoch": 0.9562618853572399, + "grad_norm": 57.43757247924805, + "learning_rate": 9.894870670758439e-06, + "loss": 16.3343, + "step": 2200 + }, + { + "epoch": 0.9779951100244498, + "grad_norm": 55.26877975463867, + "learning_rate": 9.890486628671636e-06, + "loss": 16.4804, + "step": 2250 + }, + { + "epoch": 0.9997283346916599, + "grad_norm": 59.66270446777344, + "learning_rate": 9.886102586584833e-06, + "loss": 16.1125, + "step": 2300 + }, + { + "epoch": 1.0212985601738658, + "grad_norm": 65.677490234375, + "learning_rate": 9.881718544498028e-06, + "loss": 15.5081, + "step": 2350 + }, + { + "epoch": 1.0430317848410757, + "grad_norm": 46.62354278564453, + "learning_rate": 9.877334502411224e-06, + "loss": 15.5581, + "step": 2400 + }, + { + "epoch": 1.0647650095082857, + "grad_norm": 47.7025032043457, + "learning_rate": 9.87295046032442e-06, + "loss": 15.2401, + "step": 2450 + }, + { + "epoch": 1.0864982341754958, + "grad_norm": 57.22602081298828, + "learning_rate": 9.868566418237616e-06, + "loss": 15.5116, + "step": 2500 + }, + { + "epoch": 1.1082314588427058, + "grad_norm": 51.149818420410156, + "learning_rate": 9.864182376150812e-06, + "loss": 15.5938, + "step": 2550 + }, + { + "epoch": 1.1299646835099157, + "grad_norm": 80.70169067382812, + "learning_rate": 9.859798334064009e-06, + "loss": 15.5891, + "step": 2600 + }, + { + "epoch": 1.1516979081771257, + "grad_norm": 59.62293243408203, + "learning_rate": 9.855414291977204e-06, + "loss": 15.6038, + "step": 2650 + }, + { + "epoch": 1.1734311328443359, + "grad_norm": 109.22635650634766, + "learning_rate": 9.8510302498904e-06, + "loss": 15.4956, + "step": 2700 + }, + { + "epoch": 1.1951643575115458, + "grad_norm": 54.90534591674805, + "learning_rate": 9.846646207803597e-06, + "loss": 15.5951, + "step": 2750 + }, + { + "epoch": 1.2168975821787558, + "grad_norm": 130.99798583984375, + "learning_rate": 9.842262165716792e-06, + "loss": 15.5201, + "step": 2800 + }, + { + "epoch": 1.2386308068459657, + "grad_norm": 45.625389099121094, + "learning_rate": 9.837878123629987e-06, + "loss": 15.4576, + "step": 2850 + }, + { + "epoch": 1.2603640315131757, + "grad_norm": 36.033836364746094, + "learning_rate": 9.833494081543183e-06, + "loss": 15.4927, + "step": 2900 + }, + { + "epoch": 1.2820972561803858, + "grad_norm": 52.81075668334961, + "learning_rate": 9.82911003945638e-06, + "loss": 15.4669, + "step": 2950 + }, + { + "epoch": 1.3038304808475958, + "grad_norm": 43.44648361206055, + "learning_rate": 9.824725997369575e-06, + "loss": 15.5326, + "step": 3000 + }, + { + "epoch": 1.3255637055148057, + "grad_norm": 40.79172134399414, + "learning_rate": 9.820341955282772e-06, + "loss": 15.6252, + "step": 3050 + }, + { + "epoch": 1.3472969301820157, + "grad_norm": 54.189910888671875, + "learning_rate": 9.815957913195968e-06, + "loss": 15.5897, + "step": 3100 + }, + { + "epoch": 1.3690301548492259, + "grad_norm": 55.73503112792969, + "learning_rate": 9.811573871109163e-06, + "loss": 15.6081, + "step": 3150 + }, + { + "epoch": 1.3907633795164358, + "grad_norm": 67.98750305175781, + "learning_rate": 9.80718982902236e-06, + "loss": 15.5154, + "step": 3200 + }, + { + "epoch": 1.4124966041836458, + "grad_norm": 61.99040222167969, + "learning_rate": 9.802805786935556e-06, + "loss": 15.464, + "step": 3250 + }, + { + "epoch": 1.4342298288508557, + "grad_norm": 42.3632926940918, + "learning_rate": 9.798421744848751e-06, + "loss": 15.3425, + "step": 3300 + }, + { + "epoch": 1.4559630535180657, + "grad_norm": 46.5098991394043, + "learning_rate": 9.794037702761946e-06, + "loss": 15.5891, + "step": 3350 + }, + { + "epoch": 1.4776962781852756, + "grad_norm": 59.43826675415039, + "learning_rate": 9.789653660675143e-06, + "loss": 15.6337, + "step": 3400 + }, + { + "epoch": 1.4994295028524858, + "grad_norm": 76.57585906982422, + "learning_rate": 9.785269618588339e-06, + "loss": 15.4892, + "step": 3450 + }, + { + "epoch": 1.5211627275196957, + "grad_norm": 65.46538543701172, + "learning_rate": 9.780885576501536e-06, + "loss": 15.6873, + "step": 3500 + }, + { + "epoch": 1.542895952186906, + "grad_norm": 65.29698181152344, + "learning_rate": 9.776501534414731e-06, + "loss": 15.5051, + "step": 3550 + }, + { + "epoch": 1.5646291768541158, + "grad_norm": 46.745784759521484, + "learning_rate": 9.772117492327927e-06, + "loss": 15.6602, + "step": 3600 + }, + { + "epoch": 1.5863624015213258, + "grad_norm": 44.605228424072266, + "learning_rate": 9.767733450241124e-06, + "loss": 15.5229, + "step": 3650 + }, + { + "epoch": 1.6080956261885357, + "grad_norm": 47.4207649230957, + "learning_rate": 9.763349408154319e-06, + "loss": 15.5634, + "step": 3700 + }, + { + "epoch": 1.6298288508557457, + "grad_norm": 43.18611145019531, + "learning_rate": 9.758965366067516e-06, + "loss": 15.5222, + "step": 3750 + }, + { + "epoch": 1.6515620755229556, + "grad_norm": 39.6898078918457, + "learning_rate": 9.754581323980712e-06, + "loss": 15.5259, + "step": 3800 + }, + { + "epoch": 1.6732953001901656, + "grad_norm": 71.0409164428711, + "learning_rate": 9.750197281893907e-06, + "loss": 15.5971, + "step": 3850 + }, + { + "epoch": 1.6950285248573758, + "grad_norm": 53.462467193603516, + "learning_rate": 9.745813239807102e-06, + "loss": 15.4515, + "step": 3900 + }, + { + "epoch": 1.7167617495245857, + "grad_norm": 40.28457260131836, + "learning_rate": 9.7414291977203e-06, + "loss": 15.4006, + "step": 3950 + }, + { + "epoch": 1.7384949741917957, + "grad_norm": 50.27900695800781, + "learning_rate": 9.737045155633495e-06, + "loss": 15.3051, + "step": 4000 + }, + { + "epoch": 1.7602281988590058, + "grad_norm": 44.33418655395508, + "learning_rate": 9.73266111354669e-06, + "loss": 15.7606, + "step": 4050 + }, + { + "epoch": 1.7819614235262158, + "grad_norm": 65.12041473388672, + "learning_rate": 9.728277071459887e-06, + "loss": 15.4747, + "step": 4100 + }, + { + "epoch": 1.8036946481934257, + "grad_norm": 50.64781951904297, + "learning_rate": 9.723893029373083e-06, + "loss": 15.5251, + "step": 4150 + }, + { + "epoch": 1.8254278728606357, + "grad_norm": 37.71573257446289, + "learning_rate": 9.71950898728628e-06, + "loss": 15.4422, + "step": 4200 + }, + { + "epoch": 1.8471610975278456, + "grad_norm": 53.08781433105469, + "learning_rate": 9.715124945199475e-06, + "loss": 15.6055, + "step": 4250 + }, + { + "epoch": 1.8688943221950556, + "grad_norm": 40.412384033203125, + "learning_rate": 9.71074090311267e-06, + "loss": 15.3275, + "step": 4300 + }, + { + "epoch": 1.8906275468622655, + "grad_norm": 81.10236358642578, + "learning_rate": 9.706356861025866e-06, + "loss": 15.391, + "step": 4350 + }, + { + "epoch": 1.9123607715294757, + "grad_norm": 73.39491271972656, + "learning_rate": 9.701972818939063e-06, + "loss": 15.7053, + "step": 4400 + }, + { + "epoch": 1.9340939961966856, + "grad_norm": 42.71440124511719, + "learning_rate": 9.697588776852258e-06, + "loss": 15.484, + "step": 4450 + }, + { + "epoch": 1.9558272208638958, + "grad_norm": 73.08609008789062, + "learning_rate": 9.693204734765454e-06, + "loss": 15.5822, + "step": 4500 + }, + { + "epoch": 1.9775604455311058, + "grad_norm": 66.59615325927734, + "learning_rate": 9.68882069267865e-06, + "loss": 15.6825, + "step": 4550 + }, + { + "epoch": 1.9992936701983157, + "grad_norm": 63.10333251953125, + "learning_rate": 9.684436650591846e-06, + "loss": 15.5203, + "step": 4600 + }, + { + "epoch": 2.0208638956805216, + "grad_norm": 57.844970703125, + "learning_rate": 9.680052608505043e-06, + "loss": 14.5109, + "step": 4650 + }, + { + "epoch": 2.0425971203477316, + "grad_norm": 36.37318420410156, + "learning_rate": 9.675668566418239e-06, + "loss": 14.7488, + "step": 4700 + }, + { + "epoch": 2.0643303450149415, + "grad_norm": 72.80779266357422, + "learning_rate": 9.671284524331434e-06, + "loss": 14.9111, + "step": 4750 + }, + { + "epoch": 2.0860635696821515, + "grad_norm": 71.37971496582031, + "learning_rate": 9.66690048224463e-06, + "loss": 14.6878, + "step": 4800 + }, + { + "epoch": 2.1077967943493614, + "grad_norm": 42.20883560180664, + "learning_rate": 9.662516440157827e-06, + "loss": 14.7469, + "step": 4850 + }, + { + "epoch": 2.1295300190165714, + "grad_norm": 53.63486862182617, + "learning_rate": 9.658132398071022e-06, + "loss": 14.6422, + "step": 4900 + }, + { + "epoch": 2.1512632436837817, + "grad_norm": 54.38608932495117, + "learning_rate": 9.653748355984219e-06, + "loss": 14.6238, + "step": 4950 + }, + { + "epoch": 2.1729964683509917, + "grad_norm": 44.58712387084961, + "learning_rate": 9.649364313897414e-06, + "loss": 14.6619, + "step": 5000 + }, + { + "epoch": 2.1947296930182016, + "grad_norm": 46.281524658203125, + "learning_rate": 9.64498027181061e-06, + "loss": 14.8443, + "step": 5050 + }, + { + "epoch": 2.2164629176854116, + "grad_norm": 38.51953887939453, + "learning_rate": 9.640596229723807e-06, + "loss": 14.9273, + "step": 5100 + }, + { + "epoch": 2.2381961423526215, + "grad_norm": 53.27817153930664, + "learning_rate": 9.636212187637002e-06, + "loss": 14.6411, + "step": 5150 + }, + { + "epoch": 2.2599293670198315, + "grad_norm": 43.47584533691406, + "learning_rate": 9.631828145550198e-06, + "loss": 14.7459, + "step": 5200 + }, + { + "epoch": 2.2816625916870414, + "grad_norm": 37.26194381713867, + "learning_rate": 9.627444103463393e-06, + "loss": 14.9103, + "step": 5250 + }, + { + "epoch": 2.3033958163542514, + "grad_norm": 38.729373931884766, + "learning_rate": 9.62306006137659e-06, + "loss": 14.5397, + "step": 5300 + }, + { + "epoch": 2.3251290410214613, + "grad_norm": 33.352901458740234, + "learning_rate": 9.618676019289785e-06, + "loss": 14.9044, + "step": 5350 + }, + { + "epoch": 2.3468622656886717, + "grad_norm": 47.63081359863281, + "learning_rate": 9.614291977202983e-06, + "loss": 14.5471, + "step": 5400 + }, + { + "epoch": 2.3685954903558817, + "grad_norm": 125.63179016113281, + "learning_rate": 9.609907935116178e-06, + "loss": 14.8267, + "step": 5450 + }, + { + "epoch": 2.3903287150230916, + "grad_norm": 49.1522216796875, + "learning_rate": 9.605523893029373e-06, + "loss": 14.7433, + "step": 5500 + }, + { + "epoch": 2.4120619396903016, + "grad_norm": 43.327091217041016, + "learning_rate": 9.60113985094257e-06, + "loss": 14.908, + "step": 5550 + }, + { + "epoch": 2.4337951643575115, + "grad_norm": 30.76859474182129, + "learning_rate": 9.596755808855766e-06, + "loss": 14.7341, + "step": 5600 + }, + { + "epoch": 2.4555283890247215, + "grad_norm": 42.72526550292969, + "learning_rate": 9.592371766768961e-06, + "loss": 15.0516, + "step": 5650 + }, + { + "epoch": 2.4772616136919314, + "grad_norm": 58.9193000793457, + "learning_rate": 9.587987724682157e-06, + "loss": 14.8745, + "step": 5700 + }, + { + "epoch": 2.4989948383591414, + "grad_norm": 53.90520095825195, + "learning_rate": 9.583603682595354e-06, + "loss": 14.8219, + "step": 5750 + }, + { + "epoch": 2.5207280630263513, + "grad_norm": 61.370452880859375, + "learning_rate": 9.579219640508549e-06, + "loss": 14.6925, + "step": 5800 + }, + { + "epoch": 2.5424612876935617, + "grad_norm": 47.58317184448242, + "learning_rate": 9.574835598421746e-06, + "loss": 14.8654, + "step": 5850 + }, + { + "epoch": 2.5641945123607717, + "grad_norm": 51.90703582763672, + "learning_rate": 9.570451556334942e-06, + "loss": 14.8152, + "step": 5900 + }, + { + "epoch": 2.5859277370279816, + "grad_norm": 42.62101364135742, + "learning_rate": 9.566067514248137e-06, + "loss": 15.0139, + "step": 5950 + }, + { + "epoch": 2.6076609616951916, + "grad_norm": 58.69119644165039, + "learning_rate": 9.561683472161334e-06, + "loss": 14.8962, + "step": 6000 + }, + { + "epoch": 2.6293941863624015, + "grad_norm": 58.02621841430664, + "learning_rate": 9.55729943007453e-06, + "loss": 15.0422, + "step": 6050 + }, + { + "epoch": 2.6511274110296115, + "grad_norm": 45.985225677490234, + "learning_rate": 9.552915387987726e-06, + "loss": 14.8361, + "step": 6100 + }, + { + "epoch": 2.6728606356968214, + "grad_norm": 58.74437713623047, + "learning_rate": 9.548531345900922e-06, + "loss": 14.9231, + "step": 6150 + }, + { + "epoch": 2.6945938603640314, + "grad_norm": 54.490962982177734, + "learning_rate": 9.544147303814117e-06, + "loss": 14.7987, + "step": 6200 + }, + { + "epoch": 2.7163270850312413, + "grad_norm": 44.067710876464844, + "learning_rate": 9.539763261727313e-06, + "loss": 14.8596, + "step": 6250 + }, + { + "epoch": 2.7380603096984517, + "grad_norm": 56.0435676574707, + "learning_rate": 9.53537921964051e-06, + "loss": 14.7602, + "step": 6300 + }, + { + "epoch": 2.7597935343656617, + "grad_norm": 68.08670806884766, + "learning_rate": 9.530995177553705e-06, + "loss": 14.78, + "step": 6350 + }, + { + "epoch": 2.7815267590328716, + "grad_norm": 55.21569061279297, + "learning_rate": 9.5266111354669e-06, + "loss": 14.908, + "step": 6400 + }, + { + "epoch": 2.8032599837000816, + "grad_norm": 48.79258346557617, + "learning_rate": 9.522227093380098e-06, + "loss": 14.8478, + "step": 6450 + }, + { + "epoch": 2.8249932083672915, + "grad_norm": 61.38957214355469, + "learning_rate": 9.517843051293293e-06, + "loss": 14.9716, + "step": 6500 + }, + { + "epoch": 2.8467264330345015, + "grad_norm": 53.00950622558594, + "learning_rate": 9.51345900920649e-06, + "loss": 14.7508, + "step": 6550 + }, + { + "epoch": 2.8684596577017114, + "grad_norm": 43.13687515258789, + "learning_rate": 9.509074967119685e-06, + "loss": 14.8466, + "step": 6600 + }, + { + "epoch": 2.8901928823689214, + "grad_norm": 54.39565658569336, + "learning_rate": 9.50469092503288e-06, + "loss": 14.8025, + "step": 6650 + }, + { + "epoch": 2.9119261070361313, + "grad_norm": 47.21046447753906, + "learning_rate": 9.500306882946078e-06, + "loss": 14.8096, + "step": 6700 + }, + { + "epoch": 2.9336593317033417, + "grad_norm": 51.13401412963867, + "learning_rate": 9.495922840859273e-06, + "loss": 14.9482, + "step": 6750 + }, + { + "epoch": 2.955392556370551, + "grad_norm": 47.619503021240234, + "learning_rate": 9.491538798772469e-06, + "loss": 14.9805, + "step": 6800 + }, + { + "epoch": 2.9771257810377616, + "grad_norm": 40.876808166503906, + "learning_rate": 9.487154756685664e-06, + "loss": 14.9048, + "step": 6850 + }, + { + "epoch": 2.9988590057049715, + "grad_norm": 52.037567138671875, + "learning_rate": 9.482770714598861e-06, + "loss": 14.9026, + "step": 6900 + }, + { + "epoch": 3.0204292311871774, + "grad_norm": 41.4274787902832, + "learning_rate": 9.478386672512057e-06, + "loss": 14.1481, + "step": 6950 + }, + { + "epoch": 3.0421624558543874, + "grad_norm": 51.49604797363281, + "learning_rate": 9.474002630425254e-06, + "loss": 14.1383, + "step": 7000 + }, + { + "epoch": 3.0638956805215973, + "grad_norm": 53.052005767822266, + "learning_rate": 9.469618588338449e-06, + "loss": 14.2031, + "step": 7050 + }, + { + "epoch": 3.0856289051888073, + "grad_norm": 29.748735427856445, + "learning_rate": 9.465234546251644e-06, + "loss": 14.2273, + "step": 7100 + }, + { + "epoch": 3.1073621298560172, + "grad_norm": 41.33003616333008, + "learning_rate": 9.460850504164841e-06, + "loss": 14.2338, + "step": 7150 + }, + { + "epoch": 3.129095354523227, + "grad_norm": 39.78664779663086, + "learning_rate": 9.456466462078037e-06, + "loss": 14.1309, + "step": 7200 + }, + { + "epoch": 3.1508285791904376, + "grad_norm": 35.99256896972656, + "learning_rate": 9.452082419991234e-06, + "loss": 14.2261, + "step": 7250 + }, + { + "epoch": 3.1725618038576475, + "grad_norm": 40.001197814941406, + "learning_rate": 9.44769837790443e-06, + "loss": 14.1562, + "step": 7300 + }, + { + "epoch": 3.1942950285248575, + "grad_norm": 48.2380256652832, + "learning_rate": 9.443314335817625e-06, + "loss": 14.0423, + "step": 7350 + }, + { + "epoch": 3.2160282531920674, + "grad_norm": 44.41048812866211, + "learning_rate": 9.43893029373082e-06, + "loss": 14.0881, + "step": 7400 + }, + { + "epoch": 3.2377614778592774, + "grad_norm": 29.655723571777344, + "learning_rate": 9.434546251644017e-06, + "loss": 14.2163, + "step": 7450 + }, + { + "epoch": 3.2594947025264873, + "grad_norm": 40.9448356628418, + "learning_rate": 9.430162209557213e-06, + "loss": 14.2057, + "step": 7500 + }, + { + "epoch": 3.2812279271936973, + "grad_norm": 50.84467315673828, + "learning_rate": 9.425778167470408e-06, + "loss": 14.2386, + "step": 7550 + }, + { + "epoch": 3.302961151860907, + "grad_norm": 46.98764419555664, + "learning_rate": 9.421394125383605e-06, + "loss": 14.2241, + "step": 7600 + }, + { + "epoch": 3.324694376528117, + "grad_norm": 46.322166442871094, + "learning_rate": 9.4170100832968e-06, + "loss": 14.1992, + "step": 7650 + }, + { + "epoch": 3.3464276011953276, + "grad_norm": 45.123985290527344, + "learning_rate": 9.412626041209997e-06, + "loss": 14.2106, + "step": 7700 + }, + { + "epoch": 3.3681608258625375, + "grad_norm": 50.508583068847656, + "learning_rate": 9.408241999123193e-06, + "loss": 14.0418, + "step": 7750 + }, + { + "epoch": 3.3898940505297475, + "grad_norm": 42.03702926635742, + "learning_rate": 9.403857957036388e-06, + "loss": 14.298, + "step": 7800 + }, + { + "epoch": 3.4116272751969574, + "grad_norm": 49.16743469238281, + "learning_rate": 9.399473914949584e-06, + "loss": 14.2654, + "step": 7850 + }, + { + "epoch": 3.4333604998641674, + "grad_norm": 47.92793273925781, + "learning_rate": 9.39508987286278e-06, + "loss": 14.3558, + "step": 7900 + }, + { + "epoch": 3.4550937245313773, + "grad_norm": 37.79042434692383, + "learning_rate": 9.390705830775976e-06, + "loss": 14.2301, + "step": 7950 + }, + { + "epoch": 3.4768269491985873, + "grad_norm": 41.851051330566406, + "learning_rate": 9.386321788689171e-06, + "loss": 14.2025, + "step": 8000 + }, + { + "epoch": 3.498560173865797, + "grad_norm": 58.03968811035156, + "learning_rate": 9.381937746602369e-06, + "loss": 14.3552, + "step": 8050 + }, + { + "epoch": 3.520293398533007, + "grad_norm": 34.9418830871582, + "learning_rate": 9.377553704515564e-06, + "loss": 14.3336, + "step": 8100 + }, + { + "epoch": 3.5420266232002175, + "grad_norm": 41.05316162109375, + "learning_rate": 9.373169662428761e-06, + "loss": 14.2819, + "step": 8150 + }, + { + "epoch": 3.563759847867427, + "grad_norm": 45.65940856933594, + "learning_rate": 9.368785620341956e-06, + "loss": 14.2237, + "step": 8200 + }, + { + "epoch": 3.5854930725346374, + "grad_norm": 37.271751403808594, + "learning_rate": 9.364401578255152e-06, + "loss": 14.0594, + "step": 8250 + }, + { + "epoch": 3.6072262972018474, + "grad_norm": 41.95325469970703, + "learning_rate": 9.360017536168347e-06, + "loss": 14.1691, + "step": 8300 + }, + { + "epoch": 3.6289595218690573, + "grad_norm": 91.28557586669922, + "learning_rate": 9.355633494081544e-06, + "loss": 14.1016, + "step": 8350 + }, + { + "epoch": 3.6506927465362673, + "grad_norm": 56.508670806884766, + "learning_rate": 9.35124945199474e-06, + "loss": 14.2686, + "step": 8400 + }, + { + "epoch": 3.6724259712034772, + "grad_norm": 69.8916015625, + "learning_rate": 9.346865409907935e-06, + "loss": 14.3426, + "step": 8450 + }, + { + "epoch": 3.694159195870687, + "grad_norm": 51.414215087890625, + "learning_rate": 9.342481367821132e-06, + "loss": 14.3489, + "step": 8500 + }, + { + "epoch": 3.715892420537897, + "grad_norm": 51.891639709472656, + "learning_rate": 9.338097325734328e-06, + "loss": 14.2614, + "step": 8550 + }, + { + "epoch": 3.7376256452051075, + "grad_norm": 55.276275634765625, + "learning_rate": 9.333713283647525e-06, + "loss": 14.1835, + "step": 8600 + }, + { + "epoch": 3.759358869872317, + "grad_norm": 38.33846664428711, + "learning_rate": 9.32932924156072e-06, + "loss": 14.5973, + "step": 8650 + }, + { + "epoch": 3.7810920945395274, + "grad_norm": 46.052513122558594, + "learning_rate": 9.324945199473915e-06, + "loss": 14.3903, + "step": 8700 + }, + { + "epoch": 3.8028253192067374, + "grad_norm": 35.333560943603516, + "learning_rate": 9.32056115738711e-06, + "loss": 14.3516, + "step": 8750 + }, + { + "epoch": 3.8245585438739473, + "grad_norm": 52.49406051635742, + "learning_rate": 9.316177115300308e-06, + "loss": 14.371, + "step": 8800 + }, + { + "epoch": 3.8462917685411573, + "grad_norm": 48.86211013793945, + "learning_rate": 9.311793073213503e-06, + "loss": 14.2632, + "step": 8850 + }, + { + "epoch": 3.8680249932083672, + "grad_norm": 48.95231628417969, + "learning_rate": 9.3074090311267e-06, + "loss": 14.1847, + "step": 8900 + }, + { + "epoch": 3.889758217875577, + "grad_norm": 37.594696044921875, + "learning_rate": 9.303024989039896e-06, + "loss": 14.3286, + "step": 8950 + }, + { + "epoch": 3.911491442542787, + "grad_norm": 47.66452407836914, + "learning_rate": 9.298640946953091e-06, + "loss": 14.3358, + "step": 9000 + }, + { + "epoch": 3.9332246672099975, + "grad_norm": 40.61109161376953, + "learning_rate": 9.294256904866288e-06, + "loss": 14.4558, + "step": 9050 + }, + { + "epoch": 3.954957891877207, + "grad_norm": 34.296836853027344, + "learning_rate": 9.289872862779484e-06, + "loss": 14.3049, + "step": 9100 + }, + { + "epoch": 3.9766911165444174, + "grad_norm": 43.91560363769531, + "learning_rate": 9.285488820692679e-06, + "loss": 14.3231, + "step": 9150 + }, + { + "epoch": 3.9984243412116274, + "grad_norm": 37.4168586730957, + "learning_rate": 9.281104778605874e-06, + "loss": 14.2418, + "step": 9200 + }, + { + "epoch": 4.019994566693833, + "grad_norm": 34.46104049682617, + "learning_rate": 9.276720736519071e-06, + "loss": 13.6035, + "step": 9250 + }, + { + "epoch": 4.041727791361043, + "grad_norm": 38.560298919677734, + "learning_rate": 9.272336694432267e-06, + "loss": 13.6357, + "step": 9300 + }, + { + "epoch": 4.063461016028254, + "grad_norm": 35.547752380371094, + "learning_rate": 9.267952652345464e-06, + "loss": 13.798, + "step": 9350 + }, + { + "epoch": 4.085194240695463, + "grad_norm": 36.332298278808594, + "learning_rate": 9.26356861025866e-06, + "loss": 13.6992, + "step": 9400 + }, + { + "epoch": 4.1069274653626735, + "grad_norm": 40.322715759277344, + "learning_rate": 9.259184568171855e-06, + "loss": 13.7247, + "step": 9450 + }, + { + "epoch": 4.128660690029883, + "grad_norm": 27.05885887145996, + "learning_rate": 9.254800526085052e-06, + "loss": 13.801, + "step": 9500 + }, + { + "epoch": 4.150393914697093, + "grad_norm": 38.66703414916992, + "learning_rate": 9.250416483998247e-06, + "loss": 13.7814, + "step": 9550 + }, + { + "epoch": 4.172127139364303, + "grad_norm": 37.8776969909668, + "learning_rate": 9.246032441911443e-06, + "loss": 13.7403, + "step": 9600 + }, + { + "epoch": 4.193860364031513, + "grad_norm": 36.977317810058594, + "learning_rate": 9.241648399824638e-06, + "loss": 13.8831, + "step": 9650 + }, + { + "epoch": 4.215593588698723, + "grad_norm": 43.09788131713867, + "learning_rate": 9.237264357737835e-06, + "loss": 13.7397, + "step": 9700 + }, + { + "epoch": 4.237326813365933, + "grad_norm": 33.9801139831543, + "learning_rate": 9.23288031565103e-06, + "loss": 13.8114, + "step": 9750 + }, + { + "epoch": 4.259060038033143, + "grad_norm": 32.65711212158203, + "learning_rate": 9.228496273564227e-06, + "loss": 13.7081, + "step": 9800 + }, + { + "epoch": 4.280793262700353, + "grad_norm": 71.91608428955078, + "learning_rate": 9.224112231477423e-06, + "loss": 13.7953, + "step": 9850 + }, + { + "epoch": 4.3025264873675635, + "grad_norm": 28.490583419799805, + "learning_rate": 9.219728189390618e-06, + "loss": 13.7322, + "step": 9900 + }, + { + "epoch": 4.324259712034773, + "grad_norm": 49.53886795043945, + "learning_rate": 9.215344147303815e-06, + "loss": 13.8046, + "step": 9950 + }, + { + "epoch": 4.345992936701983, + "grad_norm": 40.42410659790039, + "learning_rate": 9.21096010521701e-06, + "loss": 13.735, + "step": 10000 + }, + { + "epoch": 4.345992936701983, + "eval_cer": 0.07540147877501142, + "eval_loss": 2.2973904609680176, + "eval_runtime": 396.0823, + "eval_samples_per_second": 13.649, + "eval_steps_per_second": 3.413, + "eval_wer": 0.22808918197519235, + "step": 10000 + }, + { + "epoch": 4.367726161369193, + "grad_norm": 32.75251388549805, + "learning_rate": 9.206576063130208e-06, + "loss": 13.8058, + "step": 10050 + }, + { + "epoch": 4.389459386036403, + "grad_norm": 35.6936149597168, + "learning_rate": 9.202192021043403e-06, + "loss": 13.7489, + "step": 10100 + }, + { + "epoch": 4.411192610703613, + "grad_norm": 39.304100036621094, + "learning_rate": 9.197807978956599e-06, + "loss": 13.8124, + "step": 10150 + }, + { + "epoch": 4.432925835370823, + "grad_norm": 39.43230438232422, + "learning_rate": 9.193423936869794e-06, + "loss": 13.9531, + "step": 10200 + }, + { + "epoch": 4.454659060038033, + "grad_norm": 37.89631652832031, + "learning_rate": 9.189039894782991e-06, + "loss": 13.7975, + "step": 10250 + }, + { + "epoch": 4.476392284705243, + "grad_norm": 36.32379150390625, + "learning_rate": 9.184655852696186e-06, + "loss": 13.9208, + "step": 10300 + }, + { + "epoch": 4.4981255093724535, + "grad_norm": 39.24440002441406, + "learning_rate": 9.180271810609382e-06, + "loss": 13.88, + "step": 10350 + }, + { + "epoch": 4.519858734039663, + "grad_norm": 32.791900634765625, + "learning_rate": 9.175887768522579e-06, + "loss": 13.8944, + "step": 10400 + }, + { + "epoch": 4.541591958706873, + "grad_norm": 33.695865631103516, + "learning_rate": 9.171503726435774e-06, + "loss": 13.8637, + "step": 10450 + }, + { + "epoch": 4.563325183374083, + "grad_norm": 33.961647033691406, + "learning_rate": 9.167119684348971e-06, + "loss": 13.7873, + "step": 10500 + }, + { + "epoch": 4.585058408041293, + "grad_norm": 101.09957122802734, + "learning_rate": 9.162735642262167e-06, + "loss": 13.848, + "step": 10550 + }, + { + "epoch": 4.606791632708503, + "grad_norm": 42.666595458984375, + "learning_rate": 9.158351600175362e-06, + "loss": 13.9049, + "step": 10600 + }, + { + "epoch": 4.628524857375713, + "grad_norm": 44.05756378173828, + "learning_rate": 9.153967558088558e-06, + "loss": 13.9251, + "step": 10650 + }, + { + "epoch": 4.650258082042923, + "grad_norm": 44.468162536621094, + "learning_rate": 9.149583516001755e-06, + "loss": 13.7975, + "step": 10700 + }, + { + "epoch": 4.671991306710133, + "grad_norm": 35.0707893371582, + "learning_rate": 9.14519947391495e-06, + "loss": 13.9261, + "step": 10750 + }, + { + "epoch": 4.6937245313773435, + "grad_norm": 40.042274475097656, + "learning_rate": 9.140815431828145e-06, + "loss": 13.8594, + "step": 10800 + }, + { + "epoch": 4.715457756044553, + "grad_norm": 29.434371948242188, + "learning_rate": 9.136431389741342e-06, + "loss": 13.8838, + "step": 10850 + }, + { + "epoch": 4.737190980711763, + "grad_norm": 37.34782409667969, + "learning_rate": 9.132047347654538e-06, + "loss": 13.7635, + "step": 10900 + }, + { + "epoch": 4.758924205378973, + "grad_norm": 221.30532836914062, + "learning_rate": 9.127663305567735e-06, + "loss": 13.76, + "step": 10950 + }, + { + "epoch": 4.780657430046183, + "grad_norm": 35.67972946166992, + "learning_rate": 9.12327926348093e-06, + "loss": 13.8596, + "step": 11000 + }, + { + "epoch": 4.802390654713393, + "grad_norm": 52.167972564697266, + "learning_rate": 9.118895221394126e-06, + "loss": 13.8706, + "step": 11050 + }, + { + "epoch": 4.824123879380603, + "grad_norm": 47.18834686279297, + "learning_rate": 9.114511179307321e-06, + "loss": 13.8763, + "step": 11100 + }, + { + "epoch": 4.845857104047813, + "grad_norm": 42.99448776245117, + "learning_rate": 9.110127137220518e-06, + "loss": 13.9622, + "step": 11150 + }, + { + "epoch": 4.867590328715023, + "grad_norm": 55.08070755004883, + "learning_rate": 9.105743095133715e-06, + "loss": 13.8151, + "step": 11200 + }, + { + "epoch": 4.8893235533822335, + "grad_norm": 32.91100311279297, + "learning_rate": 9.10135905304691e-06, + "loss": 13.8262, + "step": 11250 + }, + { + "epoch": 4.911056778049443, + "grad_norm": 34.78753662109375, + "learning_rate": 9.096975010960106e-06, + "loss": 13.928, + "step": 11300 + }, + { + "epoch": 4.932790002716653, + "grad_norm": 40.1533203125, + "learning_rate": 9.092590968873301e-06, + "loss": 13.8169, + "step": 11350 + }, + { + "epoch": 4.954523227383863, + "grad_norm": 41.97115707397461, + "learning_rate": 9.088206926786498e-06, + "loss": 13.8194, + "step": 11400 + }, + { + "epoch": 4.976256452051073, + "grad_norm": 30.406110763549805, + "learning_rate": 9.083822884699694e-06, + "loss": 13.9941, + "step": 11450 + }, + { + "epoch": 4.997989676718283, + "grad_norm": 44.02429962158203, + "learning_rate": 9.07943884261289e-06, + "loss": 13.9272, + "step": 11500 + }, + { + "epoch": 5.019559902200489, + "grad_norm": 42.15421676635742, + "learning_rate": 9.075054800526086e-06, + "loss": 13.386, + "step": 11550 + }, + { + "epoch": 5.041293126867699, + "grad_norm": 28.951597213745117, + "learning_rate": 9.070670758439282e-06, + "loss": 13.4325, + "step": 11600 + }, + { + "epoch": 5.0630263515349085, + "grad_norm": 59.380374908447266, + "learning_rate": 9.066286716352479e-06, + "loss": 13.409, + "step": 11650 + }, + { + "epoch": 5.084759576202119, + "grad_norm": 56.05976867675781, + "learning_rate": 9.061902674265674e-06, + "loss": 13.5315, + "step": 11700 + }, + { + "epoch": 5.106492800869329, + "grad_norm": 36.069583892822266, + "learning_rate": 9.05751863217887e-06, + "loss": 13.3532, + "step": 11750 + }, + { + "epoch": 5.128226025536539, + "grad_norm": 39.289833068847656, + "learning_rate": 9.053134590092065e-06, + "loss": 13.3686, + "step": 11800 + }, + { + "epoch": 5.149959250203749, + "grad_norm": 37.062931060791016, + "learning_rate": 9.048750548005262e-06, + "loss": 13.4362, + "step": 11850 + }, + { + "epoch": 5.171692474870959, + "grad_norm": 56.210750579833984, + "learning_rate": 9.044366505918457e-06, + "loss": 13.4053, + "step": 11900 + }, + { + "epoch": 5.193425699538169, + "grad_norm": 26.70563507080078, + "learning_rate": 9.039982463831653e-06, + "loss": 13.3191, + "step": 11950 + }, + { + "epoch": 5.215158924205379, + "grad_norm": 39.95426940917969, + "learning_rate": 9.03559842174485e-06, + "loss": 13.3161, + "step": 12000 + }, + { + "epoch": 5.236892148872589, + "grad_norm": 29.761014938354492, + "learning_rate": 9.031214379658045e-06, + "loss": 13.5076, + "step": 12050 + }, + { + "epoch": 5.2586253735397985, + "grad_norm": 32.707786560058594, + "learning_rate": 9.026830337571242e-06, + "loss": 13.4865, + "step": 12100 + }, + { + "epoch": 5.280358598207009, + "grad_norm": 30.934314727783203, + "learning_rate": 9.022446295484438e-06, + "loss": 13.3348, + "step": 12150 + }, + { + "epoch": 5.302091822874219, + "grad_norm": 38.97114562988281, + "learning_rate": 9.018062253397633e-06, + "loss": 13.4458, + "step": 12200 + }, + { + "epoch": 5.323825047541429, + "grad_norm": 52.749507904052734, + "learning_rate": 9.013678211310829e-06, + "loss": 13.5492, + "step": 12250 + }, + { + "epoch": 5.345558272208639, + "grad_norm": 37.54782485961914, + "learning_rate": 9.009294169224026e-06, + "loss": 13.3087, + "step": 12300 + }, + { + "epoch": 5.367291496875849, + "grad_norm": 40.16310501098633, + "learning_rate": 9.004910127137221e-06, + "loss": 13.4691, + "step": 12350 + }, + { + "epoch": 5.389024721543059, + "grad_norm": 58.52961349487305, + "learning_rate": 9.000526085050418e-06, + "loss": 13.5101, + "step": 12400 + }, + { + "epoch": 5.410757946210269, + "grad_norm": 31.150737762451172, + "learning_rate": 8.996142042963613e-06, + "loss": 13.3933, + "step": 12450 + }, + { + "epoch": 5.432491170877479, + "grad_norm": 31.380889892578125, + "learning_rate": 8.991758000876809e-06, + "loss": 13.5171, + "step": 12500 + }, + { + "epoch": 5.4542243955446885, + "grad_norm": 45.46767044067383, + "learning_rate": 8.987373958790006e-06, + "loss": 13.4807, + "step": 12550 + }, + { + "epoch": 5.475957620211899, + "grad_norm": 42.970542907714844, + "learning_rate": 8.982989916703201e-06, + "loss": 13.4787, + "step": 12600 + }, + { + "epoch": 5.497690844879109, + "grad_norm": 51.134578704833984, + "learning_rate": 8.978605874616397e-06, + "loss": 13.4804, + "step": 12650 + }, + { + "epoch": 5.519424069546319, + "grad_norm": 37.2877082824707, + "learning_rate": 8.974221832529592e-06, + "loss": 13.5335, + "step": 12700 + }, + { + "epoch": 5.541157294213529, + "grad_norm": 41.327144622802734, + "learning_rate": 8.96983779044279e-06, + "loss": 13.5202, + "step": 12750 + }, + { + "epoch": 5.562890518880739, + "grad_norm": 41.8232421875, + "learning_rate": 8.965453748355985e-06, + "loss": 13.4273, + "step": 12800 + }, + { + "epoch": 5.584623743547949, + "grad_norm": 34.09703063964844, + "learning_rate": 8.961069706269182e-06, + "loss": 13.5441, + "step": 12850 + }, + { + "epoch": 5.606356968215159, + "grad_norm": 34.51966094970703, + "learning_rate": 8.956685664182377e-06, + "loss": 13.5746, + "step": 12900 + }, + { + "epoch": 5.628090192882369, + "grad_norm": 44.580360412597656, + "learning_rate": 8.952301622095572e-06, + "loss": 13.5962, + "step": 12950 + }, + { + "epoch": 5.6498234175495785, + "grad_norm": 49.46404266357422, + "learning_rate": 8.94791758000877e-06, + "loss": 13.5788, + "step": 13000 + }, + { + "epoch": 5.671556642216789, + "grad_norm": 33.00864028930664, + "learning_rate": 8.943533537921965e-06, + "loss": 13.4571, + "step": 13050 + }, + { + "epoch": 5.693289866883999, + "grad_norm": 31.570575714111328, + "learning_rate": 8.93914949583516e-06, + "loss": 13.512, + "step": 13100 + }, + { + "epoch": 5.715023091551209, + "grad_norm": 31.16398048400879, + "learning_rate": 8.934765453748356e-06, + "loss": 13.5388, + "step": 13150 + }, + { + "epoch": 5.736756316218419, + "grad_norm": 31.840078353881836, + "learning_rate": 8.930381411661553e-06, + "loss": 13.5697, + "step": 13200 + }, + { + "epoch": 5.758489540885629, + "grad_norm": 41.02314376831055, + "learning_rate": 8.925997369574748e-06, + "loss": 13.5952, + "step": 13250 + }, + { + "epoch": 5.780222765552839, + "grad_norm": 38.16290283203125, + "learning_rate": 8.921613327487945e-06, + "loss": 13.6544, + "step": 13300 + }, + { + "epoch": 5.801955990220049, + "grad_norm": 34.18564224243164, + "learning_rate": 8.91722928540114e-06, + "loss": 13.6995, + "step": 13350 + }, + { + "epoch": 5.823689214887259, + "grad_norm": 27.264175415039062, + "learning_rate": 8.912845243314336e-06, + "loss": 13.5539, + "step": 13400 + }, + { + "epoch": 5.8454224395544685, + "grad_norm": 39.271888732910156, + "learning_rate": 8.908461201227533e-06, + "loss": 13.6108, + "step": 13450 + }, + { + "epoch": 5.867155664221679, + "grad_norm": 25.51955223083496, + "learning_rate": 8.904077159140728e-06, + "loss": 13.5095, + "step": 13500 + }, + { + "epoch": 5.888888888888889, + "grad_norm": 37.255367279052734, + "learning_rate": 8.899693117053926e-06, + "loss": 13.5916, + "step": 13550 + }, + { + "epoch": 5.910622113556099, + "grad_norm": 36.901702880859375, + "learning_rate": 8.89530907496712e-06, + "loss": 13.5283, + "step": 13600 + }, + { + "epoch": 5.932355338223309, + "grad_norm": 36.892799377441406, + "learning_rate": 8.890925032880316e-06, + "loss": 13.6032, + "step": 13650 + }, + { + "epoch": 5.954088562890519, + "grad_norm": 36.8080940246582, + "learning_rate": 8.886540990793512e-06, + "loss": 13.7407, + "step": 13700 + }, + { + "epoch": 5.975821787557729, + "grad_norm": 41.102657318115234, + "learning_rate": 8.882156948706709e-06, + "loss": 13.5335, + "step": 13750 + }, + { + "epoch": 5.997555012224939, + "grad_norm": 31.643165588378906, + "learning_rate": 8.877772906619904e-06, + "loss": 13.6137, + "step": 13800 + }, + { + "epoch": 6.0191252377071445, + "grad_norm": 35.148006439208984, + "learning_rate": 8.8733888645331e-06, + "loss": 13.1715, + "step": 13850 + }, + { + "epoch": 6.040858462374355, + "grad_norm": 34.13616943359375, + "learning_rate": 8.869004822446297e-06, + "loss": 13.1644, + "step": 13900 + }, + { + "epoch": 6.062591687041564, + "grad_norm": 43.90581512451172, + "learning_rate": 8.864620780359492e-06, + "loss": 13.0996, + "step": 13950 + }, + { + "epoch": 6.084324911708775, + "grad_norm": 36.725379943847656, + "learning_rate": 8.860236738272689e-06, + "loss": 13.1415, + "step": 14000 + }, + { + "epoch": 6.106058136375985, + "grad_norm": 32.847129821777344, + "learning_rate": 8.855852696185884e-06, + "loss": 13.1429, + "step": 14050 + }, + { + "epoch": 6.127791361043195, + "grad_norm": 27.32487678527832, + "learning_rate": 8.85146865409908e-06, + "loss": 13.2287, + "step": 14100 + }, + { + "epoch": 6.149524585710405, + "grad_norm": 38.18893051147461, + "learning_rate": 8.847084612012275e-06, + "loss": 13.1909, + "step": 14150 + }, + { + "epoch": 6.171257810377615, + "grad_norm": 29.566404342651367, + "learning_rate": 8.842700569925472e-06, + "loss": 13.1921, + "step": 14200 + }, + { + "epoch": 6.192991035044825, + "grad_norm": 27.988677978515625, + "learning_rate": 8.838316527838668e-06, + "loss": 13.1361, + "step": 14250 + }, + { + "epoch": 6.2147242597120345, + "grad_norm": 36.260833740234375, + "learning_rate": 8.833932485751863e-06, + "loss": 13.1959, + "step": 14300 + }, + { + "epoch": 6.236457484379245, + "grad_norm": 37.56959533691406, + "learning_rate": 8.82954844366506e-06, + "loss": 13.0934, + "step": 14350 + }, + { + "epoch": 6.258190709046454, + "grad_norm": 37.16026306152344, + "learning_rate": 8.825164401578256e-06, + "loss": 13.3774, + "step": 14400 + }, + { + "epoch": 6.279923933713665, + "grad_norm": 51.96893310546875, + "learning_rate": 8.820780359491453e-06, + "loss": 13.2566, + "step": 14450 + }, + { + "epoch": 6.301657158380875, + "grad_norm": 31.46018409729004, + "learning_rate": 8.816396317404648e-06, + "loss": 13.242, + "step": 14500 + }, + { + "epoch": 6.323390383048085, + "grad_norm": 40.38423538208008, + "learning_rate": 8.812012275317843e-06, + "loss": 13.2649, + "step": 14550 + }, + { + "epoch": 6.345123607715295, + "grad_norm": 33.40611267089844, + "learning_rate": 8.807628233231039e-06, + "loss": 13.2143, + "step": 14600 + }, + { + "epoch": 6.3668568323825045, + "grad_norm": 32.6546745300293, + "learning_rate": 8.803244191144236e-06, + "loss": 13.2293, + "step": 14650 + }, + { + "epoch": 6.388590057049715, + "grad_norm": 30.99147605895996, + "learning_rate": 8.798860149057433e-06, + "loss": 13.1595, + "step": 14700 + }, + { + "epoch": 6.4103232817169244, + "grad_norm": 49.923667907714844, + "learning_rate": 8.794476106970627e-06, + "loss": 13.1624, + "step": 14750 + }, + { + "epoch": 6.432056506384135, + "grad_norm": 27.526941299438477, + "learning_rate": 8.790092064883824e-06, + "loss": 13.245, + "step": 14800 + }, + { + "epoch": 6.453789731051344, + "grad_norm": 41.09890365600586, + "learning_rate": 8.785708022797019e-06, + "loss": 13.1945, + "step": 14850 + }, + { + "epoch": 6.475522955718555, + "grad_norm": 36.0584831237793, + "learning_rate": 8.781323980710216e-06, + "loss": 13.207, + "step": 14900 + }, + { + "epoch": 6.497256180385765, + "grad_norm": 30.85024642944336, + "learning_rate": 8.776939938623412e-06, + "loss": 13.2022, + "step": 14950 + }, + { + "epoch": 6.518989405052975, + "grad_norm": 34.92485427856445, + "learning_rate": 8.772555896536607e-06, + "loss": 13.3696, + "step": 15000 + }, + { + "epoch": 6.540722629720185, + "grad_norm": 33.38056564331055, + "learning_rate": 8.768171854449802e-06, + "loss": 13.2597, + "step": 15050 + }, + { + "epoch": 6.5624558543873945, + "grad_norm": 29.834815979003906, + "learning_rate": 8.763787812363e-06, + "loss": 13.2281, + "step": 15100 + }, + { + "epoch": 6.584189079054605, + "grad_norm": 30.077539443969727, + "learning_rate": 8.759403770276197e-06, + "loss": 13.2554, + "step": 15150 + }, + { + "epoch": 6.605922303721814, + "grad_norm": 43.224586486816406, + "learning_rate": 8.755019728189392e-06, + "loss": 13.1997, + "step": 15200 + }, + { + "epoch": 6.627655528389025, + "grad_norm": 48.51641082763672, + "learning_rate": 8.750635686102587e-06, + "loss": 13.2243, + "step": 15250 + }, + { + "epoch": 6.649388753056234, + "grad_norm": 29.839174270629883, + "learning_rate": 8.746251644015783e-06, + "loss": 13.3737, + "step": 15300 + }, + { + "epoch": 6.671121977723445, + "grad_norm": 44.47172546386719, + "learning_rate": 8.74186760192898e-06, + "loss": 13.1659, + "step": 15350 + }, + { + "epoch": 6.692855202390655, + "grad_norm": 27.568334579467773, + "learning_rate": 8.737483559842175e-06, + "loss": 13.2285, + "step": 15400 + }, + { + "epoch": 6.714588427057865, + "grad_norm": 31.159231185913086, + "learning_rate": 8.73309951775537e-06, + "loss": 13.3102, + "step": 15450 + }, + { + "epoch": 6.736321651725075, + "grad_norm": 30.869430541992188, + "learning_rate": 8.728715475668566e-06, + "loss": 13.2286, + "step": 15500 + }, + { + "epoch": 6.7580548763922845, + "grad_norm": 51.48735427856445, + "learning_rate": 8.724331433581763e-06, + "loss": 13.3347, + "step": 15550 + }, + { + "epoch": 6.779788101059495, + "grad_norm": 35.06986999511719, + "learning_rate": 8.71994739149496e-06, + "loss": 13.1973, + "step": 15600 + }, + { + "epoch": 6.801521325726704, + "grad_norm": 27.670289993286133, + "learning_rate": 8.715563349408155e-06, + "loss": 13.162, + "step": 15650 + }, + { + "epoch": 6.823254550393915, + "grad_norm": 34.26895523071289, + "learning_rate": 8.711179307321351e-06, + "loss": 13.3011, + "step": 15700 + }, + { + "epoch": 6.844987775061124, + "grad_norm": 41.056182861328125, + "learning_rate": 8.706795265234546e-06, + "loss": 13.2564, + "step": 15750 + }, + { + "epoch": 6.866720999728335, + "grad_norm": 47.23772048950195, + "learning_rate": 8.702411223147743e-06, + "loss": 13.3127, + "step": 15800 + }, + { + "epoch": 6.888454224395545, + "grad_norm": 65.80028533935547, + "learning_rate": 8.698027181060939e-06, + "loss": 13.2797, + "step": 15850 + }, + { + "epoch": 6.910187449062755, + "grad_norm": 40.93989562988281, + "learning_rate": 8.693643138974134e-06, + "loss": 13.3707, + "step": 15900 + }, + { + "epoch": 6.931920673729965, + "grad_norm": 83.51680755615234, + "learning_rate": 8.689259096887331e-06, + "loss": 13.217, + "step": 15950 + }, + { + "epoch": 6.9536538983971745, + "grad_norm": 32.16157150268555, + "learning_rate": 8.684875054800527e-06, + "loss": 13.274, + "step": 16000 + }, + { + "epoch": 6.975387123064385, + "grad_norm": 31.57478904724121, + "learning_rate": 8.680491012713724e-06, + "loss": 13.2057, + "step": 16050 + }, + { + "epoch": 6.997120347731594, + "grad_norm": 37.837303161621094, + "learning_rate": 8.676106970626919e-06, + "loss": 13.262, + "step": 16100 + }, + { + "epoch": 7.0186905732138, + "grad_norm": 24.430326461791992, + "learning_rate": 8.671722928540114e-06, + "loss": 12.9144, + "step": 16150 + }, + { + "epoch": 7.040423797881011, + "grad_norm": 45.298194885253906, + "learning_rate": 8.66733888645331e-06, + "loss": 12.8617, + "step": 16200 + }, + { + "epoch": 7.06215702254822, + "grad_norm": 52.39512252807617, + "learning_rate": 8.662954844366507e-06, + "loss": 12.9514, + "step": 16250 + }, + { + "epoch": 7.083890247215431, + "grad_norm": 35.9492073059082, + "learning_rate": 8.658570802279702e-06, + "loss": 12.9577, + "step": 16300 + }, + { + "epoch": 7.105623471882641, + "grad_norm": 31.363454818725586, + "learning_rate": 8.6541867601929e-06, + "loss": 12.9849, + "step": 16350 + }, + { + "epoch": 7.1273566965498505, + "grad_norm": 24.993553161621094, + "learning_rate": 8.649802718106095e-06, + "loss": 12.9269, + "step": 16400 + }, + { + "epoch": 7.149089921217061, + "grad_norm": 28.327381134033203, + "learning_rate": 8.64541867601929e-06, + "loss": 12.941, + "step": 16450 + }, + { + "epoch": 7.17082314588427, + "grad_norm": 30.908496856689453, + "learning_rate": 8.641034633932487e-06, + "loss": 13.0525, + "step": 16500 + }, + { + "epoch": 7.192556370551481, + "grad_norm": 41.53740310668945, + "learning_rate": 8.636650591845683e-06, + "loss": 13.0038, + "step": 16550 + }, + { + "epoch": 7.21428959521869, + "grad_norm": 34.16611862182617, + "learning_rate": 8.632266549758878e-06, + "loss": 12.9893, + "step": 16600 + }, + { + "epoch": 7.236022819885901, + "grad_norm": 28.183107376098633, + "learning_rate": 8.627882507672073e-06, + "loss": 13.0103, + "step": 16650 + }, + { + "epoch": 7.25775604455311, + "grad_norm": 28.345674514770508, + "learning_rate": 8.62349846558527e-06, + "loss": 12.9886, + "step": 16700 + }, + { + "epoch": 7.279489269220321, + "grad_norm": 36.2637825012207, + "learning_rate": 8.619114423498466e-06, + "loss": 12.9905, + "step": 16750 + }, + { + "epoch": 7.301222493887531, + "grad_norm": 32.89162826538086, + "learning_rate": 8.614730381411663e-06, + "loss": 12.9033, + "step": 16800 + }, + { + "epoch": 7.3229557185547405, + "grad_norm": 31.151569366455078, + "learning_rate": 8.610346339324858e-06, + "loss": 13.026, + "step": 16850 + }, + { + "epoch": 7.344688943221951, + "grad_norm": 32.4716682434082, + "learning_rate": 8.605962297238054e-06, + "loss": 12.9932, + "step": 16900 + }, + { + "epoch": 7.36642216788916, + "grad_norm": 28.446046829223633, + "learning_rate": 8.60157825515125e-06, + "loss": 13.0201, + "step": 16950 + }, + { + "epoch": 7.388155392556371, + "grad_norm": 27.000221252441406, + "learning_rate": 8.597194213064446e-06, + "loss": 13.0463, + "step": 17000 + }, + { + "epoch": 7.40988861722358, + "grad_norm": 35.49698257446289, + "learning_rate": 8.592810170977642e-06, + "loss": 12.9461, + "step": 17050 + }, + { + "epoch": 7.431621841890791, + "grad_norm": 48.70148849487305, + "learning_rate": 8.588426128890837e-06, + "loss": 13.0921, + "step": 17100 + }, + { + "epoch": 7.453355066558, + "grad_norm": 28.99524688720703, + "learning_rate": 8.584042086804034e-06, + "loss": 12.9729, + "step": 17150 + }, + { + "epoch": 7.475088291225211, + "grad_norm": 28.51788902282715, + "learning_rate": 8.57965804471723e-06, + "loss": 13.0311, + "step": 17200 + }, + { + "epoch": 7.496821515892421, + "grad_norm": 48.5558967590332, + "learning_rate": 8.575274002630427e-06, + "loss": 13.0931, + "step": 17250 + }, + { + "epoch": 7.5185547405596305, + "grad_norm": 35.883365631103516, + "learning_rate": 8.570889960543622e-06, + "loss": 13.0601, + "step": 17300 + }, + { + "epoch": 7.540287965226841, + "grad_norm": 30.609474182128906, + "learning_rate": 8.566505918456817e-06, + "loss": 13.0277, + "step": 17350 + }, + { + "epoch": 7.56202118989405, + "grad_norm": 31.2172794342041, + "learning_rate": 8.562121876370014e-06, + "loss": 12.9501, + "step": 17400 + }, + { + "epoch": 7.583754414561261, + "grad_norm": 42.7708740234375, + "learning_rate": 8.55773783428321e-06, + "loss": 13.0667, + "step": 17450 + }, + { + "epoch": 7.60548763922847, + "grad_norm": 30.39897346496582, + "learning_rate": 8.553353792196407e-06, + "loss": 13.0591, + "step": 17500 + }, + { + "epoch": 7.627220863895681, + "grad_norm": 26.951528549194336, + "learning_rate": 8.548969750109602e-06, + "loss": 12.9949, + "step": 17550 + }, + { + "epoch": 7.64895408856289, + "grad_norm": 33.658206939697266, + "learning_rate": 8.544585708022798e-06, + "loss": 13.0532, + "step": 17600 + }, + { + "epoch": 7.670687313230101, + "grad_norm": 34.114768981933594, + "learning_rate": 8.540201665935993e-06, + "loss": 13.1035, + "step": 17650 + }, + { + "epoch": 7.692420537897311, + "grad_norm": 29.691999435424805, + "learning_rate": 8.53581762384919e-06, + "loss": 13.0645, + "step": 17700 + }, + { + "epoch": 7.7141537625645205, + "grad_norm": 39.269493103027344, + "learning_rate": 8.531433581762385e-06, + "loss": 13.112, + "step": 17750 + }, + { + "epoch": 7.735886987231731, + "grad_norm": 37.816837310791016, + "learning_rate": 8.527049539675581e-06, + "loss": 13.0634, + "step": 17800 + }, + { + "epoch": 7.75762021189894, + "grad_norm": 36.515132904052734, + "learning_rate": 8.522665497588778e-06, + "loss": 13.0395, + "step": 17850 + }, + { + "epoch": 7.779353436566151, + "grad_norm": 22.76226043701172, + "learning_rate": 8.518281455501973e-06, + "loss": 13.0559, + "step": 17900 + }, + { + "epoch": 7.80108666123336, + "grad_norm": 28.64872169494629, + "learning_rate": 8.51389741341517e-06, + "loss": 13.0638, + "step": 17950 + }, + { + "epoch": 7.822819885900571, + "grad_norm": 41.4809684753418, + "learning_rate": 8.509513371328366e-06, + "loss": 13.0299, + "step": 18000 + }, + { + "epoch": 7.84455311056778, + "grad_norm": 25.84028434753418, + "learning_rate": 8.505129329241561e-06, + "loss": 13.0003, + "step": 18050 + }, + { + "epoch": 7.8662863352349905, + "grad_norm": 36.24126434326172, + "learning_rate": 8.500745287154757e-06, + "loss": 13.0231, + "step": 18100 + }, + { + "epoch": 7.888019559902201, + "grad_norm": 19.62076187133789, + "learning_rate": 8.496361245067954e-06, + "loss": 13.006, + "step": 18150 + }, + { + "epoch": 7.9097527845694104, + "grad_norm": 28.422643661499023, + "learning_rate": 8.491977202981149e-06, + "loss": 12.9981, + "step": 18200 + }, + { + "epoch": 7.931486009236621, + "grad_norm": 36.77701187133789, + "learning_rate": 8.487593160894344e-06, + "loss": 13.1429, + "step": 18250 + }, + { + "epoch": 7.95321923390383, + "grad_norm": 36.51480484008789, + "learning_rate": 8.483209118807542e-06, + "loss": 12.9689, + "step": 18300 + }, + { + "epoch": 7.974952458571041, + "grad_norm": 30.303489685058594, + "learning_rate": 8.478825076720737e-06, + "loss": 13.0392, + "step": 18350 + }, + { + "epoch": 7.99668568323825, + "grad_norm": 41.148353576660156, + "learning_rate": 8.474441034633934e-06, + "loss": 13.0697, + "step": 18400 + }, + { + "epoch": 8.018255908720457, + "grad_norm": 30.144062042236328, + "learning_rate": 8.47005699254713e-06, + "loss": 12.7092, + "step": 18450 + }, + { + "epoch": 8.039989133387666, + "grad_norm": 33.70432662963867, + "learning_rate": 8.465672950460325e-06, + "loss": 12.8033, + "step": 18500 + }, + { + "epoch": 8.061722358054876, + "grad_norm": 25.66695785522461, + "learning_rate": 8.46128890837352e-06, + "loss": 12.7704, + "step": 18550 + }, + { + "epoch": 8.083455582722086, + "grad_norm": 38.33973693847656, + "learning_rate": 8.456904866286717e-06, + "loss": 12.8512, + "step": 18600 + }, + { + "epoch": 8.105188807389297, + "grad_norm": 25.794679641723633, + "learning_rate": 8.452520824199914e-06, + "loss": 12.7138, + "step": 18650 + }, + { + "epoch": 8.126922032056507, + "grad_norm": 39.2582893371582, + "learning_rate": 8.44813678211311e-06, + "loss": 12.7657, + "step": 18700 + }, + { + "epoch": 8.148655256723716, + "grad_norm": 30.886682510375977, + "learning_rate": 8.443752740026305e-06, + "loss": 12.7667, + "step": 18750 + }, + { + "epoch": 8.170388481390926, + "grad_norm": 39.30559158325195, + "learning_rate": 8.4393686979395e-06, + "loss": 12.7548, + "step": 18800 + }, + { + "epoch": 8.192121706058137, + "grad_norm": 22.945003509521484, + "learning_rate": 8.434984655852698e-06, + "loss": 12.8788, + "step": 18850 + }, + { + "epoch": 8.213854930725347, + "grad_norm": 30.998369216918945, + "learning_rate": 8.430600613765893e-06, + "loss": 12.8048, + "step": 18900 + }, + { + "epoch": 8.235588155392556, + "grad_norm": 29.44565773010254, + "learning_rate": 8.426216571679088e-06, + "loss": 12.7907, + "step": 18950 + }, + { + "epoch": 8.257321380059766, + "grad_norm": 29.368488311767578, + "learning_rate": 8.421832529592284e-06, + "loss": 12.8157, + "step": 19000 + }, + { + "epoch": 8.279054604726976, + "grad_norm": 28.4185791015625, + "learning_rate": 8.41744848750548e-06, + "loss": 12.8382, + "step": 19050 + }, + { + "epoch": 8.300787829394187, + "grad_norm": 45.91888427734375, + "learning_rate": 8.413064445418678e-06, + "loss": 12.9013, + "step": 19100 + }, + { + "epoch": 8.322521054061397, + "grad_norm": 36.90361022949219, + "learning_rate": 8.408680403331873e-06, + "loss": 12.8076, + "step": 19150 + }, + { + "epoch": 8.344254278728606, + "grad_norm": 54.692935943603516, + "learning_rate": 8.404296361245069e-06, + "loss": 12.8288, + "step": 19200 + }, + { + "epoch": 8.365987503395816, + "grad_norm": 27.947093963623047, + "learning_rate": 8.399912319158264e-06, + "loss": 12.8577, + "step": 19250 + }, + { + "epoch": 8.387720728063027, + "grad_norm": 28.992555618286133, + "learning_rate": 8.395528277071461e-06, + "loss": 12.882, + "step": 19300 + }, + { + "epoch": 8.409453952730237, + "grad_norm": 22.34044647216797, + "learning_rate": 8.391144234984656e-06, + "loss": 12.8171, + "step": 19350 + }, + { + "epoch": 8.431187177397446, + "grad_norm": 50.96314239501953, + "learning_rate": 8.386760192897852e-06, + "loss": 12.8761, + "step": 19400 + }, + { + "epoch": 8.452920402064656, + "grad_norm": Infinity, + "learning_rate": 8.382376150811047e-06, + "loss": 12.8613, + "step": 19450 + }, + { + "epoch": 8.474653626731866, + "grad_norm": 25.97089195251465, + "learning_rate": 8.377992108724244e-06, + "loss": 12.8875, + "step": 19500 + }, + { + "epoch": 8.496386851399077, + "grad_norm": 30.094532012939453, + "learning_rate": 8.373608066637441e-06, + "loss": 12.8784, + "step": 19550 + }, + { + "epoch": 8.518120076066285, + "grad_norm": 37.806156158447266, + "learning_rate": 8.369224024550637e-06, + "loss": 12.8339, + "step": 19600 + }, + { + "epoch": 8.539853300733496, + "grad_norm": 38.92607498168945, + "learning_rate": 8.364839982463832e-06, + "loss": 12.8541, + "step": 19650 + }, + { + "epoch": 8.561586525400706, + "grad_norm": 31.54934310913086, + "learning_rate": 8.360455940377028e-06, + "loss": 12.8991, + "step": 19700 + }, + { + "epoch": 8.583319750067917, + "grad_norm": 37.04362869262695, + "learning_rate": 8.356071898290225e-06, + "loss": 12.9116, + "step": 19750 + }, + { + "epoch": 8.605052974735127, + "grad_norm": 38.93299865722656, + "learning_rate": 8.35168785620342e-06, + "loss": 12.8499, + "step": 19800 + }, + { + "epoch": 8.626786199402336, + "grad_norm": 28.214290618896484, + "learning_rate": 8.347303814116615e-06, + "loss": 12.8512, + "step": 19850 + }, + { + "epoch": 8.648519424069546, + "grad_norm": 27.576839447021484, + "learning_rate": 8.34291977202981e-06, + "loss": 12.8824, + "step": 19900 + }, + { + "epoch": 8.670252648736756, + "grad_norm": 25.321149826049805, + "learning_rate": 8.338535729943008e-06, + "loss": 12.842, + "step": 19950 + }, + { + "epoch": 8.691985873403967, + "grad_norm": 36.43674087524414, + "learning_rate": 8.334151687856205e-06, + "loss": 12.9156, + "step": 20000 + }, + { + "epoch": 8.691985873403967, + "eval_cer": 0.07732709565131522, + "eval_loss": 2.3227267265319824, + "eval_runtime": 401.1503, + "eval_samples_per_second": 13.476, + "eval_steps_per_second": 3.37, + "eval_wer": 0.23097817553776104, + "step": 20000 + }, + { + "epoch": 8.713719098071177, + "grad_norm": 30.650314331054688, + "learning_rate": 8.3297676457694e-06, + "loss": 12.9089, + "step": 20050 + }, + { + "epoch": 8.735452322738386, + "grad_norm": 27.448633193969727, + "learning_rate": 8.325383603682596e-06, + "loss": 12.8572, + "step": 20100 + }, + { + "epoch": 8.757185547405596, + "grad_norm": 25.665332794189453, + "learning_rate": 8.320999561595791e-06, + "loss": 12.8087, + "step": 20150 + }, + { + "epoch": 8.778918772072807, + "grad_norm": 43.74554443359375, + "learning_rate": 8.316615519508988e-06, + "loss": 12.915, + "step": 20200 + }, + { + "epoch": 8.800651996740017, + "grad_norm": 31.74461555480957, + "learning_rate": 8.312231477422184e-06, + "loss": 12.8676, + "step": 20250 + }, + { + "epoch": 8.822385221407226, + "grad_norm": 28.51342010498047, + "learning_rate": 8.30784743533538e-06, + "loss": 12.8645, + "step": 20300 + }, + { + "epoch": 8.844118446074436, + "grad_norm": 27.660497665405273, + "learning_rate": 8.303463393248576e-06, + "loss": 12.9217, + "step": 20350 + }, + { + "epoch": 8.865851670741646, + "grad_norm": 41.046485900878906, + "learning_rate": 8.299079351161771e-06, + "loss": 12.8472, + "step": 20400 + }, + { + "epoch": 8.887584895408857, + "grad_norm": 50.21107482910156, + "learning_rate": 8.294695309074969e-06, + "loss": 12.8141, + "step": 20450 + }, + { + "epoch": 8.909318120076065, + "grad_norm": 42.08512878417969, + "learning_rate": 8.290311266988164e-06, + "loss": 12.9162, + "step": 20500 + }, + { + "epoch": 8.931051344743276, + "grad_norm": 22.199024200439453, + "learning_rate": 8.28592722490136e-06, + "loss": 12.8649, + "step": 20550 + }, + { + "epoch": 8.952784569410486, + "grad_norm": 38.15290451049805, + "learning_rate": 8.281543182814555e-06, + "loss": 12.8547, + "step": 20600 + }, + { + "epoch": 8.974517794077697, + "grad_norm": 35.076698303222656, + "learning_rate": 8.277159140727752e-06, + "loss": 12.8954, + "step": 20650 + }, + { + "epoch": 8.996251018744907, + "grad_norm": 26.742168426513672, + "learning_rate": 8.272775098640947e-06, + "loss": 12.8845, + "step": 20700 + }, + { + "epoch": 9.017821244227113, + "grad_norm": 18.43798828125, + "learning_rate": 8.268391056554144e-06, + "loss": 12.6111, + "step": 20750 + }, + { + "epoch": 9.039554468894321, + "grad_norm": 22.483016967773438, + "learning_rate": 8.26400701446734e-06, + "loss": 12.6938, + "step": 20800 + }, + { + "epoch": 9.061287693561532, + "grad_norm": 22.414525985717773, + "learning_rate": 8.259622972380535e-06, + "loss": 12.6499, + "step": 20850 + }, + { + "epoch": 9.083020918228742, + "grad_norm": 33.88186264038086, + "learning_rate": 8.255238930293732e-06, + "loss": 12.5987, + "step": 20900 + }, + { + "epoch": 9.104754142895953, + "grad_norm": 34.6947021484375, + "learning_rate": 8.250854888206928e-06, + "loss": 12.6804, + "step": 20950 + }, + { + "epoch": 9.126487367563163, + "grad_norm": 22.22621726989746, + "learning_rate": 8.246470846120123e-06, + "loss": 12.7388, + "step": 21000 + }, + { + "epoch": 9.148220592230372, + "grad_norm": 30.4085693359375, + "learning_rate": 8.242086804033318e-06, + "loss": 12.7085, + "step": 21050 + }, + { + "epoch": 9.169953816897582, + "grad_norm": 131.27008056640625, + "learning_rate": 8.237702761946515e-06, + "loss": 12.7142, + "step": 21100 + }, + { + "epoch": 9.191687041564792, + "grad_norm": 28.05132293701172, + "learning_rate": 8.23331871985971e-06, + "loss": 12.698, + "step": 21150 + }, + { + "epoch": 9.213420266232003, + "grad_norm": 157.52548217773438, + "learning_rate": 8.228934677772908e-06, + "loss": 12.7275, + "step": 21200 + }, + { + "epoch": 9.235153490899211, + "grad_norm": 29.362707138061523, + "learning_rate": 8.224550635686103e-06, + "loss": 12.648, + "step": 21250 + }, + { + "epoch": 9.256886715566422, + "grad_norm": 27.221683502197266, + "learning_rate": 8.220166593599299e-06, + "loss": 12.7306, + "step": 21300 + }, + { + "epoch": 9.278619940233632, + "grad_norm": 18.6680850982666, + "learning_rate": 8.215782551512496e-06, + "loss": 12.6896, + "step": 21350 + }, + { + "epoch": 9.300353164900843, + "grad_norm": 35.81766128540039, + "learning_rate": 8.211398509425691e-06, + "loss": 12.6838, + "step": 21400 + }, + { + "epoch": 9.322086389568053, + "grad_norm": 24.64043426513672, + "learning_rate": 8.207014467338888e-06, + "loss": 12.7201, + "step": 21450 + }, + { + "epoch": 9.343819614235262, + "grad_norm": 41.39848327636719, + "learning_rate": 8.202630425252084e-06, + "loss": 12.7289, + "step": 21500 + }, + { + "epoch": 9.365552838902472, + "grad_norm": 23.982431411743164, + "learning_rate": 8.198246383165279e-06, + "loss": 12.7145, + "step": 21550 + }, + { + "epoch": 9.387286063569682, + "grad_norm": 25.513904571533203, + "learning_rate": 8.193862341078474e-06, + "loss": 12.6646, + "step": 21600 + }, + { + "epoch": 9.409019288236893, + "grad_norm": 28.16943359375, + "learning_rate": 8.189478298991671e-06, + "loss": 12.7157, + "step": 21650 + }, + { + "epoch": 9.430752512904101, + "grad_norm": 31.33350944519043, + "learning_rate": 8.185094256904867e-06, + "loss": 12.7245, + "step": 21700 + }, + { + "epoch": 9.452485737571312, + "grad_norm": 22.30205726623535, + "learning_rate": 8.180710214818062e-06, + "loss": 12.7082, + "step": 21750 + }, + { + "epoch": 9.474218962238522, + "grad_norm": 31.175230026245117, + "learning_rate": 8.17632617273126e-06, + "loss": 12.7716, + "step": 21800 + }, + { + "epoch": 9.495952186905733, + "grad_norm": 24.61014747619629, + "learning_rate": 8.171942130644455e-06, + "loss": 12.7153, + "step": 21850 + }, + { + "epoch": 9.517685411572941, + "grad_norm": 37.26193618774414, + "learning_rate": 8.167558088557652e-06, + "loss": 12.7623, + "step": 21900 + }, + { + "epoch": 9.539418636240152, + "grad_norm": 29.6248779296875, + "learning_rate": 8.163174046470847e-06, + "loss": 12.7862, + "step": 21950 + }, + { + "epoch": 9.561151860907362, + "grad_norm": 37.52980422973633, + "learning_rate": 8.158790004384042e-06, + "loss": 12.6912, + "step": 22000 + }, + { + "epoch": 9.582885085574572, + "grad_norm": 35.345035552978516, + "learning_rate": 8.154405962297238e-06, + "loss": 12.677, + "step": 22050 + }, + { + "epoch": 9.604618310241783, + "grad_norm": 32.45883560180664, + "learning_rate": 8.150021920210435e-06, + "loss": 12.6662, + "step": 22100 + }, + { + "epoch": 9.626351534908991, + "grad_norm": 46.35236358642578, + "learning_rate": 8.14563787812363e-06, + "loss": 12.7472, + "step": 22150 + }, + { + "epoch": 9.648084759576202, + "grad_norm": 26.202049255371094, + "learning_rate": 8.141253836036826e-06, + "loss": 12.7174, + "step": 22200 + }, + { + "epoch": 9.669817984243412, + "grad_norm": 27.350576400756836, + "learning_rate": 8.136869793950023e-06, + "loss": 12.6917, + "step": 22250 + }, + { + "epoch": 9.691551208910623, + "grad_norm": 32.96540451049805, + "learning_rate": 8.132485751863218e-06, + "loss": 12.7865, + "step": 22300 + }, + { + "epoch": 9.713284433577833, + "grad_norm": 33.34325408935547, + "learning_rate": 8.128101709776415e-06, + "loss": 12.8177, + "step": 22350 + }, + { + "epoch": 9.735017658245042, + "grad_norm": 24.0529727935791, + "learning_rate": 8.12371766768961e-06, + "loss": 12.7816, + "step": 22400 + }, + { + "epoch": 9.756750882912252, + "grad_norm": 31.504335403442383, + "learning_rate": 8.119333625602806e-06, + "loss": 12.7014, + "step": 22450 + }, + { + "epoch": 9.778484107579462, + "grad_norm": 37.35165023803711, + "learning_rate": 8.114949583516001e-06, + "loss": 12.6674, + "step": 22500 + }, + { + "epoch": 9.800217332246673, + "grad_norm": 22.923002243041992, + "learning_rate": 8.110565541429199e-06, + "loss": 12.7619, + "step": 22550 + }, + { + "epoch": 9.821950556913881, + "grad_norm": 29.871366500854492, + "learning_rate": 8.106181499342396e-06, + "loss": 12.7368, + "step": 22600 + }, + { + "epoch": 9.843683781581092, + "grad_norm": 40.105369567871094, + "learning_rate": 8.101797457255591e-06, + "loss": 12.6962, + "step": 22650 + }, + { + "epoch": 9.865417006248302, + "grad_norm": 25.92096710205078, + "learning_rate": 8.097413415168786e-06, + "loss": 12.686, + "step": 22700 + }, + { + "epoch": 9.887150230915513, + "grad_norm": 42.663368225097656, + "learning_rate": 8.093029373081982e-06, + "loss": 12.7663, + "step": 22750 + }, + { + "epoch": 9.908883455582721, + "grad_norm": 30.958925247192383, + "learning_rate": 8.088645330995179e-06, + "loss": 12.7574, + "step": 22800 + }, + { + "epoch": 9.930616680249932, + "grad_norm": 32.973209381103516, + "learning_rate": 8.084261288908374e-06, + "loss": 12.7376, + "step": 22850 + }, + { + "epoch": 9.952349904917142, + "grad_norm": 24.848648071289062, + "learning_rate": 8.07987724682157e-06, + "loss": 12.7988, + "step": 22900 + }, + { + "epoch": 9.974083129584352, + "grad_norm": 38.90625762939453, + "learning_rate": 8.075493204734765e-06, + "loss": 12.7848, + "step": 22950 + }, + { + "epoch": 9.995816354251563, + "grad_norm": 169.55076599121094, + "learning_rate": 8.071109162647962e-06, + "loss": 12.7591, + "step": 23000 + }, + { + "epoch": 10.017386579733769, + "grad_norm": 25.580976486206055, + "learning_rate": 8.06672512056116e-06, + "loss": 12.4225, + "step": 23050 + }, + { + "epoch": 10.039119804400977, + "grad_norm": 35.71001434326172, + "learning_rate": 8.062341078474355e-06, + "loss": 12.6339, + "step": 23100 + }, + { + "epoch": 10.060853029068188, + "grad_norm": 27.853500366210938, + "learning_rate": 8.05795703638755e-06, + "loss": 12.5467, + "step": 23150 + }, + { + "epoch": 10.082586253735398, + "grad_norm": 25.689022064208984, + "learning_rate": 8.053572994300745e-06, + "loss": 12.6073, + "step": 23200 + }, + { + "epoch": 10.104319478402608, + "grad_norm": 19.449281692504883, + "learning_rate": 8.049188952213942e-06, + "loss": 12.65, + "step": 23250 + }, + { + "epoch": 10.126052703069817, + "grad_norm": 50.91756820678711, + "learning_rate": 8.044804910127138e-06, + "loss": 12.6245, + "step": 23300 + }, + { + "epoch": 10.147785927737027, + "grad_norm": 30.20039939880371, + "learning_rate": 8.040420868040333e-06, + "loss": 12.5309, + "step": 23350 + }, + { + "epoch": 10.169519152404238, + "grad_norm": 19.78704071044922, + "learning_rate": 8.036036825953529e-06, + "loss": 12.5593, + "step": 23400 + }, + { + "epoch": 10.191252377071448, + "grad_norm": 19.870885848999023, + "learning_rate": 8.031652783866726e-06, + "loss": 12.5285, + "step": 23450 + }, + { + "epoch": 10.212985601738659, + "grad_norm": 28.326723098754883, + "learning_rate": 8.027268741779923e-06, + "loss": 12.5193, + "step": 23500 + }, + { + "epoch": 10.234718826405867, + "grad_norm": 27.501436233520508, + "learning_rate": 8.022884699693118e-06, + "loss": 12.5663, + "step": 23550 + }, + { + "epoch": 10.256452051073078, + "grad_norm": 28.51038932800293, + "learning_rate": 8.018500657606314e-06, + "loss": 12.6105, + "step": 23600 + }, + { + "epoch": 10.278185275740288, + "grad_norm": 38.11888885498047, + "learning_rate": 8.014116615519509e-06, + "loss": 12.6369, + "step": 23650 + }, + { + "epoch": 10.299918500407498, + "grad_norm": 56.63121032714844, + "learning_rate": 8.009732573432706e-06, + "loss": 12.5986, + "step": 23700 + }, + { + "epoch": 10.321651725074709, + "grad_norm": 30.95232582092285, + "learning_rate": 8.005348531345901e-06, + "loss": 12.6104, + "step": 23750 + }, + { + "epoch": 10.343384949741917, + "grad_norm": 46.855831146240234, + "learning_rate": 8.000964489259098e-06, + "loss": 12.6277, + "step": 23800 + }, + { + "epoch": 10.365118174409128, + "grad_norm": 38.9176139831543, + "learning_rate": 7.996580447172292e-06, + "loss": 12.5793, + "step": 23850 + }, + { + "epoch": 10.386851399076338, + "grad_norm": 20.209339141845703, + "learning_rate": 7.99219640508549e-06, + "loss": 12.5781, + "step": 23900 + }, + { + "epoch": 10.408584623743549, + "grad_norm": 34.40525817871094, + "learning_rate": 7.987812362998686e-06, + "loss": 12.6018, + "step": 23950 + }, + { + "epoch": 10.430317848410757, + "grad_norm": 44.757041931152344, + "learning_rate": 7.983428320911882e-06, + "loss": 12.6543, + "step": 24000 + }, + { + "epoch": 10.452051073077968, + "grad_norm": 40.83699035644531, + "learning_rate": 7.979044278825077e-06, + "loss": 12.6135, + "step": 24050 + }, + { + "epoch": 10.473784297745178, + "grad_norm": 31.089038848876953, + "learning_rate": 7.974660236738272e-06, + "loss": 12.6269, + "step": 24100 + }, + { + "epoch": 10.495517522412388, + "grad_norm": 33.82300567626953, + "learning_rate": 7.97027619465147e-06, + "loss": 12.622, + "step": 24150 + }, + { + "epoch": 10.517250747079597, + "grad_norm": 25.88127899169922, + "learning_rate": 7.965892152564665e-06, + "loss": 12.6332, + "step": 24200 + }, + { + "epoch": 10.538983971746807, + "grad_norm": 29.95918083190918, + "learning_rate": 7.961508110477862e-06, + "loss": 12.6166, + "step": 24250 + }, + { + "epoch": 10.560717196414018, + "grad_norm": 34.399444580078125, + "learning_rate": 7.957124068391057e-06, + "loss": 12.5997, + "step": 24300 + }, + { + "epoch": 10.582450421081228, + "grad_norm": 26.007383346557617, + "learning_rate": 7.952740026304253e-06, + "loss": 12.5829, + "step": 24350 + }, + { + "epoch": 10.604183645748439, + "grad_norm": 14.864594459533691, + "learning_rate": 7.94835598421745e-06, + "loss": 12.6532, + "step": 24400 + }, + { + "epoch": 10.625916870415647, + "grad_norm": 31.178630828857422, + "learning_rate": 7.943971942130645e-06, + "loss": 12.5909, + "step": 24450 + }, + { + "epoch": 10.647650095082858, + "grad_norm": 31.065549850463867, + "learning_rate": 7.93958790004384e-06, + "loss": 12.5674, + "step": 24500 + }, + { + "epoch": 10.669383319750068, + "grad_norm": 28.21125030517578, + "learning_rate": 7.935203857957036e-06, + "loss": 12.6476, + "step": 24550 + }, + { + "epoch": 10.691116544417278, + "grad_norm": 31.474586486816406, + "learning_rate": 7.930819815870233e-06, + "loss": 12.5938, + "step": 24600 + }, + { + "epoch": 10.712849769084489, + "grad_norm": 26.097501754760742, + "learning_rate": 7.926435773783428e-06, + "loss": 12.6483, + "step": 24650 + }, + { + "epoch": 10.734582993751697, + "grad_norm": 40.45956039428711, + "learning_rate": 7.922051731696626e-06, + "loss": 12.6591, + "step": 24700 + }, + { + "epoch": 10.756316218418908, + "grad_norm": 23.737592697143555, + "learning_rate": 7.917667689609821e-06, + "loss": 12.5698, + "step": 24750 + }, + { + "epoch": 10.778049443086118, + "grad_norm": 32.13654708862305, + "learning_rate": 7.913283647523016e-06, + "loss": 12.5617, + "step": 24800 + }, + { + "epoch": 10.799782667753329, + "grad_norm": 28.451892852783203, + "learning_rate": 7.908899605436213e-06, + "loss": 12.6304, + "step": 24850 + }, + { + "epoch": 10.821515892420537, + "grad_norm": 37.13362121582031, + "learning_rate": 7.904515563349409e-06, + "loss": 12.6649, + "step": 24900 + }, + { + "epoch": 10.843249117087748, + "grad_norm": 45.161277770996094, + "learning_rate": 7.900131521262606e-06, + "loss": 12.6335, + "step": 24950 + }, + { + "epoch": 10.864982341754958, + "grad_norm": 25.36030387878418, + "learning_rate": 7.8957474791758e-06, + "loss": 12.7371, + "step": 25000 + }, + { + "epoch": 10.886715566422168, + "grad_norm": 38.44227981567383, + "learning_rate": 7.891363437088997e-06, + "loss": 12.6187, + "step": 25050 + }, + { + "epoch": 10.908448791089377, + "grad_norm": 46.692874908447266, + "learning_rate": 7.886979395002192e-06, + "loss": 12.6517, + "step": 25100 + }, + { + "epoch": 10.930182015756587, + "grad_norm": 28.845399856567383, + "learning_rate": 7.882595352915389e-06, + "loss": 12.5677, + "step": 25150 + }, + { + "epoch": 10.951915240423798, + "grad_norm": 31.64191436767578, + "learning_rate": 7.878211310828585e-06, + "loss": 12.6347, + "step": 25200 + }, + { + "epoch": 10.973648465091008, + "grad_norm": 32.57988357543945, + "learning_rate": 7.87382726874178e-06, + "loss": 12.5652, + "step": 25250 + }, + { + "epoch": 10.995381689758219, + "grad_norm": 28.151342391967773, + "learning_rate": 7.869443226654977e-06, + "loss": 12.5981, + "step": 25300 + }, + { + "epoch": 11.016951915240424, + "grad_norm": 29.2868595123291, + "learning_rate": 7.865059184568172e-06, + "loss": 12.4366, + "step": 25350 + }, + { + "epoch": 11.038685139907633, + "grad_norm": 31.722579956054688, + "learning_rate": 7.86067514248137e-06, + "loss": 12.4879, + "step": 25400 + }, + { + "epoch": 11.060418364574844, + "grad_norm": 29.232097625732422, + "learning_rate": 7.856291100394565e-06, + "loss": 12.4597, + "step": 25450 + }, + { + "epoch": 11.082151589242054, + "grad_norm": 18.49676513671875, + "learning_rate": 7.85190705830776e-06, + "loss": 12.4631, + "step": 25500 + }, + { + "epoch": 11.103884813909264, + "grad_norm": 27.89682388305664, + "learning_rate": 7.847523016220956e-06, + "loss": 12.4507, + "step": 25550 + }, + { + "epoch": 11.125618038576473, + "grad_norm": 30.45709800720215, + "learning_rate": 7.843138974134153e-06, + "loss": 12.5008, + "step": 25600 + }, + { + "epoch": 11.147351263243683, + "grad_norm": 62.570823669433594, + "learning_rate": 7.838754932047348e-06, + "loss": 12.5107, + "step": 25650 + }, + { + "epoch": 11.169084487910894, + "grad_norm": 24.397315979003906, + "learning_rate": 7.834370889960543e-06, + "loss": 12.5059, + "step": 25700 + }, + { + "epoch": 11.190817712578104, + "grad_norm": 18.074167251586914, + "learning_rate": 7.82998684787374e-06, + "loss": 12.5071, + "step": 25750 + }, + { + "epoch": 11.212550937245314, + "grad_norm": 20.450908660888672, + "learning_rate": 7.825602805786936e-06, + "loss": 12.5048, + "step": 25800 + }, + { + "epoch": 11.234284161912523, + "grad_norm": 19.00213623046875, + "learning_rate": 7.821218763700133e-06, + "loss": 12.4887, + "step": 25850 + }, + { + "epoch": 11.256017386579733, + "grad_norm": 23.276472091674805, + "learning_rate": 7.816834721613328e-06, + "loss": 12.5311, + "step": 25900 + }, + { + "epoch": 11.277750611246944, + "grad_norm": 33.67416763305664, + "learning_rate": 7.812450679526524e-06, + "loss": 12.5503, + "step": 25950 + }, + { + "epoch": 11.299483835914154, + "grad_norm": 17.561626434326172, + "learning_rate": 7.80806663743972e-06, + "loss": 12.4831, + "step": 26000 + }, + { + "epoch": 11.321217060581363, + "grad_norm": 24.35294532775879, + "learning_rate": 7.803682595352916e-06, + "loss": 12.4869, + "step": 26050 + }, + { + "epoch": 11.342950285248573, + "grad_norm": 16.80247688293457, + "learning_rate": 7.799298553266113e-06, + "loss": 12.5581, + "step": 26100 + }, + { + "epoch": 11.364683509915784, + "grad_norm": 22.540014266967773, + "learning_rate": 7.794914511179307e-06, + "loss": 12.5552, + "step": 26150 + }, + { + "epoch": 11.386416734582994, + "grad_norm": 23.270639419555664, + "learning_rate": 7.790530469092504e-06, + "loss": 12.5005, + "step": 26200 + }, + { + "epoch": 11.408149959250204, + "grad_norm": 27.789560317993164, + "learning_rate": 7.7861464270057e-06, + "loss": 12.5405, + "step": 26250 + }, + { + "epoch": 11.429883183917413, + "grad_norm": 24.1334285736084, + "learning_rate": 7.781762384918897e-06, + "loss": 12.5056, + "step": 26300 + }, + { + "epoch": 11.451616408584623, + "grad_norm": 35.342288970947266, + "learning_rate": 7.777378342832092e-06, + "loss": 12.501, + "step": 26350 + }, + { + "epoch": 11.473349633251834, + "grad_norm": 27.646997451782227, + "learning_rate": 7.772994300745287e-06, + "loss": 12.4571, + "step": 26400 + }, + { + "epoch": 11.495082857919044, + "grad_norm": 43.06098937988281, + "learning_rate": 7.768610258658483e-06, + "loss": 12.5856, + "step": 26450 + }, + { + "epoch": 11.516816082586253, + "grad_norm": 21.487150192260742, + "learning_rate": 7.76422621657168e-06, + "loss": 12.4849, + "step": 26500 + }, + { + "epoch": 11.538549307253463, + "grad_norm": 21.75229835510254, + "learning_rate": 7.759842174484877e-06, + "loss": 12.5192, + "step": 26550 + }, + { + "epoch": 11.560282531920674, + "grad_norm": 23.02396011352539, + "learning_rate": 7.755458132398072e-06, + "loss": 12.5011, + "step": 26600 + }, + { + "epoch": 11.582015756587884, + "grad_norm": 21.738445281982422, + "learning_rate": 7.751074090311268e-06, + "loss": 12.5525, + "step": 26650 + }, + { + "epoch": 11.603748981255094, + "grad_norm": 38.93478775024414, + "learning_rate": 7.746690048224463e-06, + "loss": 12.4925, + "step": 26700 + }, + { + "epoch": 11.625482205922303, + "grad_norm": 30.070697784423828, + "learning_rate": 7.74230600613766e-06, + "loss": 12.5598, + "step": 26750 + }, + { + "epoch": 11.647215430589513, + "grad_norm": 44.55253982543945, + "learning_rate": 7.737921964050856e-06, + "loss": 12.4896, + "step": 26800 + }, + { + "epoch": 11.668948655256724, + "grad_norm": 23.052288055419922, + "learning_rate": 7.733537921964051e-06, + "loss": 12.5198, + "step": 26850 + }, + { + "epoch": 11.690681879923934, + "grad_norm": 24.383729934692383, + "learning_rate": 7.729153879877246e-06, + "loss": 12.5321, + "step": 26900 + }, + { + "epoch": 11.712415104591145, + "grad_norm": 23.777788162231445, + "learning_rate": 7.724769837790443e-06, + "loss": 12.5403, + "step": 26950 + }, + { + "epoch": 11.734148329258353, + "grad_norm": 22.8085994720459, + "learning_rate": 7.72038579570364e-06, + "loss": 12.5297, + "step": 27000 + }, + { + "epoch": 11.755881553925564, + "grad_norm": 28.690683364868164, + "learning_rate": 7.716001753616836e-06, + "loss": 12.5626, + "step": 27050 + }, + { + "epoch": 11.777614778592774, + "grad_norm": 23.2988338470459, + "learning_rate": 7.711617711530031e-06, + "loss": 12.4646, + "step": 27100 + }, + { + "epoch": 11.799348003259984, + "grad_norm": 24.85117530822754, + "learning_rate": 7.707233669443227e-06, + "loss": 12.4886, + "step": 27150 + }, + { + "epoch": 11.821081227927193, + "grad_norm": 34.84917449951172, + "learning_rate": 7.702849627356424e-06, + "loss": 12.578, + "step": 27200 + }, + { + "epoch": 11.842814452594403, + "grad_norm": 27.57342529296875, + "learning_rate": 7.698465585269619e-06, + "loss": 12.5423, + "step": 27250 + }, + { + "epoch": 11.864547677261614, + "grad_norm": 21.665023803710938, + "learning_rate": 7.694081543182815e-06, + "loss": 12.4848, + "step": 27300 + }, + { + "epoch": 11.886280901928824, + "grad_norm": 20.787555694580078, + "learning_rate": 7.68969750109601e-06, + "loss": 12.4976, + "step": 27350 + }, + { + "epoch": 11.908014126596033, + "grad_norm": 42.406837463378906, + "learning_rate": 7.685313459009207e-06, + "loss": 12.5549, + "step": 27400 + }, + { + "epoch": 11.929747351263243, + "grad_norm": 23.60106658935547, + "learning_rate": 7.680929416922404e-06, + "loss": 12.5492, + "step": 27450 + }, + { + "epoch": 11.951480575930454, + "grad_norm": 21.591079711914062, + "learning_rate": 7.6765453748356e-06, + "loss": 12.5018, + "step": 27500 + }, + { + "epoch": 11.973213800597664, + "grad_norm": 32.685333251953125, + "learning_rate": 7.672161332748795e-06, + "loss": 12.5378, + "step": 27550 + }, + { + "epoch": 11.994947025264874, + "grad_norm": 26.88076400756836, + "learning_rate": 7.66777729066199e-06, + "loss": 12.5529, + "step": 27600 + }, + { + "epoch": 12.01651725074708, + "grad_norm": 19.660898208618164, + "learning_rate": 7.663393248575187e-06, + "loss": 12.3944, + "step": 27650 + }, + { + "epoch": 12.038250475414289, + "grad_norm": 36.72605514526367, + "learning_rate": 7.659009206488383e-06, + "loss": 12.359, + "step": 27700 + }, + { + "epoch": 12.0599837000815, + "grad_norm": 27.864477157592773, + "learning_rate": 7.65462516440158e-06, + "loss": 12.3951, + "step": 27750 + }, + { + "epoch": 12.08171692474871, + "grad_norm": 34.72395324707031, + "learning_rate": 7.650241122314775e-06, + "loss": 12.4259, + "step": 27800 + }, + { + "epoch": 12.10345014941592, + "grad_norm": 20.68131446838379, + "learning_rate": 7.64585708022797e-06, + "loss": 12.4737, + "step": 27850 + }, + { + "epoch": 12.125183374083129, + "grad_norm": 27.369903564453125, + "learning_rate": 7.641473038141168e-06, + "loss": 12.4838, + "step": 27900 + }, + { + "epoch": 12.14691659875034, + "grad_norm": 14.568199157714844, + "learning_rate": 7.637088996054363e-06, + "loss": 12.3812, + "step": 27950 + }, + { + "epoch": 12.16864982341755, + "grad_norm": 20.099998474121094, + "learning_rate": 7.632704953967558e-06, + "loss": 12.4168, + "step": 28000 + }, + { + "epoch": 12.19038304808476, + "grad_norm": 21.41561508178711, + "learning_rate": 7.628320911880755e-06, + "loss": 12.3799, + "step": 28050 + }, + { + "epoch": 12.21211627275197, + "grad_norm": 23.49574851989746, + "learning_rate": 7.623936869793951e-06, + "loss": 12.4527, + "step": 28100 + }, + { + "epoch": 12.233849497419179, + "grad_norm": 30.164730072021484, + "learning_rate": 7.619552827707146e-06, + "loss": 12.4353, + "step": 28150 + }, + { + "epoch": 12.25558272208639, + "grad_norm": 32.27763748168945, + "learning_rate": 7.6151687856203425e-06, + "loss": 12.4808, + "step": 28200 + }, + { + "epoch": 12.2773159467536, + "grad_norm": 36.46564483642578, + "learning_rate": 7.610784743533538e-06, + "loss": 12.3987, + "step": 28250 + }, + { + "epoch": 12.29904917142081, + "grad_norm": 19.888980865478516, + "learning_rate": 7.606400701446734e-06, + "loss": 12.3889, + "step": 28300 + }, + { + "epoch": 12.320782396088019, + "grad_norm": 20.877737045288086, + "learning_rate": 7.602016659359931e-06, + "loss": 12.4598, + "step": 28350 + }, + { + "epoch": 12.34251562075523, + "grad_norm": 20.208404541015625, + "learning_rate": 7.5976326172731266e-06, + "loss": 12.3682, + "step": 28400 + }, + { + "epoch": 12.36424884542244, + "grad_norm": 48.63652801513672, + "learning_rate": 7.593248575186323e-06, + "loss": 12.3874, + "step": 28450 + }, + { + "epoch": 12.38598207008965, + "grad_norm": 23.263282775878906, + "learning_rate": 7.588864533099518e-06, + "loss": 12.4161, + "step": 28500 + }, + { + "epoch": 12.40771529475686, + "grad_norm": 23.76000213623047, + "learning_rate": 7.5844804910127144e-06, + "loss": 12.4247, + "step": 28550 + }, + { + "epoch": 12.429448519424069, + "grad_norm": 62.45661544799805, + "learning_rate": 7.58009644892591e-06, + "loss": 12.4226, + "step": 28600 + }, + { + "epoch": 12.45118174409128, + "grad_norm": 33.05659484863281, + "learning_rate": 7.575712406839106e-06, + "loss": 12.493, + "step": 28650 + }, + { + "epoch": 12.47291496875849, + "grad_norm": 23.853660583496094, + "learning_rate": 7.5713283647523014e-06, + "loss": 12.4388, + "step": 28700 + }, + { + "epoch": 12.4946481934257, + "grad_norm": 30.970672607421875, + "learning_rate": 7.5669443226654985e-06, + "loss": 12.4721, + "step": 28750 + }, + { + "epoch": 12.516381418092909, + "grad_norm": 20.660356521606445, + "learning_rate": 7.562560280578695e-06, + "loss": 12.4463, + "step": 28800 + }, + { + "epoch": 12.538114642760119, + "grad_norm": 27.25446319580078, + "learning_rate": 7.55817623849189e-06, + "loss": 12.4359, + "step": 28850 + }, + { + "epoch": 12.55984786742733, + "grad_norm": 19.96375274658203, + "learning_rate": 7.553792196405086e-06, + "loss": 12.4274, + "step": 28900 + }, + { + "epoch": 12.58158109209454, + "grad_norm": 25.133895874023438, + "learning_rate": 7.549408154318282e-06, + "loss": 12.4288, + "step": 28950 + }, + { + "epoch": 12.60331431676175, + "grad_norm": 55.64627456665039, + "learning_rate": 7.545024112231478e-06, + "loss": 12.4451, + "step": 29000 + }, + { + "epoch": 12.625047541428959, + "grad_norm": 70.62721252441406, + "learning_rate": 7.540640070144673e-06, + "loss": 12.451, + "step": 29050 + }, + { + "epoch": 12.64678076609617, + "grad_norm": 22.789186477661133, + "learning_rate": 7.5362560280578705e-06, + "loss": 12.43, + "step": 29100 + }, + { + "epoch": 12.66851399076338, + "grad_norm": 25.138248443603516, + "learning_rate": 7.531871985971065e-06, + "loss": 12.4, + "step": 29150 + }, + { + "epoch": 12.69024721543059, + "grad_norm": 18.74398422241211, + "learning_rate": 7.527487943884262e-06, + "loss": 12.4338, + "step": 29200 + }, + { + "epoch": 12.711980440097799, + "grad_norm": 28.796159744262695, + "learning_rate": 7.523103901797458e-06, + "loss": 12.473, + "step": 29250 + }, + { + "epoch": 12.733713664765009, + "grad_norm": 28.044872283935547, + "learning_rate": 7.518719859710654e-06, + "loss": 12.4155, + "step": 29300 + }, + { + "epoch": 12.75544688943222, + "grad_norm": 21.100650787353516, + "learning_rate": 7.51433581762385e-06, + "loss": 12.4329, + "step": 29350 + }, + { + "epoch": 12.77718011409943, + "grad_norm": 24.12652015686035, + "learning_rate": 7.509951775537045e-06, + "loss": 12.4504, + "step": 29400 + }, + { + "epoch": 12.79891333876664, + "grad_norm": 18.889480590820312, + "learning_rate": 7.5055677334502416e-06, + "loss": 12.3981, + "step": 29450 + }, + { + "epoch": 12.820646563433849, + "grad_norm": 20.395387649536133, + "learning_rate": 7.501183691363437e-06, + "loss": 12.4834, + "step": 29500 + }, + { + "epoch": 12.84237978810106, + "grad_norm": 34.01985168457031, + "learning_rate": 7.496799649276634e-06, + "loss": 12.4485, + "step": 29550 + }, + { + "epoch": 12.86411301276827, + "grad_norm": 36.57313537597656, + "learning_rate": 7.49241560718983e-06, + "loss": 12.5063, + "step": 29600 + }, + { + "epoch": 12.88584623743548, + "grad_norm": 21.946285247802734, + "learning_rate": 7.488031565103026e-06, + "loss": 12.501, + "step": 29650 + }, + { + "epoch": 12.907579462102689, + "grad_norm": 26.948814392089844, + "learning_rate": 7.483647523016222e-06, + "loss": 12.5122, + "step": 29700 + }, + { + "epoch": 12.929312686769899, + "grad_norm": 31.4482364654541, + "learning_rate": 7.479263480929417e-06, + "loss": 12.4543, + "step": 29750 + }, + { + "epoch": 12.95104591143711, + "grad_norm": 35.19594192504883, + "learning_rate": 7.4748794388426135e-06, + "loss": 12.4657, + "step": 29800 + }, + { + "epoch": 12.97277913610432, + "grad_norm": 23.498001098632812, + "learning_rate": 7.470495396755809e-06, + "loss": 12.4462, + "step": 29850 + }, + { + "epoch": 12.99451236077153, + "grad_norm": 48.50201416015625, + "learning_rate": 7.466111354669006e-06, + "loss": 12.4134, + "step": 29900 + }, + { + "epoch": 13.016082586253736, + "grad_norm": 25.189435958862305, + "learning_rate": 7.461727312582201e-06, + "loss": 12.3134, + "step": 29950 + }, + { + "epoch": 13.037815810920945, + "grad_norm": 21.985063552856445, + "learning_rate": 7.457343270495398e-06, + "loss": 12.3299, + "step": 30000 + }, + { + "epoch": 13.037815810920945, + "eval_cer": 0.0770617061459272, + "eval_loss": 2.334705352783203, + "eval_runtime": 399.4375, + "eval_samples_per_second": 13.534, + "eval_steps_per_second": 3.385, + "eval_wer": 0.23019312293923694, + "step": 30000 + }, + { + "epoch": 13.059549035588155, + "grad_norm": 15.290221214294434, + "learning_rate": 7.452959228408594e-06, + "loss": 12.3108, + "step": 30050 + }, + { + "epoch": 13.081282260255366, + "grad_norm": 26.75568389892578, + "learning_rate": 7.448575186321789e-06, + "loss": 12.3347, + "step": 30100 + }, + { + "epoch": 13.103015484922576, + "grad_norm": 28.02945327758789, + "learning_rate": 7.4441911442349854e-06, + "loss": 12.3172, + "step": 30150 + }, + { + "epoch": 13.124748709589785, + "grad_norm": 17.39537811279297, + "learning_rate": 7.439807102148181e-06, + "loss": 12.3004, + "step": 30200 + }, + { + "epoch": 13.146481934256995, + "grad_norm": 21.168519973754883, + "learning_rate": 7.435423060061377e-06, + "loss": 12.3882, + "step": 30250 + }, + { + "epoch": 13.168215158924205, + "grad_norm": 24.02804946899414, + "learning_rate": 7.4310390179745725e-06, + "loss": 12.3512, + "step": 30300 + }, + { + "epoch": 13.189948383591416, + "grad_norm": 25.33257484436035, + "learning_rate": 7.4266549758877695e-06, + "loss": 12.3121, + "step": 30350 + }, + { + "epoch": 13.211681608258626, + "grad_norm": 20.40574073791504, + "learning_rate": 7.422270933800965e-06, + "loss": 12.3737, + "step": 30400 + }, + { + "epoch": 13.233414832925835, + "grad_norm": 25.527008056640625, + "learning_rate": 7.417886891714161e-06, + "loss": 12.3575, + "step": 30450 + }, + { + "epoch": 13.255148057593045, + "grad_norm": 23.7490291595459, + "learning_rate": 7.413502849627357e-06, + "loss": 12.3835, + "step": 30500 + }, + { + "epoch": 13.276881282260256, + "grad_norm": 23.39885139465332, + "learning_rate": 7.409118807540553e-06, + "loss": 12.3056, + "step": 30550 + }, + { + "epoch": 13.298614506927466, + "grad_norm": 21.89725112915039, + "learning_rate": 7.404734765453749e-06, + "loss": 12.3262, + "step": 30600 + }, + { + "epoch": 13.320347731594675, + "grad_norm": 20.838117599487305, + "learning_rate": 7.400350723366944e-06, + "loss": 12.3879, + "step": 30650 + }, + { + "epoch": 13.342080956261885, + "grad_norm": 17.388107299804688, + "learning_rate": 7.3959666812801415e-06, + "loss": 12.3611, + "step": 30700 + }, + { + "epoch": 13.363814180929095, + "grad_norm": 19.158178329467773, + "learning_rate": 7.391582639193337e-06, + "loss": 12.3782, + "step": 30750 + }, + { + "epoch": 13.385547405596306, + "grad_norm": 28.794353485107422, + "learning_rate": 7.387198597106533e-06, + "loss": 12.4156, + "step": 30800 + }, + { + "epoch": 13.407280630263516, + "grad_norm": 24.086498260498047, + "learning_rate": 7.3828145550197285e-06, + "loss": 12.3903, + "step": 30850 + }, + { + "epoch": 13.429013854930725, + "grad_norm": 24.688875198364258, + "learning_rate": 7.378430512932925e-06, + "loss": 12.3829, + "step": 30900 + }, + { + "epoch": 13.450747079597935, + "grad_norm": 54.69606018066406, + "learning_rate": 7.374046470846121e-06, + "loss": 12.3398, + "step": 30950 + }, + { + "epoch": 13.472480304265146, + "grad_norm": 25.10434341430664, + "learning_rate": 7.369662428759316e-06, + "loss": 12.3753, + "step": 31000 + }, + { + "epoch": 13.494213528932356, + "grad_norm": 35.44208908081055, + "learning_rate": 7.3652783866725134e-06, + "loss": 12.3937, + "step": 31050 + }, + { + "epoch": 13.515946753599565, + "grad_norm": 19.743236541748047, + "learning_rate": 7.360894344585709e-06, + "loss": 12.439, + "step": 31100 + }, + { + "epoch": 13.537679978266775, + "grad_norm": 29.914348602294922, + "learning_rate": 7.356510302498905e-06, + "loss": 12.3594, + "step": 31150 + }, + { + "epoch": 13.559413202933985, + "grad_norm": 105.84856414794922, + "learning_rate": 7.3521262604121004e-06, + "loss": 12.352, + "step": 31200 + }, + { + "epoch": 13.581146427601196, + "grad_norm": 23.331436157226562, + "learning_rate": 7.347742218325297e-06, + "loss": 12.355, + "step": 31250 + }, + { + "epoch": 13.602879652268406, + "grad_norm": 18.46331214904785, + "learning_rate": 7.343358176238492e-06, + "loss": 12.3481, + "step": 31300 + }, + { + "epoch": 13.624612876935615, + "grad_norm": 22.384254455566406, + "learning_rate": 7.338974134151688e-06, + "loss": 12.4047, + "step": 31350 + }, + { + "epoch": 13.646346101602825, + "grad_norm": 34.16387176513672, + "learning_rate": 7.3345900920648845e-06, + "loss": 12.4131, + "step": 31400 + }, + { + "epoch": 13.668079326270036, + "grad_norm": 59.95965576171875, + "learning_rate": 7.33020604997808e-06, + "loss": 12.3539, + "step": 31450 + }, + { + "epoch": 13.689812550937246, + "grad_norm": 21.647342681884766, + "learning_rate": 7.325822007891277e-06, + "loss": 12.3936, + "step": 31500 + }, + { + "epoch": 13.711545775604455, + "grad_norm": 20.892303466796875, + "learning_rate": 7.321437965804472e-06, + "loss": 12.4042, + "step": 31550 + }, + { + "epoch": 13.733279000271665, + "grad_norm": 25.085771560668945, + "learning_rate": 7.317053923717669e-06, + "loss": 12.4563, + "step": 31600 + }, + { + "epoch": 13.755012224938875, + "grad_norm": 29.819766998291016, + "learning_rate": 7.312669881630864e-06, + "loss": 12.3417, + "step": 31650 + }, + { + "epoch": 13.776745449606086, + "grad_norm": 23.446327209472656, + "learning_rate": 7.30828583954406e-06, + "loss": 12.4085, + "step": 31700 + }, + { + "epoch": 13.798478674273296, + "grad_norm": 30.441680908203125, + "learning_rate": 7.303901797457256e-06, + "loss": 12.3269, + "step": 31750 + }, + { + "epoch": 13.820211898940505, + "grad_norm": 46.045162200927734, + "learning_rate": 7.299517755370452e-06, + "loss": 12.3459, + "step": 31800 + }, + { + "epoch": 13.841945123607715, + "grad_norm": 20.486669540405273, + "learning_rate": 7.295133713283649e-06, + "loss": 12.4457, + "step": 31850 + }, + { + "epoch": 13.863678348274926, + "grad_norm": 18.060197830200195, + "learning_rate": 7.290749671196844e-06, + "loss": 12.4123, + "step": 31900 + }, + { + "epoch": 13.885411572942136, + "grad_norm": 29.656959533691406, + "learning_rate": 7.2863656291100406e-06, + "loss": 12.3888, + "step": 31950 + }, + { + "epoch": 13.907144797609345, + "grad_norm": 16.68509864807129, + "learning_rate": 7.281981587023236e-06, + "loss": 12.3878, + "step": 32000 + }, + { + "epoch": 13.928878022276555, + "grad_norm": 27.963064193725586, + "learning_rate": 7.277597544936432e-06, + "loss": 12.3907, + "step": 32050 + }, + { + "epoch": 13.950611246943765, + "grad_norm": 27.46925163269043, + "learning_rate": 7.2732135028496276e-06, + "loss": 12.4483, + "step": 32100 + }, + { + "epoch": 13.972344471610976, + "grad_norm": 23.631675720214844, + "learning_rate": 7.268829460762824e-06, + "loss": 12.4326, + "step": 32150 + }, + { + "epoch": 13.994077696278186, + "grad_norm": 44.30888748168945, + "learning_rate": 7.264445418676019e-06, + "loss": 12.4026, + "step": 32200 + }, + { + "epoch": 14.015647921760392, + "grad_norm": 17.614269256591797, + "learning_rate": 7.260061376589215e-06, + "loss": 12.2351, + "step": 32250 + }, + { + "epoch": 14.0373811464276, + "grad_norm": 16.82352638244629, + "learning_rate": 7.2556773345024125e-06, + "loss": 12.2887, + "step": 32300 + }, + { + "epoch": 14.059114371094811, + "grad_norm": 22.863889694213867, + "learning_rate": 7.251293292415608e-06, + "loss": 12.2874, + "step": 32350 + }, + { + "epoch": 14.080847595762021, + "grad_norm": 22.543703079223633, + "learning_rate": 7.246909250328804e-06, + "loss": 12.2726, + "step": 32400 + }, + { + "epoch": 14.102580820429232, + "grad_norm": 19.95811653137207, + "learning_rate": 7.2425252082419995e-06, + "loss": 12.2948, + "step": 32450 + }, + { + "epoch": 14.12431404509644, + "grad_norm": 11.412972450256348, + "learning_rate": 7.238141166155196e-06, + "loss": 12.2971, + "step": 32500 + }, + { + "epoch": 14.14604726976365, + "grad_norm": 30.869230270385742, + "learning_rate": 7.233757124068391e-06, + "loss": 12.3222, + "step": 32550 + }, + { + "epoch": 14.167780494430861, + "grad_norm": 37.976741790771484, + "learning_rate": 7.229373081981587e-06, + "loss": 12.3079, + "step": 32600 + }, + { + "epoch": 14.189513719098072, + "grad_norm": 23.526809692382812, + "learning_rate": 7.224989039894783e-06, + "loss": 12.3085, + "step": 32650 + }, + { + "epoch": 14.211246943765282, + "grad_norm": 19.888294219970703, + "learning_rate": 7.22060499780798e-06, + "loss": 12.3141, + "step": 32700 + }, + { + "epoch": 14.23298016843249, + "grad_norm": 16.727022171020508, + "learning_rate": 7.216220955721176e-06, + "loss": 12.275, + "step": 32750 + }, + { + "epoch": 14.254713393099701, + "grad_norm": 14.18730640411377, + "learning_rate": 7.2118369136343715e-06, + "loss": 12.3144, + "step": 32800 + }, + { + "epoch": 14.276446617766911, + "grad_norm": 20.451278686523438, + "learning_rate": 7.207452871547568e-06, + "loss": 12.2727, + "step": 32850 + }, + { + "epoch": 14.298179842434122, + "grad_norm": 16.769447326660156, + "learning_rate": 7.203068829460763e-06, + "loss": 12.3384, + "step": 32900 + }, + { + "epoch": 14.31991306710133, + "grad_norm": 27.05632781982422, + "learning_rate": 7.198684787373959e-06, + "loss": 12.3492, + "step": 32950 + }, + { + "epoch": 14.34164629176854, + "grad_norm": 38.1939582824707, + "learning_rate": 7.194300745287155e-06, + "loss": 12.284, + "step": 33000 + }, + { + "epoch": 14.363379516435751, + "grad_norm": 32.06970977783203, + "learning_rate": 7.189916703200352e-06, + "loss": 12.3158, + "step": 33050 + }, + { + "epoch": 14.385112741102962, + "grad_norm": 25.079200744628906, + "learning_rate": 7.185532661113547e-06, + "loss": 12.3486, + "step": 33100 + }, + { + "epoch": 14.406845965770172, + "grad_norm": 21.099042892456055, + "learning_rate": 7.181148619026743e-06, + "loss": 12.2961, + "step": 33150 + }, + { + "epoch": 14.42857919043738, + "grad_norm": 18.112712860107422, + "learning_rate": 7.17676457693994e-06, + "loss": 12.2852, + "step": 33200 + }, + { + "epoch": 14.450312415104591, + "grad_norm": 18.887737274169922, + "learning_rate": 7.172380534853135e-06, + "loss": 12.276, + "step": 33250 + }, + { + "epoch": 14.472045639771801, + "grad_norm": 21.17413902282715, + "learning_rate": 7.167996492766331e-06, + "loss": 12.3109, + "step": 33300 + }, + { + "epoch": 14.493778864439012, + "grad_norm": 67.79141998291016, + "learning_rate": 7.163612450679527e-06, + "loss": 12.3326, + "step": 33350 + }, + { + "epoch": 14.51551208910622, + "grad_norm": 18.88022232055664, + "learning_rate": 7.159228408592723e-06, + "loss": 12.355, + "step": 33400 + }, + { + "epoch": 14.53724531377343, + "grad_norm": 14.09670639038086, + "learning_rate": 7.154844366505918e-06, + "loss": 12.3049, + "step": 33450 + }, + { + "epoch": 14.558978538440641, + "grad_norm": 22.51435661315918, + "learning_rate": 7.150460324419115e-06, + "loss": 12.3043, + "step": 33500 + }, + { + "epoch": 14.580711763107852, + "grad_norm": 20.429990768432617, + "learning_rate": 7.146076282332311e-06, + "loss": 12.3029, + "step": 33550 + }, + { + "epoch": 14.602444987775062, + "grad_norm": 27.559160232543945, + "learning_rate": 7.141692240245507e-06, + "loss": 12.3017, + "step": 33600 + }, + { + "epoch": 14.62417821244227, + "grad_norm": 26.29608917236328, + "learning_rate": 7.137308198158703e-06, + "loss": 12.347, + "step": 33650 + }, + { + "epoch": 14.645911437109481, + "grad_norm": 12.279489517211914, + "learning_rate": 7.132924156071899e-06, + "loss": 12.2923, + "step": 33700 + }, + { + "epoch": 14.667644661776691, + "grad_norm": 19.162981033325195, + "learning_rate": 7.128540113985095e-06, + "loss": 12.3289, + "step": 33750 + }, + { + "epoch": 14.689377886443902, + "grad_norm": 19.87074089050293, + "learning_rate": 7.12415607189829e-06, + "loss": 12.2862, + "step": 33800 + }, + { + "epoch": 14.71111111111111, + "grad_norm": 22.431442260742188, + "learning_rate": 7.119772029811487e-06, + "loss": 12.3116, + "step": 33850 + }, + { + "epoch": 14.73284433577832, + "grad_norm": 16.823158264160156, + "learning_rate": 7.115387987724683e-06, + "loss": 12.3284, + "step": 33900 + }, + { + "epoch": 14.754577560445531, + "grad_norm": 26.60719108581543, + "learning_rate": 7.111003945637879e-06, + "loss": 12.2962, + "step": 33950 + }, + { + "epoch": 14.776310785112742, + "grad_norm": 20.54785919189453, + "learning_rate": 7.106619903551074e-06, + "loss": 12.2979, + "step": 34000 + }, + { + "epoch": 14.79804400977995, + "grad_norm": 26.004840850830078, + "learning_rate": 7.1022358614642705e-06, + "loss": 12.3252, + "step": 34050 + }, + { + "epoch": 14.81977723444716, + "grad_norm": 21.51180648803711, + "learning_rate": 7.097851819377467e-06, + "loss": 12.2844, + "step": 34100 + }, + { + "epoch": 14.841510459114371, + "grad_norm": 21.89017105102539, + "learning_rate": 7.093467777290662e-06, + "loss": 12.3084, + "step": 34150 + }, + { + "epoch": 14.863243683781581, + "grad_norm": 23.298505783081055, + "learning_rate": 7.089083735203859e-06, + "loss": 12.3825, + "step": 34200 + }, + { + "epoch": 14.884976908448792, + "grad_norm": 21.83428382873535, + "learning_rate": 7.084699693117054e-06, + "loss": 12.2981, + "step": 34250 + }, + { + "epoch": 14.906710133116, + "grad_norm": 26.309865951538086, + "learning_rate": 7.080315651030251e-06, + "loss": 12.2713, + "step": 34300 + }, + { + "epoch": 14.92844335778321, + "grad_norm": 28.134078979492188, + "learning_rate": 7.075931608943446e-06, + "loss": 12.311, + "step": 34350 + }, + { + "epoch": 14.950176582450421, + "grad_norm": 25.938369750976562, + "learning_rate": 7.0715475668566425e-06, + "loss": 12.2782, + "step": 34400 + }, + { + "epoch": 14.971909807117632, + "grad_norm": 25.179311752319336, + "learning_rate": 7.067163524769839e-06, + "loss": 12.3384, + "step": 34450 + }, + { + "epoch": 14.993643031784842, + "grad_norm": 18.602447509765625, + "learning_rate": 7.062779482683034e-06, + "loss": 12.3125, + "step": 34500 + }, + { + "epoch": 15.015213257267048, + "grad_norm": 23.754281997680664, + "learning_rate": 7.05839544059623e-06, + "loss": 12.1766, + "step": 34550 + }, + { + "epoch": 15.036946481934256, + "grad_norm": 22.74106788635254, + "learning_rate": 7.054011398509426e-06, + "loss": 12.228, + "step": 34600 + }, + { + "epoch": 15.058679706601467, + "grad_norm": 35.1696662902832, + "learning_rate": 7.049627356422623e-06, + "loss": 12.2026, + "step": 34650 + }, + { + "epoch": 15.080412931268677, + "grad_norm": 20.032005310058594, + "learning_rate": 7.045243314335818e-06, + "loss": 12.2443, + "step": 34700 + }, + { + "epoch": 15.102146155935888, + "grad_norm": 30.315168380737305, + "learning_rate": 7.040859272249014e-06, + "loss": 12.221, + "step": 34750 + }, + { + "epoch": 15.123879380603096, + "grad_norm": 15.685395240783691, + "learning_rate": 7.03647523016221e-06, + "loss": 12.2394, + "step": 34800 + }, + { + "epoch": 15.145612605270307, + "grad_norm": 24.66408348083496, + "learning_rate": 7.032091188075406e-06, + "loss": 12.1932, + "step": 34850 + }, + { + "epoch": 15.167345829937517, + "grad_norm": 20.8659725189209, + "learning_rate": 7.027707145988602e-06, + "loss": 12.2888, + "step": 34900 + }, + { + "epoch": 15.189079054604727, + "grad_norm": 25.82394027709961, + "learning_rate": 7.023323103901798e-06, + "loss": 12.1971, + "step": 34950 + }, + { + "epoch": 15.210812279271938, + "grad_norm": 17.442481994628906, + "learning_rate": 7.018939061814995e-06, + "loss": 12.2246, + "step": 35000 + }, + { + "epoch": 15.232545503939146, + "grad_norm": 16.10444450378418, + "learning_rate": 7.01455501972819e-06, + "loss": 12.2378, + "step": 35050 + }, + { + "epoch": 15.254278728606357, + "grad_norm": 20.300018310546875, + "learning_rate": 7.010170977641386e-06, + "loss": 12.2806, + "step": 35100 + }, + { + "epoch": 15.276011953273567, + "grad_norm": 30.641281127929688, + "learning_rate": 7.005786935554582e-06, + "loss": 12.2608, + "step": 35150 + }, + { + "epoch": 15.297745177940778, + "grad_norm": 36.21476745605469, + "learning_rate": 7.001402893467778e-06, + "loss": 12.2502, + "step": 35200 + }, + { + "epoch": 15.319478402607986, + "grad_norm": 31.640207290649414, + "learning_rate": 6.997018851380973e-06, + "loss": 12.2664, + "step": 35250 + }, + { + "epoch": 15.341211627275197, + "grad_norm": 14.65031623840332, + "learning_rate": 6.99263480929417e-06, + "loss": 12.2727, + "step": 35300 + }, + { + "epoch": 15.362944851942407, + "grad_norm": 15.896147727966309, + "learning_rate": 6.988250767207367e-06, + "loss": 12.2335, + "step": 35350 + }, + { + "epoch": 15.384678076609617, + "grad_norm": 27.60741424560547, + "learning_rate": 6.983866725120561e-06, + "loss": 12.241, + "step": 35400 + }, + { + "epoch": 15.406411301276828, + "grad_norm": 27.842981338500977, + "learning_rate": 6.979482683033758e-06, + "loss": 12.299, + "step": 35450 + }, + { + "epoch": 15.428144525944036, + "grad_norm": 14.332504272460938, + "learning_rate": 6.975098640946954e-06, + "loss": 12.2547, + "step": 35500 + }, + { + "epoch": 15.449877750611247, + "grad_norm": 13.6268310546875, + "learning_rate": 6.97071459886015e-06, + "loss": 12.2764, + "step": 35550 + }, + { + "epoch": 15.471610975278457, + "grad_norm": 27.122060775756836, + "learning_rate": 6.966330556773345e-06, + "loss": 12.2568, + "step": 35600 + }, + { + "epoch": 15.493344199945668, + "grad_norm": NaN, + "learning_rate": 6.9619465146865415e-06, + "loss": 12.2716, + "step": 35650 + }, + { + "epoch": 15.515077424612876, + "grad_norm": 16.30205726623535, + "learning_rate": 6.957562472599737e-06, + "loss": 12.1952, + "step": 35700 + }, + { + "epoch": 15.536810649280087, + "grad_norm": 17.126123428344727, + "learning_rate": 6.953178430512933e-06, + "loss": 12.2365, + "step": 35750 + }, + { + "epoch": 15.558543873947297, + "grad_norm": 33.20661163330078, + "learning_rate": 6.94879438842613e-06, + "loss": 12.2776, + "step": 35800 + }, + { + "epoch": 15.580277098614507, + "grad_norm": 22.688047409057617, + "learning_rate": 6.944410346339326e-06, + "loss": 12.3202, + "step": 35850 + }, + { + "epoch": 15.602010323281718, + "grad_norm": 19.268665313720703, + "learning_rate": 6.940026304252522e-06, + "loss": 12.2744, + "step": 35900 + }, + { + "epoch": 15.623743547948926, + "grad_norm": 44.28622817993164, + "learning_rate": 6.935642262165717e-06, + "loss": 12.2619, + "step": 35950 + }, + { + "epoch": 15.645476772616137, + "grad_norm": 11.47972297668457, + "learning_rate": 6.9312582200789135e-06, + "loss": 12.2281, + "step": 36000 + }, + { + "epoch": 15.667209997283347, + "grad_norm": 26.456462860107422, + "learning_rate": 6.926874177992109e-06, + "loss": 12.2591, + "step": 36050 + }, + { + "epoch": 15.688943221950558, + "grad_norm": 18.363269805908203, + "learning_rate": 6.922490135905305e-06, + "loss": 12.2958, + "step": 36100 + }, + { + "epoch": 15.710676446617766, + "grad_norm": 21.405649185180664, + "learning_rate": 6.9181060938185005e-06, + "loss": 12.2485, + "step": 36150 + }, + { + "epoch": 15.732409671284977, + "grad_norm": 27.277904510498047, + "learning_rate": 6.913722051731698e-06, + "loss": 12.2324, + "step": 36200 + }, + { + "epoch": 15.754142895952187, + "grad_norm": 20.303300857543945, + "learning_rate": 6.909338009644894e-06, + "loss": 12.2939, + "step": 36250 + }, + { + "epoch": 15.775876120619397, + "grad_norm": 14.886679649353027, + "learning_rate": 6.904953967558089e-06, + "loss": 12.2036, + "step": 36300 + }, + { + "epoch": 15.797609345286606, + "grad_norm": 17.747087478637695, + "learning_rate": 6.9005699254712854e-06, + "loss": 12.2459, + "step": 36350 + }, + { + "epoch": 15.819342569953816, + "grad_norm": 34.592708587646484, + "learning_rate": 6.896185883384481e-06, + "loss": 12.2688, + "step": 36400 + }, + { + "epoch": 15.841075794621027, + "grad_norm": 20.060144424438477, + "learning_rate": 6.891801841297677e-06, + "loss": 12.2528, + "step": 36450 + }, + { + "epoch": 15.862809019288237, + "grad_norm": 13.47815227508545, + "learning_rate": 6.8874177992108724e-06, + "loss": 12.2797, + "step": 36500 + }, + { + "epoch": 15.884542243955448, + "grad_norm": 20.81302833557129, + "learning_rate": 6.883033757124069e-06, + "loss": 12.269, + "step": 36550 + }, + { + "epoch": 15.906275468622656, + "grad_norm": 29.326114654541016, + "learning_rate": 6.878649715037264e-06, + "loss": 12.296, + "step": 36600 + }, + { + "epoch": 15.928008693289867, + "grad_norm": 21.322439193725586, + "learning_rate": 6.874265672950461e-06, + "loss": 12.3283, + "step": 36650 + }, + { + "epoch": 15.949741917957077, + "grad_norm": 25.019590377807617, + "learning_rate": 6.869881630863657e-06, + "loss": 12.2913, + "step": 36700 + }, + { + "epoch": 15.971475142624287, + "grad_norm": 16.494823455810547, + "learning_rate": 6.865497588776853e-06, + "loss": 12.2574, + "step": 36750 + }, + { + "epoch": 15.993208367291498, + "grad_norm": 22.250595092773438, + "learning_rate": 6.861113546690049e-06, + "loss": 12.26, + "step": 36800 + }, + { + "epoch": 16.014778592773702, + "grad_norm": 15.23131275177002, + "learning_rate": 6.856729504603244e-06, + "loss": 12.1167, + "step": 36850 + }, + { + "epoch": 16.036511817440914, + "grad_norm": 18.172534942626953, + "learning_rate": 6.852345462516441e-06, + "loss": 12.1946, + "step": 36900 + }, + { + "epoch": 16.058245042108123, + "grad_norm": 27.63299560546875, + "learning_rate": 6.847961420429636e-06, + "loss": 12.1801, + "step": 36950 + }, + { + "epoch": 16.07997826677533, + "grad_norm": 22.287805557250977, + "learning_rate": 6.843577378342833e-06, + "loss": 12.2187, + "step": 37000 + }, + { + "epoch": 16.101711491442543, + "grad_norm": 15.652294158935547, + "learning_rate": 6.8391933362560285e-06, + "loss": 12.1837, + "step": 37050 + }, + { + "epoch": 16.123444716109752, + "grad_norm": 23.91109848022461, + "learning_rate": 6.834809294169225e-06, + "loss": 12.2074, + "step": 37100 + }, + { + "epoch": 16.145177940776964, + "grad_norm": 24.845624923706055, + "learning_rate": 6.830425252082421e-06, + "loss": 12.1387, + "step": 37150 + }, + { + "epoch": 16.166911165444173, + "grad_norm": 19.137048721313477, + "learning_rate": 6.826041209995616e-06, + "loss": 12.2304, + "step": 37200 + }, + { + "epoch": 16.18864439011138, + "grad_norm": 17.24692153930664, + "learning_rate": 6.8216571679088126e-06, + "loss": 12.208, + "step": 37250 + }, + { + "epoch": 16.210377614778594, + "grad_norm": 20.503686904907227, + "learning_rate": 6.817273125822008e-06, + "loss": 12.2103, + "step": 37300 + }, + { + "epoch": 16.232110839445802, + "grad_norm": 27.404552459716797, + "learning_rate": 6.812889083735205e-06, + "loss": 12.2422, + "step": 37350 + }, + { + "epoch": 16.253844064113014, + "grad_norm": 25.16230010986328, + "learning_rate": 6.8085050416483996e-06, + "loss": 12.2006, + "step": 37400 + }, + { + "epoch": 16.275577288780223, + "grad_norm": 18.156126022338867, + "learning_rate": 6.804120999561597e-06, + "loss": 12.2023, + "step": 37450 + }, + { + "epoch": 16.29731051344743, + "grad_norm": 19.68562889099121, + "learning_rate": 6.799736957474792e-06, + "loss": 12.1877, + "step": 37500 + }, + { + "epoch": 16.319043738114644, + "grad_norm": 17.91988182067871, + "learning_rate": 6.795352915387988e-06, + "loss": 12.215, + "step": 37550 + }, + { + "epoch": 16.340776962781852, + "grad_norm": 15.31675910949707, + "learning_rate": 6.7909688733011845e-06, + "loss": 12.1548, + "step": 37600 + }, + { + "epoch": 16.36251018744906, + "grad_norm": 8.975651741027832, + "learning_rate": 6.78658483121438e-06, + "loss": 12.169, + "step": 37650 + }, + { + "epoch": 16.384243412116273, + "grad_norm": 16.77298927307129, + "learning_rate": 6.782200789127576e-06, + "loss": 12.2139, + "step": 37700 + }, + { + "epoch": 16.405976636783482, + "grad_norm": 23.03885269165039, + "learning_rate": 6.7778167470407715e-06, + "loss": 12.2093, + "step": 37750 + }, + { + "epoch": 16.427709861450694, + "grad_norm": 18.47231101989746, + "learning_rate": 6.773432704953969e-06, + "loss": 12.1992, + "step": 37800 + }, + { + "epoch": 16.449443086117903, + "grad_norm": 28.977338790893555, + "learning_rate": 6.769048662867164e-06, + "loss": 12.1989, + "step": 37850 + }, + { + "epoch": 16.47117631078511, + "grad_norm": 16.37677574157715, + "learning_rate": 6.76466462078036e-06, + "loss": 12.2296, + "step": 37900 + }, + { + "epoch": 16.492909535452323, + "grad_norm": 13.731319427490234, + "learning_rate": 6.760280578693556e-06, + "loss": 12.186, + "step": 37950 + }, + { + "epoch": 16.514642760119532, + "grad_norm": 20.206491470336914, + "learning_rate": 6.755896536606752e-06, + "loss": 12.1552, + "step": 38000 + }, + { + "epoch": 16.536375984786744, + "grad_norm": 19.88826560974121, + "learning_rate": 6.751512494519948e-06, + "loss": 12.2298, + "step": 38050 + }, + { + "epoch": 16.558109209453953, + "grad_norm": 31.184532165527344, + "learning_rate": 6.7471284524331435e-06, + "loss": 12.2041, + "step": 38100 + }, + { + "epoch": 16.57984243412116, + "grad_norm": 37.404266357421875, + "learning_rate": 6.7427444103463405e-06, + "loss": 12.208, + "step": 38150 + }, + { + "epoch": 16.601575658788374, + "grad_norm": 12.503349304199219, + "learning_rate": 6.738360368259536e-06, + "loss": 12.2289, + "step": 38200 + }, + { + "epoch": 16.623308883455582, + "grad_norm": 14.80574893951416, + "learning_rate": 6.733976326172732e-06, + "loss": 12.2027, + "step": 38250 + }, + { + "epoch": 16.645042108122794, + "grad_norm": 18.339298248291016, + "learning_rate": 6.7295922840859276e-06, + "loss": 12.2122, + "step": 38300 + }, + { + "epoch": 16.666775332790003, + "grad_norm": 9.988556861877441, + "learning_rate": 6.725208241999124e-06, + "loss": 12.2169, + "step": 38350 + }, + { + "epoch": 16.68850855745721, + "grad_norm": 16.23221778869629, + "learning_rate": 6.720824199912319e-06, + "loss": 12.2432, + "step": 38400 + }, + { + "epoch": 16.710241782124424, + "grad_norm": 17.93288803100586, + "learning_rate": 6.716440157825515e-06, + "loss": 12.1946, + "step": 38450 + }, + { + "epoch": 16.731975006791632, + "grad_norm": 23.863719940185547, + "learning_rate": 6.7120561157387125e-06, + "loss": 12.2601, + "step": 38500 + }, + { + "epoch": 16.75370823145884, + "grad_norm": 32.24260330200195, + "learning_rate": 6.707672073651907e-06, + "loss": 12.2242, + "step": 38550 + }, + { + "epoch": 16.775441456126053, + "grad_norm": 31.188295364379883, + "learning_rate": 6.703288031565104e-06, + "loss": 12.1977, + "step": 38600 + }, + { + "epoch": 16.797174680793262, + "grad_norm": 21.935489654541016, + "learning_rate": 6.6989039894782995e-06, + "loss": 12.2153, + "step": 38650 + }, + { + "epoch": 16.818907905460474, + "grad_norm": 16.820199966430664, + "learning_rate": 6.694519947391496e-06, + "loss": 12.1841, + "step": 38700 + }, + { + "epoch": 16.840641130127683, + "grad_norm": 27.350257873535156, + "learning_rate": 6.690135905304691e-06, + "loss": 12.2308, + "step": 38750 + }, + { + "epoch": 16.86237435479489, + "grad_norm": 20.717317581176758, + "learning_rate": 6.685751863217887e-06, + "loss": 12.2139, + "step": 38800 + }, + { + "epoch": 16.884107579462103, + "grad_norm": 20.515241622924805, + "learning_rate": 6.681367821131084e-06, + "loss": 12.209, + "step": 38850 + }, + { + "epoch": 16.905840804129312, + "grad_norm": 16.544082641601562, + "learning_rate": 6.676983779044279e-06, + "loss": 12.2183, + "step": 38900 + }, + { + "epoch": 16.927574028796524, + "grad_norm": 17.54091453552246, + "learning_rate": 6.672599736957476e-06, + "loss": 12.2609, + "step": 38950 + }, + { + "epoch": 16.949307253463733, + "grad_norm": 21.071598052978516, + "learning_rate": 6.6682156948706714e-06, + "loss": 12.1727, + "step": 39000 + }, + { + "epoch": 16.97104047813094, + "grad_norm": 17.628015518188477, + "learning_rate": 6.663831652783868e-06, + "loss": 12.1862, + "step": 39050 + }, + { + "epoch": 16.992773702798154, + "grad_norm": 13.298240661621094, + "learning_rate": 6.659447610697063e-06, + "loss": 12.2279, + "step": 39100 + }, + { + "epoch": 17.014343928280358, + "grad_norm": 12.616509437561035, + "learning_rate": 6.655063568610259e-06, + "loss": 12.0914, + "step": 39150 + }, + { + "epoch": 17.03607715294757, + "grad_norm": 21.71387481689453, + "learning_rate": 6.650679526523455e-06, + "loss": 12.1576, + "step": 39200 + }, + { + "epoch": 17.05781037761478, + "grad_norm": 26.497800827026367, + "learning_rate": 6.646295484436651e-06, + "loss": 12.1522, + "step": 39250 + }, + { + "epoch": 17.079543602281987, + "grad_norm": 20.276397705078125, + "learning_rate": 6.641911442349848e-06, + "loss": 12.1579, + "step": 39300 + }, + { + "epoch": 17.1012768269492, + "grad_norm": 18.534727096557617, + "learning_rate": 6.637527400263043e-06, + "loss": 12.178, + "step": 39350 + }, + { + "epoch": 17.123010051616408, + "grad_norm": 29.980501174926758, + "learning_rate": 6.63314335817624e-06, + "loss": 12.1507, + "step": 39400 + }, + { + "epoch": 17.14474327628362, + "grad_norm": 25.486083984375, + "learning_rate": 6.628759316089435e-06, + "loss": 12.148, + "step": 39450 + }, + { + "epoch": 17.16647650095083, + "grad_norm": 24.499359130859375, + "learning_rate": 6.624375274002631e-06, + "loss": 12.1513, + "step": 39500 + }, + { + "epoch": 17.188209725618037, + "grad_norm": 22.07660484313965, + "learning_rate": 6.619991231915827e-06, + "loss": 12.1477, + "step": 39550 + }, + { + "epoch": 17.20994295028525, + "grad_norm": 28.42877960205078, + "learning_rate": 6.615607189829023e-06, + "loss": 12.1747, + "step": 39600 + }, + { + "epoch": 17.231676174952458, + "grad_norm": 22.489025115966797, + "learning_rate": 6.611223147742218e-06, + "loss": 12.1474, + "step": 39650 + }, + { + "epoch": 17.25340939961967, + "grad_norm": 24.58718490600586, + "learning_rate": 6.6068391056554145e-06, + "loss": 12.1613, + "step": 39700 + }, + { + "epoch": 17.27514262428688, + "grad_norm": 92.33475494384766, + "learning_rate": 6.6024550635686116e-06, + "loss": 12.1637, + "step": 39750 + }, + { + "epoch": 17.296875848954087, + "grad_norm": 19.350147247314453, + "learning_rate": 6.598071021481807e-06, + "loss": 12.184, + "step": 39800 + }, + { + "epoch": 17.3186090736213, + "grad_norm": 14.627690315246582, + "learning_rate": 6.593686979395003e-06, + "loss": 12.1552, + "step": 39850 + }, + { + "epoch": 17.34034229828851, + "grad_norm": 11.52912425994873, + "learning_rate": 6.5893029373081986e-06, + "loss": 12.1784, + "step": 39900 + }, + { + "epoch": 17.362075522955717, + "grad_norm": 18.19782829284668, + "learning_rate": 6.584918895221395e-06, + "loss": 12.127, + "step": 39950 + }, + { + "epoch": 17.38380874762293, + "grad_norm": 24.676179885864258, + "learning_rate": 6.58053485313459e-06, + "loss": 12.2129, + "step": 40000 + }, + { + "epoch": 17.38380874762293, + "eval_cer": 0.0766327626430326, + "eval_loss": 2.3462953567504883, + "eval_runtime": 399.3051, + "eval_samples_per_second": 13.539, + "eval_steps_per_second": 3.386, + "eval_wer": 0.22991050400376825, + "step": 40000 + }, + { + "epoch": 17.405541972290138, + "grad_norm": 10.956995010375977, + "learning_rate": 6.5761508110477864e-06, + "loss": 12.1388, + "step": 40050 + }, + { + "epoch": 17.42727519695735, + "grad_norm": 23.64618682861328, + "learning_rate": 6.571766768960982e-06, + "loss": 12.1628, + "step": 40100 + }, + { + "epoch": 17.44900842162456, + "grad_norm": 22.68800926208496, + "learning_rate": 6.567382726874179e-06, + "loss": 12.1489, + "step": 40150 + }, + { + "epoch": 17.470741646291767, + "grad_norm": 17.155860900878906, + "learning_rate": 6.562998684787375e-06, + "loss": 12.1374, + "step": 40200 + }, + { + "epoch": 17.49247487095898, + "grad_norm": 19.839338302612305, + "learning_rate": 6.5586146427005705e-06, + "loss": 12.1891, + "step": 40250 + }, + { + "epoch": 17.514208095626188, + "grad_norm": 24.002262115478516, + "learning_rate": 6.554230600613767e-06, + "loss": 12.1819, + "step": 40300 + }, + { + "epoch": 17.5359413202934, + "grad_norm": 14.681846618652344, + "learning_rate": 6.549846558526962e-06, + "loss": 12.1583, + "step": 40350 + }, + { + "epoch": 17.55767454496061, + "grad_norm": 28.004215240478516, + "learning_rate": 6.545462516440158e-06, + "loss": 12.1953, + "step": 40400 + }, + { + "epoch": 17.579407769627817, + "grad_norm": 18.857913970947266, + "learning_rate": 6.541078474353354e-06, + "loss": 12.1922, + "step": 40450 + }, + { + "epoch": 17.60114099429503, + "grad_norm": 27.09821319580078, + "learning_rate": 6.536694432266551e-06, + "loss": 12.1901, + "step": 40500 + }, + { + "epoch": 17.622874218962238, + "grad_norm": 15.759273529052734, + "learning_rate": 6.532310390179745e-06, + "loss": 12.1536, + "step": 40550 + }, + { + "epoch": 17.64460744362945, + "grad_norm": 13.474365234375, + "learning_rate": 6.5279263480929425e-06, + "loss": 12.1806, + "step": 40600 + }, + { + "epoch": 17.66634066829666, + "grad_norm": 14.334114074707031, + "learning_rate": 6.523542306006139e-06, + "loss": 12.1881, + "step": 40650 + }, + { + "epoch": 17.688073892963867, + "grad_norm": 28.76114845275879, + "learning_rate": 6.519158263919334e-06, + "loss": 12.2126, + "step": 40700 + }, + { + "epoch": 17.70980711763108, + "grad_norm": 22.386192321777344, + "learning_rate": 6.51477422183253e-06, + "loss": 12.153, + "step": 40750 + }, + { + "epoch": 17.73154034229829, + "grad_norm": 12.762558937072754, + "learning_rate": 6.510390179745726e-06, + "loss": 12.1787, + "step": 40800 + }, + { + "epoch": 17.753273566965497, + "grad_norm": 15.222195625305176, + "learning_rate": 6.506006137658922e-06, + "loss": 12.1646, + "step": 40850 + }, + { + "epoch": 17.77500679163271, + "grad_norm": 35.954437255859375, + "learning_rate": 6.501622095572117e-06, + "loss": 12.1707, + "step": 40900 + }, + { + "epoch": 17.796740016299918, + "grad_norm": 11.987882614135742, + "learning_rate": 6.497238053485314e-06, + "loss": 12.1824, + "step": 40950 + }, + { + "epoch": 17.81847324096713, + "grad_norm": 31.296215057373047, + "learning_rate": 6.49285401139851e-06, + "loss": 12.213, + "step": 41000 + }, + { + "epoch": 17.84020646563434, + "grad_norm": 16.63829231262207, + "learning_rate": 6.488469969311706e-06, + "loss": 12.1762, + "step": 41050 + }, + { + "epoch": 17.861939690301547, + "grad_norm": 13.500885963439941, + "learning_rate": 6.484085927224902e-06, + "loss": 12.1677, + "step": 41100 + }, + { + "epoch": 17.88367291496876, + "grad_norm": 29.857112884521484, + "learning_rate": 6.479701885138098e-06, + "loss": 12.1812, + "step": 41150 + }, + { + "epoch": 17.905406139635968, + "grad_norm": 14.494293212890625, + "learning_rate": 6.475317843051294e-06, + "loss": 12.1565, + "step": 41200 + }, + { + "epoch": 17.92713936430318, + "grad_norm": 11.10816478729248, + "learning_rate": 6.470933800964489e-06, + "loss": 12.1799, + "step": 41250 + }, + { + "epoch": 17.94887258897039, + "grad_norm": 19.924617767333984, + "learning_rate": 6.466549758877686e-06, + "loss": 12.1382, + "step": 41300 + }, + { + "epoch": 17.970605813637597, + "grad_norm": 21.809062957763672, + "learning_rate": 6.462165716790882e-06, + "loss": 12.1727, + "step": 41350 + }, + { + "epoch": 17.99233903830481, + "grad_norm": 12.314726829528809, + "learning_rate": 6.457781674704078e-06, + "loss": 12.1355, + "step": 41400 + }, + { + "epoch": 18.013909263787014, + "grad_norm": 22.337913513183594, + "learning_rate": 6.453397632617273e-06, + "loss": 12.0397, + "step": 41450 + }, + { + "epoch": 18.035642488454226, + "grad_norm": 14.111494064331055, + "learning_rate": 6.44901359053047e-06, + "loss": 12.0939, + "step": 41500 + }, + { + "epoch": 18.057375713121434, + "grad_norm": 16.706897735595703, + "learning_rate": 6.444629548443666e-06, + "loss": 12.1425, + "step": 41550 + }, + { + "epoch": 18.079108937788643, + "grad_norm": 26.20379066467285, + "learning_rate": 6.440245506356861e-06, + "loss": 12.1265, + "step": 41600 + }, + { + "epoch": 18.100842162455855, + "grad_norm": 14.789849281311035, + "learning_rate": 6.4358614642700574e-06, + "loss": 12.1377, + "step": 41650 + }, + { + "epoch": 18.122575387123064, + "grad_norm": 34.11836242675781, + "learning_rate": 6.431477422183253e-06, + "loss": 12.1273, + "step": 41700 + }, + { + "epoch": 18.144308611790276, + "grad_norm": 32.26976013183594, + "learning_rate": 6.42709338009645e-06, + "loss": 12.0853, + "step": 41750 + }, + { + "epoch": 18.166041836457484, + "grad_norm": 19.59932518005371, + "learning_rate": 6.422709338009645e-06, + "loss": 12.0887, + "step": 41800 + }, + { + "epoch": 18.187775061124693, + "grad_norm": 16.68062400817871, + "learning_rate": 6.4183252959228415e-06, + "loss": 12.148, + "step": 41850 + }, + { + "epoch": 18.209508285791905, + "grad_norm": 18.44430923461914, + "learning_rate": 6.413941253836037e-06, + "loss": 12.132, + "step": 41900 + }, + { + "epoch": 18.231241510459114, + "grad_norm": 23.202688217163086, + "learning_rate": 6.409557211749233e-06, + "loss": 12.1606, + "step": 41950 + }, + { + "epoch": 18.252974735126326, + "grad_norm": 11.007984161376953, + "learning_rate": 6.405173169662429e-06, + "loss": 12.1526, + "step": 42000 + }, + { + "epoch": 18.274707959793535, + "grad_norm": 43.34115219116211, + "learning_rate": 6.400789127575625e-06, + "loss": 12.1461, + "step": 42050 + }, + { + "epoch": 18.296441184460743, + "grad_norm": 21.273698806762695, + "learning_rate": 6.396405085488822e-06, + "loss": 12.116, + "step": 42100 + }, + { + "epoch": 18.318174409127955, + "grad_norm": 21.992979049682617, + "learning_rate": 6.392021043402017e-06, + "loss": 12.0956, + "step": 42150 + }, + { + "epoch": 18.339907633795164, + "grad_norm": 10.890033721923828, + "learning_rate": 6.3876370013152135e-06, + "loss": 12.0999, + "step": 42200 + }, + { + "epoch": 18.361640858462373, + "grad_norm": 10.554021835327148, + "learning_rate": 6.383252959228409e-06, + "loss": 12.1158, + "step": 42250 + }, + { + "epoch": 18.383374083129585, + "grad_norm": 11.385374069213867, + "learning_rate": 6.378868917141605e-06, + "loss": 12.1541, + "step": 42300 + }, + { + "epoch": 18.405107307796793, + "grad_norm": 11.36735725402832, + "learning_rate": 6.3744848750548005e-06, + "loss": 12.119, + "step": 42350 + }, + { + "epoch": 18.426840532464006, + "grad_norm": 19.5784969329834, + "learning_rate": 6.370100832967997e-06, + "loss": 12.1513, + "step": 42400 + }, + { + "epoch": 18.448573757131214, + "grad_norm": 10.584908485412598, + "learning_rate": 6.365716790881194e-06, + "loss": 12.1572, + "step": 42450 + }, + { + "epoch": 18.470306981798423, + "grad_norm": 23.416278839111328, + "learning_rate": 6.361332748794389e-06, + "loss": 12.1384, + "step": 42500 + }, + { + "epoch": 18.492040206465635, + "grad_norm": 22.098583221435547, + "learning_rate": 6.3569487067075854e-06, + "loss": 12.1005, + "step": 42550 + }, + { + "epoch": 18.513773431132844, + "grad_norm": 27.949371337890625, + "learning_rate": 6.352564664620781e-06, + "loss": 12.1248, + "step": 42600 + }, + { + "epoch": 18.535506655800056, + "grad_norm": 13.4013090133667, + "learning_rate": 6.348180622533977e-06, + "loss": 12.1335, + "step": 42650 + }, + { + "epoch": 18.557239880467264, + "grad_norm": 19.233583450317383, + "learning_rate": 6.3437965804471724e-06, + "loss": 12.1416, + "step": 42700 + }, + { + "epoch": 18.578973105134473, + "grad_norm": 17.514616012573242, + "learning_rate": 6.339412538360369e-06, + "loss": 12.1561, + "step": 42750 + }, + { + "epoch": 18.600706329801685, + "grad_norm": 17.83085823059082, + "learning_rate": 6.335028496273564e-06, + "loss": 12.1655, + "step": 42800 + }, + { + "epoch": 18.622439554468894, + "grad_norm": 32.00389099121094, + "learning_rate": 6.33064445418676e-06, + "loss": 12.1489, + "step": 42850 + }, + { + "epoch": 18.644172779136106, + "grad_norm": 36.5909309387207, + "learning_rate": 6.326260412099957e-06, + "loss": 12.1353, + "step": 42900 + }, + { + "epoch": 18.665906003803315, + "grad_norm": 19.841901779174805, + "learning_rate": 6.321876370013153e-06, + "loss": 12.1237, + "step": 42950 + }, + { + "epoch": 18.687639228470523, + "grad_norm": 12.05302619934082, + "learning_rate": 6.317492327926349e-06, + "loss": 12.0931, + "step": 43000 + }, + { + "epoch": 18.709372453137735, + "grad_norm": 51.42092514038086, + "learning_rate": 6.313108285839544e-06, + "loss": 12.0907, + "step": 43050 + }, + { + "epoch": 18.731105677804944, + "grad_norm": 21.547746658325195, + "learning_rate": 6.308724243752741e-06, + "loss": 12.1051, + "step": 43100 + }, + { + "epoch": 18.752838902472153, + "grad_norm": 17.779346466064453, + "learning_rate": 6.304340201665936e-06, + "loss": 12.1208, + "step": 43150 + }, + { + "epoch": 18.774572127139365, + "grad_norm": 12.786531448364258, + "learning_rate": 6.299956159579132e-06, + "loss": 12.1527, + "step": 43200 + }, + { + "epoch": 18.796305351806573, + "grad_norm": 15.865018844604492, + "learning_rate": 6.295572117492329e-06, + "loss": 12.1003, + "step": 43250 + }, + { + "epoch": 18.818038576473786, + "grad_norm": 12.622864723205566, + "learning_rate": 6.291188075405525e-06, + "loss": 12.1439, + "step": 43300 + }, + { + "epoch": 18.839771801140994, + "grad_norm": 12.189949035644531, + "learning_rate": 6.286804033318721e-06, + "loss": 12.132, + "step": 43350 + }, + { + "epoch": 18.861505025808203, + "grad_norm": 18.03951072692871, + "learning_rate": 6.282419991231916e-06, + "loss": 12.1327, + "step": 43400 + }, + { + "epoch": 18.883238250475415, + "grad_norm": 25.907819747924805, + "learning_rate": 6.2780359491451126e-06, + "loss": 12.1319, + "step": 43450 + }, + { + "epoch": 18.904971475142624, + "grad_norm": 39.924564361572266, + "learning_rate": 6.273651907058308e-06, + "loss": 12.1779, + "step": 43500 + }, + { + "epoch": 18.926704699809836, + "grad_norm": 10.564095497131348, + "learning_rate": 6.269267864971504e-06, + "loss": 12.1198, + "step": 43550 + }, + { + "epoch": 18.948437924477044, + "grad_norm": 16.400606155395508, + "learning_rate": 6.2648838228846996e-06, + "loss": 12.1314, + "step": 43600 + }, + { + "epoch": 18.970171149144253, + "grad_norm": 16.357927322387695, + "learning_rate": 6.260499780797896e-06, + "loss": 12.1305, + "step": 43650 + }, + { + "epoch": 18.991904373811465, + "grad_norm": 18.073299407958984, + "learning_rate": 6.256115738711093e-06, + "loss": 12.1585, + "step": 43700 + }, + { + "epoch": 19.01347459929367, + "grad_norm": 14.831045150756836, + "learning_rate": 6.251731696624288e-06, + "loss": 12.023, + "step": 43750 + }, + { + "epoch": 19.03520782396088, + "grad_norm": 20.606718063354492, + "learning_rate": 6.2473476545374845e-06, + "loss": 12.0678, + "step": 43800 + }, + { + "epoch": 19.05694104862809, + "grad_norm": 20.03177261352539, + "learning_rate": 6.24296361245068e-06, + "loss": 12.1054, + "step": 43850 + }, + { + "epoch": 19.0786742732953, + "grad_norm": 16.764787673950195, + "learning_rate": 6.238579570363876e-06, + "loss": 12.076, + "step": 43900 + }, + { + "epoch": 19.10040749796251, + "grad_norm": 20.074857711791992, + "learning_rate": 6.2341955282770715e-06, + "loss": 12.0784, + "step": 43950 + }, + { + "epoch": 19.12214072262972, + "grad_norm": 14.84661865234375, + "learning_rate": 6.229811486190268e-06, + "loss": 12.0933, + "step": 44000 + }, + { + "epoch": 19.14387394729693, + "grad_norm": 17.447168350219727, + "learning_rate": 6.225427444103463e-06, + "loss": 12.0883, + "step": 44050 + }, + { + "epoch": 19.16560717196414, + "grad_norm": 21.10520362854004, + "learning_rate": 6.22104340201666e-06, + "loss": 12.0839, + "step": 44100 + }, + { + "epoch": 19.18734039663135, + "grad_norm": 17.273950576782227, + "learning_rate": 6.2166593599298564e-06, + "loss": 12.0517, + "step": 44150 + }, + { + "epoch": 19.20907362129856, + "grad_norm": 11.963603019714355, + "learning_rate": 6.212275317843052e-06, + "loss": 12.1211, + "step": 44200 + }, + { + "epoch": 19.23080684596577, + "grad_norm": 28.02683448791504, + "learning_rate": 6.207891275756248e-06, + "loss": 12.1012, + "step": 44250 + }, + { + "epoch": 19.252540070632982, + "grad_norm": 18.750391006469727, + "learning_rate": 6.2035072336694435e-06, + "loss": 12.1257, + "step": 44300 + }, + { + "epoch": 19.27427329530019, + "grad_norm": 13.95964241027832, + "learning_rate": 6.19912319158264e-06, + "loss": 12.1096, + "step": 44350 + }, + { + "epoch": 19.2960065199674, + "grad_norm": 13.954286575317383, + "learning_rate": 6.194739149495835e-06, + "loss": 12.096, + "step": 44400 + }, + { + "epoch": 19.31773974463461, + "grad_norm": 19.977340698242188, + "learning_rate": 6.190355107409032e-06, + "loss": 12.1119, + "step": 44450 + }, + { + "epoch": 19.33947296930182, + "grad_norm": 20.84231948852539, + "learning_rate": 6.1859710653222275e-06, + "loss": 12.117, + "step": 44500 + }, + { + "epoch": 19.36120619396903, + "grad_norm": 15.089071273803711, + "learning_rate": 6.181587023235424e-06, + "loss": 12.0531, + "step": 44550 + }, + { + "epoch": 19.38293941863624, + "grad_norm": 19.530078887939453, + "learning_rate": 6.17720298114862e-06, + "loss": 12.0606, + "step": 44600 + }, + { + "epoch": 19.40467264330345, + "grad_norm": 35.273353576660156, + "learning_rate": 6.172818939061815e-06, + "loss": 12.0703, + "step": 44650 + }, + { + "epoch": 19.42640586797066, + "grad_norm": 27.611345291137695, + "learning_rate": 6.168434896975012e-06, + "loss": 12.0979, + "step": 44700 + }, + { + "epoch": 19.44813909263787, + "grad_norm": 12.463072776794434, + "learning_rate": 6.164050854888207e-06, + "loss": 12.1186, + "step": 44750 + }, + { + "epoch": 19.46987231730508, + "grad_norm": 13.169920921325684, + "learning_rate": 6.159666812801403e-06, + "loss": 12.1214, + "step": 44800 + }, + { + "epoch": 19.49160554197229, + "grad_norm": 19.480173110961914, + "learning_rate": 6.155282770714599e-06, + "loss": 12.0956, + "step": 44850 + }, + { + "epoch": 19.5133387666395, + "grad_norm": 12.746538162231445, + "learning_rate": 6.150898728627796e-06, + "loss": 12.0653, + "step": 44900 + }, + { + "epoch": 19.53507199130671, + "grad_norm": 20.619016647338867, + "learning_rate": 6.146514686540991e-06, + "loss": 12.1175, + "step": 44950 + }, + { + "epoch": 19.55680521597392, + "grad_norm": 19.82939910888672, + "learning_rate": 6.142130644454187e-06, + "loss": 12.126, + "step": 45000 + }, + { + "epoch": 19.57853844064113, + "grad_norm": 14.061666488647461, + "learning_rate": 6.1377466023673836e-06, + "loss": 12.0754, + "step": 45050 + }, + { + "epoch": 19.60027166530834, + "grad_norm": 13.00661849975586, + "learning_rate": 6.133362560280579e-06, + "loss": 12.0868, + "step": 45100 + }, + { + "epoch": 19.62200488997555, + "grad_norm": 14.957731246948242, + "learning_rate": 6.128978518193775e-06, + "loss": 12.0564, + "step": 45150 + }, + { + "epoch": 19.64373811464276, + "grad_norm": 14.701393127441406, + "learning_rate": 6.124594476106971e-06, + "loss": 12.1071, + "step": 45200 + }, + { + "epoch": 19.66547133930997, + "grad_norm": 17.358051300048828, + "learning_rate": 6.120210434020168e-06, + "loss": 12.118, + "step": 45250 + }, + { + "epoch": 19.68720456397718, + "grad_norm": 14.36281967163086, + "learning_rate": 6.115826391933363e-06, + "loss": 12.1246, + "step": 45300 + }, + { + "epoch": 19.70893778864439, + "grad_norm": 30.517263412475586, + "learning_rate": 6.111442349846559e-06, + "loss": 12.1116, + "step": 45350 + }, + { + "epoch": 19.7306710133116, + "grad_norm": 16.39494514465332, + "learning_rate": 6.107058307759755e-06, + "loss": 12.1275, + "step": 45400 + }, + { + "epoch": 19.75240423797881, + "grad_norm": 15.935347557067871, + "learning_rate": 6.102674265672951e-06, + "loss": 12.0961, + "step": 45450 + }, + { + "epoch": 19.77413746264602, + "grad_norm": 17.69158172607422, + "learning_rate": 6.098290223586147e-06, + "loss": 12.1242, + "step": 45500 + }, + { + "epoch": 19.79587068731323, + "grad_norm": 17.6668758392334, + "learning_rate": 6.0939061814993425e-06, + "loss": 12.0872, + "step": 45550 + }, + { + "epoch": 19.81760391198044, + "grad_norm": 16.675373077392578, + "learning_rate": 6.08952213941254e-06, + "loss": 12.0705, + "step": 45600 + }, + { + "epoch": 19.83933713664765, + "grad_norm": 18.560033798217773, + "learning_rate": 6.085138097325735e-06, + "loss": 12.098, + "step": 45650 + }, + { + "epoch": 19.86107036131486, + "grad_norm": 18.61153793334961, + "learning_rate": 6.080754055238931e-06, + "loss": 12.1017, + "step": 45700 + }, + { + "epoch": 19.88280358598207, + "grad_norm": 23.753692626953125, + "learning_rate": 6.076370013152127e-06, + "loss": 12.101, + "step": 45750 + }, + { + "epoch": 19.90453681064928, + "grad_norm": 12.80927848815918, + "learning_rate": 6.071985971065323e-06, + "loss": 12.116, + "step": 45800 + }, + { + "epoch": 19.92627003531649, + "grad_norm": 22.449129104614258, + "learning_rate": 6.067601928978518e-06, + "loss": 12.1071, + "step": 45850 + }, + { + "epoch": 19.9480032599837, + "grad_norm": 53.62459945678711, + "learning_rate": 6.0632178868917145e-06, + "loss": 12.1369, + "step": 45900 + }, + { + "epoch": 19.96973648465091, + "grad_norm": 18.846603393554688, + "learning_rate": 6.058833844804911e-06, + "loss": 12.099, + "step": 45950 + }, + { + "epoch": 19.99146970931812, + "grad_norm": 28.6248836517334, + "learning_rate": 6.054449802718106e-06, + "loss": 12.1369, + "step": 46000 + }, + { + "epoch": 20.013039934800325, + "grad_norm": 17.0070858001709, + "learning_rate": 6.050065760631303e-06, + "loss": 11.9934, + "step": 46050 + }, + { + "epoch": 20.034773159467537, + "grad_norm": 30.195463180541992, + "learning_rate": 6.0456817185444986e-06, + "loss": 12.0606, + "step": 46100 + }, + { + "epoch": 20.056506384134746, + "grad_norm": 15.557343482971191, + "learning_rate": 6.041297676457695e-06, + "loss": 12.0555, + "step": 46150 + }, + { + "epoch": 20.078239608801955, + "grad_norm": 20.677410125732422, + "learning_rate": 6.03691363437089e-06, + "loss": 12.0169, + "step": 46200 + }, + { + "epoch": 20.099972833469167, + "grad_norm": 8.35476016998291, + "learning_rate": 6.032529592284086e-06, + "loss": 12.1085, + "step": 46250 + }, + { + "epoch": 20.121706058136375, + "grad_norm": 21.85611915588379, + "learning_rate": 6.028145550197282e-06, + "loss": 12.0724, + "step": 46300 + }, + { + "epoch": 20.143439282803588, + "grad_norm": 15.336892127990723, + "learning_rate": 6.023761508110478e-06, + "loss": 12.0564, + "step": 46350 + }, + { + "epoch": 20.165172507470796, + "grad_norm": 14.198942184448242, + "learning_rate": 6.019377466023675e-06, + "loss": 12.0629, + "step": 46400 + }, + { + "epoch": 20.186905732138005, + "grad_norm": 19.750280380249023, + "learning_rate": 6.0149934239368705e-06, + "loss": 12.0606, + "step": 46450 + }, + { + "epoch": 20.208638956805217, + "grad_norm": 23.643993377685547, + "learning_rate": 6.010609381850067e-06, + "loss": 12.0624, + "step": 46500 + }, + { + "epoch": 20.230372181472426, + "grad_norm": 15.32921028137207, + "learning_rate": 6.006225339763262e-06, + "loss": 12.0515, + "step": 46550 + }, + { + "epoch": 20.252105406139634, + "grad_norm": 18.966848373413086, + "learning_rate": 6.001841297676458e-06, + "loss": 12.0743, + "step": 46600 + }, + { + "epoch": 20.273838630806846, + "grad_norm": 15.885478973388672, + "learning_rate": 5.997457255589654e-06, + "loss": 12.068, + "step": 46650 + }, + { + "epoch": 20.295571855474055, + "grad_norm": 25.81429672241211, + "learning_rate": 5.99307321350285e-06, + "loss": 12.1066, + "step": 46700 + }, + { + "epoch": 20.317305080141267, + "grad_norm": 14.397024154663086, + "learning_rate": 5.988689171416045e-06, + "loss": 12.1048, + "step": 46750 + }, + { + "epoch": 20.339038304808476, + "grad_norm": 38.001121520996094, + "learning_rate": 5.984305129329242e-06, + "loss": 12.0548, + "step": 46800 + }, + { + "epoch": 20.360771529475684, + "grad_norm": 19.49797248840332, + "learning_rate": 5.979921087242439e-06, + "loss": 12.0747, + "step": 46850 + }, + { + "epoch": 20.382504754142897, + "grad_norm": 13.953147888183594, + "learning_rate": 5.975537045155634e-06, + "loss": 12.0704, + "step": 46900 + }, + { + "epoch": 20.404237978810105, + "grad_norm": 33.00684356689453, + "learning_rate": 5.97115300306883e-06, + "loss": 12.0737, + "step": 46950 + }, + { + "epoch": 20.425971203477317, + "grad_norm": 14.40523910522461, + "learning_rate": 5.966768960982026e-06, + "loss": 12.0644, + "step": 47000 + }, + { + "epoch": 20.447704428144526, + "grad_norm": 17.341297149658203, + "learning_rate": 5.962384918895222e-06, + "loss": 12.0375, + "step": 47050 + }, + { + "epoch": 20.469437652811735, + "grad_norm": 11.500914573669434, + "learning_rate": 5.958000876808417e-06, + "loss": 12.0957, + "step": 47100 + }, + { + "epoch": 20.491170877478947, + "grad_norm": 14.926876068115234, + "learning_rate": 5.9536168347216135e-06, + "loss": 12.0661, + "step": 47150 + }, + { + "epoch": 20.512904102146155, + "grad_norm": 33.41230392456055, + "learning_rate": 5.949232792634809e-06, + "loss": 12.0683, + "step": 47200 + }, + { + "epoch": 20.534637326813368, + "grad_norm": 11.592459678649902, + "learning_rate": 5.944848750548006e-06, + "loss": 12.0852, + "step": 47250 + }, + { + "epoch": 20.556370551480576, + "grad_norm": 11.893900871276855, + "learning_rate": 5.940464708461202e-06, + "loss": 12.0927, + "step": 47300 + }, + { + "epoch": 20.578103776147785, + "grad_norm": 19.416147232055664, + "learning_rate": 5.936080666374398e-06, + "loss": 12.0571, + "step": 47350 + }, + { + "epoch": 20.599837000814997, + "grad_norm": 114.77404022216797, + "learning_rate": 5.931696624287594e-06, + "loss": 12.0694, + "step": 47400 + }, + { + "epoch": 20.621570225482206, + "grad_norm": 22.660274505615234, + "learning_rate": 5.927312582200789e-06, + "loss": 12.0863, + "step": 47450 + }, + { + "epoch": 20.643303450149418, + "grad_norm": 27.254777908325195, + "learning_rate": 5.9229285401139855e-06, + "loss": 12.0506, + "step": 47500 + }, + { + "epoch": 20.665036674816626, + "grad_norm": 18.767820358276367, + "learning_rate": 5.918544498027181e-06, + "loss": 12.0552, + "step": 47550 + }, + { + "epoch": 20.686769899483835, + "grad_norm": 12.995434761047363, + "learning_rate": 5.914160455940378e-06, + "loss": 12.0879, + "step": 47600 + }, + { + "epoch": 20.708503124151047, + "grad_norm": 14.814035415649414, + "learning_rate": 5.909776413853573e-06, + "loss": 12.0959, + "step": 47650 + }, + { + "epoch": 20.730236348818256, + "grad_norm": 25.315176010131836, + "learning_rate": 5.90539237176677e-06, + "loss": 12.0976, + "step": 47700 + }, + { + "epoch": 20.751969573485464, + "grad_norm": 25.416751861572266, + "learning_rate": 5.901008329679966e-06, + "loss": 12.0528, + "step": 47750 + }, + { + "epoch": 20.773702798152677, + "grad_norm": 16.93905258178711, + "learning_rate": 5.896624287593161e-06, + "loss": 12.0705, + "step": 47800 + }, + { + "epoch": 20.795436022819885, + "grad_norm": 30.060588836669922, + "learning_rate": 5.8922402455063574e-06, + "loss": 12.0466, + "step": 47850 + }, + { + "epoch": 20.817169247487097, + "grad_norm": 13.423187255859375, + "learning_rate": 5.887856203419553e-06, + "loss": 12.0681, + "step": 47900 + }, + { + "epoch": 20.838902472154306, + "grad_norm": 13.607131004333496, + "learning_rate": 5.883472161332749e-06, + "loss": 12.0687, + "step": 47950 + }, + { + "epoch": 20.860635696821515, + "grad_norm": 22.271543502807617, + "learning_rate": 5.8790881192459444e-06, + "loss": 12.1039, + "step": 48000 + }, + { + "epoch": 20.882368921488727, + "grad_norm": 25.268817901611328, + "learning_rate": 5.8747040771591415e-06, + "loss": 12.09, + "step": 48050 + }, + { + "epoch": 20.904102146155935, + "grad_norm": 15.665398597717285, + "learning_rate": 5.870320035072338e-06, + "loss": 12.0956, + "step": 48100 + }, + { + "epoch": 20.925835370823147, + "grad_norm": 21.067293167114258, + "learning_rate": 5.865935992985533e-06, + "loss": 12.0499, + "step": 48150 + }, + { + "epoch": 20.947568595490356, + "grad_norm": 22.776708602905273, + "learning_rate": 5.861551950898729e-06, + "loss": 12.0762, + "step": 48200 + }, + { + "epoch": 20.969301820157565, + "grad_norm": 8.629790306091309, + "learning_rate": 5.857167908811925e-06, + "loss": 12.0614, + "step": 48250 + }, + { + "epoch": 20.991035044824777, + "grad_norm": 15.550890922546387, + "learning_rate": 5.852783866725121e-06, + "loss": 12.0792, + "step": 48300 + }, + { + "epoch": 21.01260527030698, + "grad_norm": 12.225948333740234, + "learning_rate": 5.848399824638316e-06, + "loss": 11.9374, + "step": 48350 + }, + { + "epoch": 21.034338494974193, + "grad_norm": 14.14416790008545, + "learning_rate": 5.8440157825515135e-06, + "loss": 12.0157, + "step": 48400 + }, + { + "epoch": 21.056071719641402, + "grad_norm": 17.12042236328125, + "learning_rate": 5.839631740464709e-06, + "loss": 12.0288, + "step": 48450 + }, + { + "epoch": 21.07780494430861, + "grad_norm": 13.070446968078613, + "learning_rate": 5.835247698377905e-06, + "loss": 12.0528, + "step": 48500 + }, + { + "epoch": 21.099538168975823, + "grad_norm": 22.833274841308594, + "learning_rate": 5.830863656291101e-06, + "loss": 12.0479, + "step": 48550 + }, + { + "epoch": 21.12127139364303, + "grad_norm": 19.790773391723633, + "learning_rate": 5.826479614204297e-06, + "loss": 12.044, + "step": 48600 + }, + { + "epoch": 21.143004618310243, + "grad_norm": 16.40357208251953, + "learning_rate": 5.822095572117493e-06, + "loss": 12.0299, + "step": 48650 + }, + { + "epoch": 21.164737842977452, + "grad_norm": 13.88508129119873, + "learning_rate": 5.817711530030688e-06, + "loss": 12.0255, + "step": 48700 + }, + { + "epoch": 21.18647106764466, + "grad_norm": 18.211301803588867, + "learning_rate": 5.813327487943885e-06, + "loss": 12.0283, + "step": 48750 + }, + { + "epoch": 21.208204292311873, + "grad_norm": 13.291574478149414, + "learning_rate": 5.80894344585708e-06, + "loss": 12.0283, + "step": 48800 + }, + { + "epoch": 21.22993751697908, + "grad_norm": 15.905344009399414, + "learning_rate": 5.804559403770277e-06, + "loss": 12.0526, + "step": 48850 + }, + { + "epoch": 21.251670741646294, + "grad_norm": 11.572737693786621, + "learning_rate": 5.800175361683472e-06, + "loss": 12.0322, + "step": 48900 + }, + { + "epoch": 21.273403966313502, + "grad_norm": 17.022083282470703, + "learning_rate": 5.795791319596669e-06, + "loss": 12.0426, + "step": 48950 + }, + { + "epoch": 21.29513719098071, + "grad_norm": 23.31209945678711, + "learning_rate": 5.791407277509865e-06, + "loss": 12.0233, + "step": 49000 + }, + { + "epoch": 21.316870415647923, + "grad_norm": 23.8966007232666, + "learning_rate": 5.78702323542306e-06, + "loss": 12.0526, + "step": 49050 + }, + { + "epoch": 21.33860364031513, + "grad_norm": 17.35943031311035, + "learning_rate": 5.7826391933362565e-06, + "loss": 12.0379, + "step": 49100 + }, + { + "epoch": 21.36033686498234, + "grad_norm": 33.082645416259766, + "learning_rate": 5.778255151249452e-06, + "loss": 12.0407, + "step": 49150 + }, + { + "epoch": 21.382070089649552, + "grad_norm": 13.810714721679688, + "learning_rate": 5.773871109162649e-06, + "loss": 12.0193, + "step": 49200 + }, + { + "epoch": 21.40380331431676, + "grad_norm": 15.985318183898926, + "learning_rate": 5.769487067075844e-06, + "loss": 12.0437, + "step": 49250 + }, + { + "epoch": 21.425536538983973, + "grad_norm": 11.185006141662598, + "learning_rate": 5.765103024989041e-06, + "loss": 12.0347, + "step": 49300 + }, + { + "epoch": 21.44726976365118, + "grad_norm": 13.088438034057617, + "learning_rate": 5.760718982902236e-06, + "loss": 12.036, + "step": 49350 + }, + { + "epoch": 21.46900298831839, + "grad_norm": 35.933502197265625, + "learning_rate": 5.756334940815432e-06, + "loss": 12.0709, + "step": 49400 + }, + { + "epoch": 21.490736212985603, + "grad_norm": 13.896368026733398, + "learning_rate": 5.7519508987286285e-06, + "loss": 12.0181, + "step": 49450 + }, + { + "epoch": 21.51246943765281, + "grad_norm": 15.991681098937988, + "learning_rate": 5.747566856641824e-06, + "loss": 12.0274, + "step": 49500 + }, + { + "epoch": 21.534202662320023, + "grad_norm": 21.10006332397461, + "learning_rate": 5.743182814555021e-06, + "loss": 12.0587, + "step": 49550 + }, + { + "epoch": 21.555935886987232, + "grad_norm": 18.29193115234375, + "learning_rate": 5.738798772468216e-06, + "loss": 12.028, + "step": 49600 + }, + { + "epoch": 21.57766911165444, + "grad_norm": 27.753482818603516, + "learning_rate": 5.7344147303814125e-06, + "loss": 11.9988, + "step": 49650 + }, + { + "epoch": 21.599402336321653, + "grad_norm": 24.744070053100586, + "learning_rate": 5.730030688294608e-06, + "loss": 12.0743, + "step": 49700 + }, + { + "epoch": 21.62113556098886, + "grad_norm": 21.145042419433594, + "learning_rate": 5.725646646207804e-06, + "loss": 12.0425, + "step": 49750 + }, + { + "epoch": 21.64286878565607, + "grad_norm": 13.751763343811035, + "learning_rate": 5.7212626041209996e-06, + "loss": 12.077, + "step": 49800 + }, + { + "epoch": 21.664602010323282, + "grad_norm": 31.52511978149414, + "learning_rate": 5.716878562034196e-06, + "loss": 12.0228, + "step": 49850 + }, + { + "epoch": 21.68633523499049, + "grad_norm": 51.40691375732422, + "learning_rate": 5.712494519947393e-06, + "loss": 12.0487, + "step": 49900 + }, + { + "epoch": 21.708068459657703, + "grad_norm": 12.909490585327148, + "learning_rate": 5.708110477860587e-06, + "loss": 12.0468, + "step": 49950 + }, + { + "epoch": 21.72980168432491, + "grad_norm": 14.6589937210083, + "learning_rate": 5.7037264357737845e-06, + "loss": 12.0168, + "step": 50000 + }, + { + "epoch": 21.72980168432491, + "eval_cer": 0.07568846975176824, + "eval_loss": 2.362048864364624, + "eval_runtime": 397.6775, + "eval_samples_per_second": 13.594, + "eval_steps_per_second": 3.4, + "eval_wer": 0.22898414193750982, + "step": 50000 + }, + { + "epoch": 21.75153490899212, + "grad_norm": 20.892807006835938, + "learning_rate": 5.69934239368698e-06, + "loss": 12.021, + "step": 50050 + }, + { + "epoch": 21.773268133659332, + "grad_norm": 14.854979515075684, + "learning_rate": 5.694958351600176e-06, + "loss": 12.0355, + "step": 50100 + }, + { + "epoch": 21.79500135832654, + "grad_norm": 18.140365600585938, + "learning_rate": 5.6905743095133715e-06, + "loss": 12.0173, + "step": 50150 + }, + { + "epoch": 21.816734582993753, + "grad_norm": 17.70104217529297, + "learning_rate": 5.686190267426568e-06, + "loss": 12.0801, + "step": 50200 + }, + { + "epoch": 21.83846780766096, + "grad_norm": 18.51262092590332, + "learning_rate": 5.681806225339763e-06, + "loss": 12.0334, + "step": 50250 + }, + { + "epoch": 21.86020103232817, + "grad_norm": 15.687026023864746, + "learning_rate": 5.677422183252959e-06, + "loss": 12.0553, + "step": 50300 + }, + { + "epoch": 21.881934256995383, + "grad_norm": 19.184951782226562, + "learning_rate": 5.6730381411661564e-06, + "loss": 12.0409, + "step": 50350 + }, + { + "epoch": 21.90366748166259, + "grad_norm": 18.097457885742188, + "learning_rate": 5.668654099079352e-06, + "loss": 12.058, + "step": 50400 + }, + { + "epoch": 21.925400706329803, + "grad_norm": 26.270936965942383, + "learning_rate": 5.664270056992548e-06, + "loss": 12.064, + "step": 50450 + }, + { + "epoch": 21.947133930997012, + "grad_norm": 26.288280487060547, + "learning_rate": 5.6598860149057434e-06, + "loss": 12.034, + "step": 50500 + }, + { + "epoch": 21.96886715566422, + "grad_norm": 10.051491737365723, + "learning_rate": 5.65550197281894e-06, + "loss": 12.0676, + "step": 50550 + }, + { + "epoch": 21.990600380331433, + "grad_norm": 15.91609001159668, + "learning_rate": 5.651117930732135e-06, + "loss": 12.0488, + "step": 50600 + }, + { + "epoch": 22.012170605813637, + "grad_norm": 16.341890335083008, + "learning_rate": 5.646733888645331e-06, + "loss": 11.9307, + "step": 50650 + }, + { + "epoch": 22.03390383048085, + "grad_norm": 17.389766693115234, + "learning_rate": 5.642349846558527e-06, + "loss": 11.9959, + "step": 50700 + }, + { + "epoch": 22.055637055148058, + "grad_norm": 15.45628547668457, + "learning_rate": 5.637965804471724e-06, + "loss": 12.0322, + "step": 50750 + }, + { + "epoch": 22.077370279815266, + "grad_norm": 14.2662935256958, + "learning_rate": 5.63358176238492e-06, + "loss": 12.0679, + "step": 50800 + }, + { + "epoch": 22.09910350448248, + "grad_norm": 18.397008895874023, + "learning_rate": 5.629197720298115e-06, + "loss": 12.0007, + "step": 50850 + }, + { + "epoch": 22.120836729149687, + "grad_norm": 14.498343467712402, + "learning_rate": 5.624813678211312e-06, + "loss": 11.9903, + "step": 50900 + }, + { + "epoch": 22.1425699538169, + "grad_norm": 26.300201416015625, + "learning_rate": 5.620429636124507e-06, + "loss": 12.0488, + "step": 50950 + }, + { + "epoch": 22.164303178484108, + "grad_norm": 17.42373275756836, + "learning_rate": 5.616045594037703e-06, + "loss": 12.0156, + "step": 51000 + }, + { + "epoch": 22.186036403151316, + "grad_norm": 13.430180549621582, + "learning_rate": 5.611661551950899e-06, + "loss": 12.0147, + "step": 51050 + }, + { + "epoch": 22.20776962781853, + "grad_norm": 8.827760696411133, + "learning_rate": 5.607277509864095e-06, + "loss": 12.0464, + "step": 51100 + }, + { + "epoch": 22.229502852485737, + "grad_norm": 13.834342002868652, + "learning_rate": 5.60289346777729e-06, + "loss": 11.9739, + "step": 51150 + }, + { + "epoch": 22.251236077152946, + "grad_norm": 15.042898178100586, + "learning_rate": 5.598509425690487e-06, + "loss": 12.0098, + "step": 51200 + }, + { + "epoch": 22.272969301820158, + "grad_norm": 19.06934356689453, + "learning_rate": 5.5941253836036836e-06, + "loss": 11.9855, + "step": 51250 + }, + { + "epoch": 22.294702526487367, + "grad_norm": 11.361977577209473, + "learning_rate": 5.589741341516879e-06, + "loss": 12.0193, + "step": 51300 + }, + { + "epoch": 22.31643575115458, + "grad_norm": 19.977092742919922, + "learning_rate": 5.585357299430075e-06, + "loss": 12.0072, + "step": 51350 + }, + { + "epoch": 22.338168975821787, + "grad_norm": 18.312875747680664, + "learning_rate": 5.5809732573432706e-06, + "loss": 12.0161, + "step": 51400 + }, + { + "epoch": 22.359902200488996, + "grad_norm": 10.536518096923828, + "learning_rate": 5.576589215256467e-06, + "loss": 12.0285, + "step": 51450 + }, + { + "epoch": 22.381635425156208, + "grad_norm": 15.011421203613281, + "learning_rate": 5.572205173169662e-06, + "loss": 11.9876, + "step": 51500 + }, + { + "epoch": 22.403368649823417, + "grad_norm": 17.05405616760254, + "learning_rate": 5.567821131082859e-06, + "loss": 12.0425, + "step": 51550 + }, + { + "epoch": 22.42510187449063, + "grad_norm": 16.87340545654297, + "learning_rate": 5.563437088996055e-06, + "loss": 12.0218, + "step": 51600 + }, + { + "epoch": 22.446835099157838, + "grad_norm": 19.586755752563477, + "learning_rate": 5.559053046909251e-06, + "loss": 12.032, + "step": 51650 + }, + { + "epoch": 22.468568323825046, + "grad_norm": 27.009822845458984, + "learning_rate": 5.554669004822447e-06, + "loss": 12.0083, + "step": 51700 + }, + { + "epoch": 22.49030154849226, + "grad_norm": 11.635884284973145, + "learning_rate": 5.5502849627356425e-06, + "loss": 12.025, + "step": 51750 + }, + { + "epoch": 22.512034773159467, + "grad_norm": 17.531131744384766, + "learning_rate": 5.545900920648839e-06, + "loss": 12.0123, + "step": 51800 + }, + { + "epoch": 22.53376799782668, + "grad_norm": 10.203145980834961, + "learning_rate": 5.541516878562034e-06, + "loss": 12.0013, + "step": 51850 + }, + { + "epoch": 22.555501222493888, + "grad_norm": 19.1767635345459, + "learning_rate": 5.537132836475231e-06, + "loss": 12.0279, + "step": 51900 + }, + { + "epoch": 22.577234447161096, + "grad_norm": 31.68284034729004, + "learning_rate": 5.532748794388426e-06, + "loss": 12.0053, + "step": 51950 + }, + { + "epoch": 22.59896767182831, + "grad_norm": 10.772562980651855, + "learning_rate": 5.528364752301623e-06, + "loss": 12.0153, + "step": 52000 + }, + { + "epoch": 22.620700896495517, + "grad_norm": 99.19184875488281, + "learning_rate": 5.523980710214818e-06, + "loss": 12.0059, + "step": 52050 + }, + { + "epoch": 22.642434121162726, + "grad_norm": 20.737354278564453, + "learning_rate": 5.5195966681280145e-06, + "loss": 12.0263, + "step": 52100 + }, + { + "epoch": 22.664167345829938, + "grad_norm": 15.494745254516602, + "learning_rate": 5.515212626041211e-06, + "loss": 12.0129, + "step": 52150 + }, + { + "epoch": 22.685900570497147, + "grad_norm": 34.782100677490234, + "learning_rate": 5.510828583954406e-06, + "loss": 12.0497, + "step": 52200 + }, + { + "epoch": 22.70763379516436, + "grad_norm": 18.235090255737305, + "learning_rate": 5.506444541867602e-06, + "loss": 11.9992, + "step": 52250 + }, + { + "epoch": 22.729367019831567, + "grad_norm": 27.689912796020508, + "learning_rate": 5.502060499780798e-06, + "loss": 12.0023, + "step": 52300 + }, + { + "epoch": 22.751100244498776, + "grad_norm": 18.36990737915039, + "learning_rate": 5.497676457693995e-06, + "loss": 12.0056, + "step": 52350 + }, + { + "epoch": 22.772833469165988, + "grad_norm": 18.038314819335938, + "learning_rate": 5.49329241560719e-06, + "loss": 12.0212, + "step": 52400 + }, + { + "epoch": 22.794566693833197, + "grad_norm": 8.06163501739502, + "learning_rate": 5.488908373520386e-06, + "loss": 12.0274, + "step": 52450 + }, + { + "epoch": 22.81629991850041, + "grad_norm": 15.676831245422363, + "learning_rate": 5.484524331433583e-06, + "loss": 12.0148, + "step": 52500 + }, + { + "epoch": 22.838033143167618, + "grad_norm": 24.74848747253418, + "learning_rate": 5.480140289346778e-06, + "loss": 12.0186, + "step": 52550 + }, + { + "epoch": 22.859766367834826, + "grad_norm": 10.006168365478516, + "learning_rate": 5.475756247259974e-06, + "loss": 12.0071, + "step": 52600 + }, + { + "epoch": 22.88149959250204, + "grad_norm": 10.135807991027832, + "learning_rate": 5.47137220517317e-06, + "loss": 12.0224, + "step": 52650 + }, + { + "epoch": 22.903232817169247, + "grad_norm": 16.03304100036621, + "learning_rate": 5.466988163086367e-06, + "loss": 12.0253, + "step": 52700 + }, + { + "epoch": 22.92496604183646, + "grad_norm": 15.307913780212402, + "learning_rate": 5.462604120999562e-06, + "loss": 12.0234, + "step": 52750 + }, + { + "epoch": 22.946699266503668, + "grad_norm": 27.5895938873291, + "learning_rate": 5.458220078912758e-06, + "loss": 12.0162, + "step": 52800 + }, + { + "epoch": 22.968432491170876, + "grad_norm": 14.608256340026855, + "learning_rate": 5.453836036825954e-06, + "loss": 12.0005, + "step": 52850 + }, + { + "epoch": 22.99016571583809, + "grad_norm": 41.10546112060547, + "learning_rate": 5.44945199473915e-06, + "loss": 12.0735, + "step": 52900 + }, + { + "epoch": 23.011735941320293, + "grad_norm": 12.675127983093262, + "learning_rate": 5.445067952652346e-06, + "loss": 11.9152, + "step": 52950 + }, + { + "epoch": 23.033469165987505, + "grad_norm": 16.779767990112305, + "learning_rate": 5.440683910565542e-06, + "loss": 11.9743, + "step": 53000 + }, + { + "epoch": 23.055202390654713, + "grad_norm": 29.24107551574707, + "learning_rate": 5.436299868478739e-06, + "loss": 11.9844, + "step": 53050 + }, + { + "epoch": 23.076935615321922, + "grad_norm": 15.517463684082031, + "learning_rate": 5.431915826391933e-06, + "loss": 12.0084, + "step": 53100 + }, + { + "epoch": 23.098668839989134, + "grad_norm": 14.068320274353027, + "learning_rate": 5.42753178430513e-06, + "loss": 11.982, + "step": 53150 + }, + { + "epoch": 23.120402064656343, + "grad_norm": 13.296953201293945, + "learning_rate": 5.423147742218326e-06, + "loss": 12.0076, + "step": 53200 + }, + { + "epoch": 23.142135289323555, + "grad_norm": 11.365141868591309, + "learning_rate": 5.418763700131522e-06, + "loss": 11.9825, + "step": 53250 + }, + { + "epoch": 23.163868513990764, + "grad_norm": 11.649621963500977, + "learning_rate": 5.414379658044717e-06, + "loss": 11.9874, + "step": 53300 + }, + { + "epoch": 23.185601738657972, + "grad_norm": 12.506479263305664, + "learning_rate": 5.4099956159579135e-06, + "loss": 12.0203, + "step": 53350 + }, + { + "epoch": 23.207334963325184, + "grad_norm": 26.387269973754883, + "learning_rate": 5.40561157387111e-06, + "loss": 11.9718, + "step": 53400 + }, + { + "epoch": 23.229068187992393, + "grad_norm": 30.277488708496094, + "learning_rate": 5.401227531784305e-06, + "loss": 11.9922, + "step": 53450 + }, + { + "epoch": 23.2508014126596, + "grad_norm": 16.27001953125, + "learning_rate": 5.396843489697502e-06, + "loss": 12.0103, + "step": 53500 + }, + { + "epoch": 23.272534637326814, + "grad_norm": 10.601898193359375, + "learning_rate": 5.392459447610698e-06, + "loss": 11.982, + "step": 53550 + }, + { + "epoch": 23.294267861994022, + "grad_norm": 16.928091049194336, + "learning_rate": 5.388075405523894e-06, + "loss": 11.9921, + "step": 53600 + }, + { + "epoch": 23.316001086661235, + "grad_norm": 17.180408477783203, + "learning_rate": 5.383691363437089e-06, + "loss": 11.9681, + "step": 53650 + }, + { + "epoch": 23.337734311328443, + "grad_norm": 9.645658493041992, + "learning_rate": 5.3793073213502855e-06, + "loss": 11.9921, + "step": 53700 + }, + { + "epoch": 23.359467535995652, + "grad_norm": 7.888517379760742, + "learning_rate": 5.374923279263481e-06, + "loss": 11.9957, + "step": 53750 + }, + { + "epoch": 23.381200760662864, + "grad_norm": 23.52006721496582, + "learning_rate": 5.370539237176677e-06, + "loss": 11.9913, + "step": 53800 + }, + { + "epoch": 23.402933985330073, + "grad_norm": 17.327842712402344, + "learning_rate": 5.366155195089874e-06, + "loss": 11.985, + "step": 53850 + }, + { + "epoch": 23.424667209997285, + "grad_norm": 15.461244583129883, + "learning_rate": 5.3617711530030696e-06, + "loss": 11.9856, + "step": 53900 + }, + { + "epoch": 23.446400434664493, + "grad_norm": 10.2888822555542, + "learning_rate": 5.357387110916266e-06, + "loss": 12.0014, + "step": 53950 + }, + { + "epoch": 23.468133659331702, + "grad_norm": 16.063997268676758, + "learning_rate": 5.353003068829461e-06, + "loss": 11.997, + "step": 54000 + }, + { + "epoch": 23.489866883998914, + "grad_norm": 28.185026168823242, + "learning_rate": 5.3486190267426574e-06, + "loss": 11.9855, + "step": 54050 + }, + { + "epoch": 23.511600108666123, + "grad_norm": 16.92442512512207, + "learning_rate": 5.344234984655853e-06, + "loss": 12.0206, + "step": 54100 + }, + { + "epoch": 23.533333333333335, + "grad_norm": 6.245467662811279, + "learning_rate": 5.339850942569049e-06, + "loss": 11.9748, + "step": 54150 + }, + { + "epoch": 23.555066558000544, + "grad_norm": 14.348546981811523, + "learning_rate": 5.3354669004822444e-06, + "loss": 11.9609, + "step": 54200 + }, + { + "epoch": 23.576799782667752, + "grad_norm": 10.864014625549316, + "learning_rate": 5.331082858395441e-06, + "loss": 11.9947, + "step": 54250 + }, + { + "epoch": 23.598533007334964, + "grad_norm": 8.79773998260498, + "learning_rate": 5.326698816308638e-06, + "loss": 12.0031, + "step": 54300 + }, + { + "epoch": 23.620266232002173, + "grad_norm": 19.14083480834961, + "learning_rate": 5.322314774221833e-06, + "loss": 11.9738, + "step": 54350 + }, + { + "epoch": 23.64199945666938, + "grad_norm": 10.049248695373535, + "learning_rate": 5.317930732135029e-06, + "loss": 11.9514, + "step": 54400 + }, + { + "epoch": 23.663732681336594, + "grad_norm": 11.119285583496094, + "learning_rate": 5.313546690048225e-06, + "loss": 11.9914, + "step": 54450 + }, + { + "epoch": 23.685465906003802, + "grad_norm": 8.268950462341309, + "learning_rate": 5.309162647961421e-06, + "loss": 11.994, + "step": 54500 + }, + { + "epoch": 23.707199130671015, + "grad_norm": 14.429734230041504, + "learning_rate": 5.304778605874616e-06, + "loss": 11.975, + "step": 54550 + }, + { + "epoch": 23.728932355338223, + "grad_norm": 15.248434066772461, + "learning_rate": 5.300394563787813e-06, + "loss": 11.9967, + "step": 54600 + }, + { + "epoch": 23.750665580005432, + "grad_norm": 27.12610626220703, + "learning_rate": 5.296010521701008e-06, + "loss": 12.0066, + "step": 54650 + }, + { + "epoch": 23.772398804672644, + "grad_norm": 11.624201774597168, + "learning_rate": 5.291626479614205e-06, + "loss": 11.9857, + "step": 54700 + }, + { + "epoch": 23.794132029339853, + "grad_norm": 38.6632194519043, + "learning_rate": 5.287242437527401e-06, + "loss": 12.0068, + "step": 54750 + }, + { + "epoch": 23.815865254007065, + "grad_norm": 21.433034896850586, + "learning_rate": 5.282858395440597e-06, + "loss": 11.9545, + "step": 54800 + }, + { + "epoch": 23.837598478674273, + "grad_norm": 12.88279914855957, + "learning_rate": 5.278474353353793e-06, + "loss": 11.9675, + "step": 54850 + }, + { + "epoch": 23.859331703341482, + "grad_norm": 11.213829040527344, + "learning_rate": 5.274090311266988e-06, + "loss": 11.9907, + "step": 54900 + }, + { + "epoch": 23.881064928008694, + "grad_norm": 32.87601852416992, + "learning_rate": 5.2697062691801846e-06, + "loss": 12.0041, + "step": 54950 + }, + { + "epoch": 23.902798152675903, + "grad_norm": 12.214354515075684, + "learning_rate": 5.26532222709338e-06, + "loss": 12.0013, + "step": 55000 + }, + { + "epoch": 23.924531377343115, + "grad_norm": 18.823352813720703, + "learning_rate": 5.260938185006577e-06, + "loss": 12.0205, + "step": 55050 + }, + { + "epoch": 23.946264602010324, + "grad_norm": 11.764278411865234, + "learning_rate": 5.2565541429197716e-06, + "loss": 12.0045, + "step": 55100 + }, + { + "epoch": 23.967997826677532, + "grad_norm": 33.26872253417969, + "learning_rate": 5.252170100832969e-06, + "loss": 11.9852, + "step": 55150 + }, + { + "epoch": 23.989731051344744, + "grad_norm": 20.137388229370117, + "learning_rate": 5.247786058746165e-06, + "loss": 12.0023, + "step": 55200 + }, + { + "epoch": 24.01130127682695, + "grad_norm": 13.359118461608887, + "learning_rate": 5.24340201665936e-06, + "loss": 11.8893, + "step": 55250 + }, + { + "epoch": 24.03303450149416, + "grad_norm": 12.654318809509277, + "learning_rate": 5.2390179745725565e-06, + "loss": 11.9913, + "step": 55300 + }, + { + "epoch": 24.05476772616137, + "grad_norm": 12.723244667053223, + "learning_rate": 5.234633932485752e-06, + "loss": 11.9835, + "step": 55350 + }, + { + "epoch": 24.076500950828578, + "grad_norm": 10.007128715515137, + "learning_rate": 5.230249890398948e-06, + "loss": 11.9639, + "step": 55400 + }, + { + "epoch": 24.09823417549579, + "grad_norm": 24.932937622070312, + "learning_rate": 5.2258658483121435e-06, + "loss": 11.9567, + "step": 55450 + }, + { + "epoch": 24.119967400163, + "grad_norm": 13.288817405700684, + "learning_rate": 5.221481806225341e-06, + "loss": 11.9896, + "step": 55500 + }, + { + "epoch": 24.14170062483021, + "grad_norm": 24.153135299682617, + "learning_rate": 5.217097764138536e-06, + "loss": 11.9458, + "step": 55550 + }, + { + "epoch": 24.16343384949742, + "grad_norm": 21.456832885742188, + "learning_rate": 5.212713722051732e-06, + "loss": 11.9637, + "step": 55600 + }, + { + "epoch": 24.185167074164628, + "grad_norm": 11.885467529296875, + "learning_rate": 5.2083296799649284e-06, + "loss": 11.9763, + "step": 55650 + }, + { + "epoch": 24.20690029883184, + "grad_norm": 18.14926528930664, + "learning_rate": 5.203945637878124e-06, + "loss": 11.9793, + "step": 55700 + }, + { + "epoch": 24.22863352349905, + "grad_norm": 10.626521110534668, + "learning_rate": 5.19956159579132e-06, + "loss": 11.9717, + "step": 55750 + }, + { + "epoch": 24.250366748166257, + "grad_norm": 18.046018600463867, + "learning_rate": 5.1951775537045155e-06, + "loss": 11.9679, + "step": 55800 + }, + { + "epoch": 24.27209997283347, + "grad_norm": 19.871051788330078, + "learning_rate": 5.1907935116177125e-06, + "loss": 11.9655, + "step": 55850 + }, + { + "epoch": 24.29383319750068, + "grad_norm": 26.990354537963867, + "learning_rate": 5.186409469530908e-06, + "loss": 11.9776, + "step": 55900 + }, + { + "epoch": 24.31556642216789, + "grad_norm": 13.593362808227539, + "learning_rate": 5.182025427444104e-06, + "loss": 11.9765, + "step": 55950 + }, + { + "epoch": 24.3372996468351, + "grad_norm": 21.99699592590332, + "learning_rate": 5.1776413853572995e-06, + "loss": 11.9698, + "step": 56000 + }, + { + "epoch": 24.359032871502308, + "grad_norm": 17.28653335571289, + "learning_rate": 5.173257343270496e-06, + "loss": 11.9668, + "step": 56050 + }, + { + "epoch": 24.38076609616952, + "grad_norm": 46.031005859375, + "learning_rate": 5.168873301183692e-06, + "loss": 11.9729, + "step": 56100 + }, + { + "epoch": 24.40249932083673, + "grad_norm": 32.24114227294922, + "learning_rate": 5.164489259096887e-06, + "loss": 11.9543, + "step": 56150 + }, + { + "epoch": 24.42423254550394, + "grad_norm": 32.9847297668457, + "learning_rate": 5.160105217010084e-06, + "loss": 11.9631, + "step": 56200 + }, + { + "epoch": 24.44596577017115, + "grad_norm": 28.538616180419922, + "learning_rate": 5.155721174923279e-06, + "loss": 11.9914, + "step": 56250 + }, + { + "epoch": 24.467698994838358, + "grad_norm": 10.636951446533203, + "learning_rate": 5.151337132836476e-06, + "loss": 11.9533, + "step": 56300 + }, + { + "epoch": 24.48943221950557, + "grad_norm": 18.541378021240234, + "learning_rate": 5.1469530907496715e-06, + "loss": 11.9635, + "step": 56350 + }, + { + "epoch": 24.51116544417278, + "grad_norm": 15.477215766906738, + "learning_rate": 5.142569048662868e-06, + "loss": 11.973, + "step": 56400 + }, + { + "epoch": 24.53289866883999, + "grad_norm": 8.257668495178223, + "learning_rate": 5.138185006576063e-06, + "loss": 11.9541, + "step": 56450 + }, + { + "epoch": 24.5546318935072, + "grad_norm": 12.362825393676758, + "learning_rate": 5.133800964489259e-06, + "loss": 11.9543, + "step": 56500 + }, + { + "epoch": 24.576365118174408, + "grad_norm": 18.897563934326172, + "learning_rate": 5.1294169224024556e-06, + "loss": 11.9828, + "step": 56550 + }, + { + "epoch": 24.59809834284162, + "grad_norm": 22.83639907836914, + "learning_rate": 5.125032880315651e-06, + "loss": 11.9907, + "step": 56600 + }, + { + "epoch": 24.61983156750883, + "grad_norm": 26.016014099121094, + "learning_rate": 5.120648838228848e-06, + "loss": 11.9798, + "step": 56650 + }, + { + "epoch": 24.641564792176037, + "grad_norm": 7.745444297790527, + "learning_rate": 5.1162647961420434e-06, + "loss": 12.0051, + "step": 56700 + }, + { + "epoch": 24.66329801684325, + "grad_norm": 14.89815616607666, + "learning_rate": 5.11188075405524e-06, + "loss": 11.9648, + "step": 56750 + }, + { + "epoch": 24.68503124151046, + "grad_norm": 13.663446426391602, + "learning_rate": 5.107496711968435e-06, + "loss": 11.9961, + "step": 56800 + }, + { + "epoch": 24.70676446617767, + "grad_norm": 15.474350929260254, + "learning_rate": 5.103112669881631e-06, + "loss": 11.9687, + "step": 56850 + }, + { + "epoch": 24.72849769084488, + "grad_norm": 32.1036376953125, + "learning_rate": 5.0987286277948275e-06, + "loss": 12.0102, + "step": 56900 + }, + { + "epoch": 24.750230915512088, + "grad_norm": 21.14737892150879, + "learning_rate": 5.094344585708023e-06, + "loss": 11.9577, + "step": 56950 + }, + { + "epoch": 24.7719641401793, + "grad_norm": 26.35091781616211, + "learning_rate": 5.08996054362122e-06, + "loss": 11.9979, + "step": 57000 + }, + { + "epoch": 24.79369736484651, + "grad_norm": 40.08930587768555, + "learning_rate": 5.085576501534415e-06, + "loss": 11.956, + "step": 57050 + }, + { + "epoch": 24.81543058951372, + "grad_norm": 21.480506896972656, + "learning_rate": 5.081192459447612e-06, + "loss": 11.9701, + "step": 57100 + }, + { + "epoch": 24.83716381418093, + "grad_norm": 12.940244674682617, + "learning_rate": 5.076808417360807e-06, + "loss": 11.9642, + "step": 57150 + }, + { + "epoch": 24.858897038848138, + "grad_norm": 14.284876823425293, + "learning_rate": 5.072424375274003e-06, + "loss": 11.9604, + "step": 57200 + }, + { + "epoch": 24.88063026351535, + "grad_norm": 9.244315147399902, + "learning_rate": 5.068040333187199e-06, + "loss": 11.9762, + "step": 57250 + }, + { + "epoch": 24.90236348818256, + "grad_norm": 21.19985580444336, + "learning_rate": 5.063656291100395e-06, + "loss": 11.9824, + "step": 57300 + }, + { + "epoch": 24.92409671284977, + "grad_norm": 20.60128402709961, + "learning_rate": 5.059272249013591e-06, + "loss": 11.9888, + "step": 57350 + }, + { + "epoch": 24.94582993751698, + "grad_norm": 22.071367263793945, + "learning_rate": 5.0548882069267865e-06, + "loss": 11.9722, + "step": 57400 + }, + { + "epoch": 24.967563162184188, + "grad_norm": 19.631771087646484, + "learning_rate": 5.0505041648399836e-06, + "loss": 11.9691, + "step": 57450 + }, + { + "epoch": 24.9892963868514, + "grad_norm": 11.300741195678711, + "learning_rate": 5.046120122753179e-06, + "loss": 11.9764, + "step": 57500 + }, + { + "epoch": 25.010866612333604, + "grad_norm": 28.297489166259766, + "learning_rate": 5.041736080666375e-06, + "loss": 11.8502, + "step": 57550 + }, + { + "epoch": 25.032599837000816, + "grad_norm": 19.681974411010742, + "learning_rate": 5.0373520385795706e-06, + "loss": 11.9632, + "step": 57600 + }, + { + "epoch": 25.054333061668025, + "grad_norm": 9.978123664855957, + "learning_rate": 5.032967996492767e-06, + "loss": 11.9736, + "step": 57650 + }, + { + "epoch": 25.076066286335234, + "grad_norm": 22.59627342224121, + "learning_rate": 5.028583954405962e-06, + "loss": 11.9202, + "step": 57700 + }, + { + "epoch": 25.097799511002446, + "grad_norm": 15.177567481994629, + "learning_rate": 5.024199912319158e-06, + "loss": 11.9432, + "step": 57750 + }, + { + "epoch": 25.119532735669654, + "grad_norm": 11.103377342224121, + "learning_rate": 5.0198158702323555e-06, + "loss": 11.933, + "step": 57800 + }, + { + "epoch": 25.141265960336867, + "grad_norm": 15.902565956115723, + "learning_rate": 5.015431828145551e-06, + "loss": 11.9292, + "step": 57850 + }, + { + "epoch": 25.162999185004075, + "grad_norm": 21.157047271728516, + "learning_rate": 5.011047786058747e-06, + "loss": 11.9355, + "step": 57900 + }, + { + "epoch": 25.184732409671284, + "grad_norm": 15.4396333694458, + "learning_rate": 5.0066637439719425e-06, + "loss": 11.9417, + "step": 57950 + }, + { + "epoch": 25.206465634338496, + "grad_norm": 17.689163208007812, + "learning_rate": 5.002279701885139e-06, + "loss": 11.9411, + "step": 58000 + }, + { + "epoch": 25.228198859005705, + "grad_norm": 20.323307037353516, + "learning_rate": 4.997895659798335e-06, + "loss": 11.9537, + "step": 58050 + }, + { + "epoch": 25.249932083672913, + "grad_norm": 11.056938171386719, + "learning_rate": 4.99351161771153e-06, + "loss": 11.9289, + "step": 58100 + }, + { + "epoch": 25.271665308340125, + "grad_norm": 13.280766487121582, + "learning_rate": 4.989127575624727e-06, + "loss": 11.9525, + "step": 58150 + }, + { + "epoch": 25.293398533007334, + "grad_norm": 19.21057891845703, + "learning_rate": 4.984743533537922e-06, + "loss": 11.9329, + "step": 58200 + }, + { + "epoch": 25.315131757674546, + "grad_norm": 11.26260757446289, + "learning_rate": 4.980359491451118e-06, + "loss": 11.9324, + "step": 58250 + }, + { + "epoch": 25.336864982341755, + "grad_norm": 23.691085815429688, + "learning_rate": 4.9759754493643145e-06, + "loss": 11.9377, + "step": 58300 + }, + { + "epoch": 25.358598207008963, + "grad_norm": 14.544368743896484, + "learning_rate": 4.97159140727751e-06, + "loss": 11.9555, + "step": 58350 + }, + { + "epoch": 25.380331431676176, + "grad_norm": 30.192901611328125, + "learning_rate": 4.967207365190706e-06, + "loss": 11.9389, + "step": 58400 + }, + { + "epoch": 25.402064656343384, + "grad_norm": 13.255487442016602, + "learning_rate": 4.962823323103902e-06, + "loss": 11.9655, + "step": 58450 + }, + { + "epoch": 25.423797881010596, + "grad_norm": 21.28059959411621, + "learning_rate": 4.9584392810170985e-06, + "loss": 11.9339, + "step": 58500 + }, + { + "epoch": 25.445531105677805, + "grad_norm": 19.402381896972656, + "learning_rate": 4.954055238930294e-06, + "loss": 11.9438, + "step": 58550 + }, + { + "epoch": 25.467264330345014, + "grad_norm": 23.586254119873047, + "learning_rate": 4.94967119684349e-06, + "loss": 11.9565, + "step": 58600 + }, + { + "epoch": 25.488997555012226, + "grad_norm": 12.589113235473633, + "learning_rate": 4.945287154756686e-06, + "loss": 11.9255, + "step": 58650 + }, + { + "epoch": 25.510730779679434, + "grad_norm": 13.459474563598633, + "learning_rate": 4.940903112669882e-06, + "loss": 11.9577, + "step": 58700 + }, + { + "epoch": 25.532464004346647, + "grad_norm": 44.4463005065918, + "learning_rate": 4.936519070583078e-06, + "loss": 11.9748, + "step": 58750 + }, + { + "epoch": 25.554197229013855, + "grad_norm": 17.335121154785156, + "learning_rate": 4.932135028496273e-06, + "loss": 11.9485, + "step": 58800 + }, + { + "epoch": 25.575930453681064, + "grad_norm": 13.910146713256836, + "learning_rate": 4.92775098640947e-06, + "loss": 11.9251, + "step": 58850 + }, + { + "epoch": 25.597663678348276, + "grad_norm": 12.966668128967285, + "learning_rate": 4.923366944322666e-06, + "loss": 11.9461, + "step": 58900 + }, + { + "epoch": 25.619396903015485, + "grad_norm": 11.38027572631836, + "learning_rate": 4.918982902235862e-06, + "loss": 11.9485, + "step": 58950 + }, + { + "epoch": 25.641130127682693, + "grad_norm": 19.2831974029541, + "learning_rate": 4.914598860149058e-06, + "loss": 11.9539, + "step": 59000 + }, + { + "epoch": 25.662863352349905, + "grad_norm": 14.93049144744873, + "learning_rate": 4.910214818062254e-06, + "loss": 11.9358, + "step": 59050 + }, + { + "epoch": 25.684596577017114, + "grad_norm": 20.345487594604492, + "learning_rate": 4.90583077597545e-06, + "loss": 11.963, + "step": 59100 + }, + { + "epoch": 25.706329801684326, + "grad_norm": 22.333740234375, + "learning_rate": 4.901446733888645e-06, + "loss": 11.9593, + "step": 59150 + }, + { + "epoch": 25.728063026351535, + "grad_norm": 15.723165512084961, + "learning_rate": 4.897062691801842e-06, + "loss": 11.9462, + "step": 59200 + }, + { + "epoch": 25.749796251018743, + "grad_norm": 23.927995681762695, + "learning_rate": 4.892678649715038e-06, + "loss": 11.9395, + "step": 59250 + }, + { + "epoch": 25.771529475685956, + "grad_norm": 9.985795974731445, + "learning_rate": 4.888294607628233e-06, + "loss": 11.9668, + "step": 59300 + }, + { + "epoch": 25.793262700353164, + "grad_norm": 13.037304878234863, + "learning_rate": 4.8839105655414294e-06, + "loss": 11.9362, + "step": 59350 + }, + { + "epoch": 25.814995925020376, + "grad_norm": 14.396384239196777, + "learning_rate": 4.879526523454626e-06, + "loss": 11.9613, + "step": 59400 + }, + { + "epoch": 25.836729149687585, + "grad_norm": 12.580947875976562, + "learning_rate": 4.875142481367822e-06, + "loss": 11.947, + "step": 59450 + }, + { + "epoch": 25.858462374354794, + "grad_norm": 9.566840171813965, + "learning_rate": 4.870758439281017e-06, + "loss": 11.9588, + "step": 59500 + }, + { + "epoch": 25.880195599022006, + "grad_norm": 14.287603378295898, + "learning_rate": 4.8663743971942135e-06, + "loss": 11.9367, + "step": 59550 + }, + { + "epoch": 25.901928823689214, + "grad_norm": 22.067798614501953, + "learning_rate": 4.86199035510741e-06, + "loss": 11.9608, + "step": 59600 + }, + { + "epoch": 25.923662048356427, + "grad_norm": 18.1433162689209, + "learning_rate": 4.857606313020605e-06, + "loss": 11.9365, + "step": 59650 + }, + { + "epoch": 25.945395273023635, + "grad_norm": 19.52138900756836, + "learning_rate": 4.853222270933801e-06, + "loss": 11.9533, + "step": 59700 + }, + { + "epoch": 25.967128497690844, + "grad_norm": 8.619915008544922, + "learning_rate": 4.848838228846997e-06, + "loss": 11.9343, + "step": 59750 + }, + { + "epoch": 25.988861722358056, + "grad_norm": 23.551292419433594, + "learning_rate": 4.844454186760194e-06, + "loss": 11.9369, + "step": 59800 + }, + { + "epoch": 26.01043194784026, + "grad_norm": 11.714635848999023, + "learning_rate": 4.840070144673389e-06, + "loss": 11.8362, + "step": 59850 + }, + { + "epoch": 26.032165172507472, + "grad_norm": 12.336874961853027, + "learning_rate": 4.8356861025865855e-06, + "loss": 11.8948, + "step": 59900 + }, + { + "epoch": 26.05389839717468, + "grad_norm": 20.45733642578125, + "learning_rate": 4.831302060499781e-06, + "loss": 11.9095, + "step": 59950 + }, + { + "epoch": 26.07563162184189, + "grad_norm": 19.363704681396484, + "learning_rate": 4.826918018412977e-06, + "loss": 11.9093, + "step": 60000 + }, + { + "epoch": 26.07563162184189, + "eval_cer": 0.07668522335921395, + "eval_loss": 2.3816096782684326, + "eval_runtime": 394.5974, + "eval_samples_per_second": 13.7, + "eval_steps_per_second": 3.426, + "eval_wer": 0.2290783482493327, + "step": 60000 + }, + { + "epoch": 26.0973648465091, + "grad_norm": 10.920241355895996, + "learning_rate": 4.822533976326173e-06, + "loss": 11.8988, + "step": 60050 + }, + { + "epoch": 26.11909807117631, + "grad_norm": 8.169657707214355, + "learning_rate": 4.818149934239369e-06, + "loss": 11.9302, + "step": 60100 + }, + { + "epoch": 26.140831295843522, + "grad_norm": 21.631534576416016, + "learning_rate": 4.813765892152565e-06, + "loss": 11.9295, + "step": 60150 + }, + { + "epoch": 26.16256452051073, + "grad_norm": 15.736180305480957, + "learning_rate": 4.80938185006576e-06, + "loss": 11.9402, + "step": 60200 + }, + { + "epoch": 26.18429774517794, + "grad_norm": 8.994476318359375, + "learning_rate": 4.804997807978957e-06, + "loss": 11.9254, + "step": 60250 + }, + { + "epoch": 26.206030969845152, + "grad_norm": 15.551674842834473, + "learning_rate": 4.800613765892153e-06, + "loss": 11.9274, + "step": 60300 + }, + { + "epoch": 26.22776419451236, + "grad_norm": 8.010394096374512, + "learning_rate": 4.796229723805349e-06, + "loss": 11.9204, + "step": 60350 + }, + { + "epoch": 26.24949741917957, + "grad_norm": 8.433065414428711, + "learning_rate": 4.791845681718545e-06, + "loss": 11.9123, + "step": 60400 + }, + { + "epoch": 26.27123064384678, + "grad_norm": 11.69290542602539, + "learning_rate": 4.787461639631741e-06, + "loss": 11.9423, + "step": 60450 + }, + { + "epoch": 26.29296386851399, + "grad_norm": 11.806631088256836, + "learning_rate": 4.783077597544937e-06, + "loss": 11.9229, + "step": 60500 + }, + { + "epoch": 26.314697093181202, + "grad_norm": 9.421358108520508, + "learning_rate": 4.778693555458132e-06, + "loss": 11.9254, + "step": 60550 + }, + { + "epoch": 26.33643031784841, + "grad_norm": 15.151471138000488, + "learning_rate": 4.7743095133713285e-06, + "loss": 11.9324, + "step": 60600 + }, + { + "epoch": 26.35816354251562, + "grad_norm": 7.523982524871826, + "learning_rate": 4.769925471284525e-06, + "loss": 11.9094, + "step": 60650 + }, + { + "epoch": 26.37989676718283, + "grad_norm": 7.315085411071777, + "learning_rate": 4.765541429197721e-06, + "loss": 11.9156, + "step": 60700 + }, + { + "epoch": 26.40162999185004, + "grad_norm": 37.69257354736328, + "learning_rate": 4.761157387110917e-06, + "loss": 11.9491, + "step": 60750 + }, + { + "epoch": 26.423363216517252, + "grad_norm": 12.536825180053711, + "learning_rate": 4.756773345024113e-06, + "loss": 11.937, + "step": 60800 + }, + { + "epoch": 26.44509644118446, + "grad_norm": 19.952590942382812, + "learning_rate": 4.752389302937309e-06, + "loss": 11.9031, + "step": 60850 + }, + { + "epoch": 26.46682966585167, + "grad_norm": 9.468097686767578, + "learning_rate": 4.748005260850504e-06, + "loss": 11.93, + "step": 60900 + }, + { + "epoch": 26.48856289051888, + "grad_norm": 9.063526153564453, + "learning_rate": 4.7436212187637005e-06, + "loss": 11.9282, + "step": 60950 + }, + { + "epoch": 26.51029611518609, + "grad_norm": 23.76058006286621, + "learning_rate": 4.739237176676897e-06, + "loss": 11.9431, + "step": 61000 + }, + { + "epoch": 26.532029339853302, + "grad_norm": 16.783021926879883, + "learning_rate": 4.734853134590092e-06, + "loss": 11.9221, + "step": 61050 + }, + { + "epoch": 26.55376256452051, + "grad_norm": 20.15511131286621, + "learning_rate": 4.730469092503288e-06, + "loss": 11.9392, + "step": 61100 + }, + { + "epoch": 26.57549578918772, + "grad_norm": 14.874903678894043, + "learning_rate": 4.7260850504164845e-06, + "loss": 11.953, + "step": 61150 + }, + { + "epoch": 26.597229013854932, + "grad_norm": 7.126718044281006, + "learning_rate": 4.721701008329681e-06, + "loss": 11.9446, + "step": 61200 + }, + { + "epoch": 26.61896223852214, + "grad_norm": 9.697017669677734, + "learning_rate": 4.717316966242876e-06, + "loss": 11.9047, + "step": 61250 + }, + { + "epoch": 26.64069546318935, + "grad_norm": 16.13836097717285, + "learning_rate": 4.712932924156072e-06, + "loss": 11.9156, + "step": 61300 + }, + { + "epoch": 26.66242868785656, + "grad_norm": 10.770340919494629, + "learning_rate": 4.708548882069268e-06, + "loss": 11.9602, + "step": 61350 + }, + { + "epoch": 26.68416191252377, + "grad_norm": 20.800886154174805, + "learning_rate": 4.704164839982464e-06, + "loss": 11.9496, + "step": 61400 + }, + { + "epoch": 26.705895137190982, + "grad_norm": 14.415149688720703, + "learning_rate": 4.69978079789566e-06, + "loss": 11.8953, + "step": 61450 + }, + { + "epoch": 26.72762836185819, + "grad_norm": 16.533891677856445, + "learning_rate": 4.695396755808856e-06, + "loss": 11.9318, + "step": 61500 + }, + { + "epoch": 26.7493615865254, + "grad_norm": 12.036311149597168, + "learning_rate": 4.691012713722053e-06, + "loss": 11.9219, + "step": 61550 + }, + { + "epoch": 26.77109481119261, + "grad_norm": 9.894879341125488, + "learning_rate": 4.686628671635248e-06, + "loss": 11.925, + "step": 61600 + }, + { + "epoch": 26.79282803585982, + "grad_norm": 13.89318561553955, + "learning_rate": 4.682244629548444e-06, + "loss": 11.9642, + "step": 61650 + }, + { + "epoch": 26.814561260527032, + "grad_norm": 7.87830114364624, + "learning_rate": 4.67786058746164e-06, + "loss": 11.9357, + "step": 61700 + }, + { + "epoch": 26.83629448519424, + "grad_norm": 12.9856595993042, + "learning_rate": 4.673476545374836e-06, + "loss": 11.9305, + "step": 61750 + }, + { + "epoch": 26.85802770986145, + "grad_norm": 9.654988288879395, + "learning_rate": 4.669092503288032e-06, + "loss": 11.9184, + "step": 61800 + }, + { + "epoch": 26.87976093452866, + "grad_norm": 58.50657653808594, + "learning_rate": 4.664708461201228e-06, + "loss": 11.9274, + "step": 61850 + }, + { + "epoch": 26.90149415919587, + "grad_norm": 9.662385940551758, + "learning_rate": 4.660324419114424e-06, + "loss": 11.9326, + "step": 61900 + }, + { + "epoch": 26.92322738386308, + "grad_norm": 11.249975204467773, + "learning_rate": 4.655940377027619e-06, + "loss": 11.9301, + "step": 61950 + }, + { + "epoch": 26.94496060853029, + "grad_norm": 14.355755805969238, + "learning_rate": 4.651556334940816e-06, + "loss": 11.925, + "step": 62000 + }, + { + "epoch": 26.9666938331975, + "grad_norm": 55.27675247192383, + "learning_rate": 4.647172292854012e-06, + "loss": 11.9072, + "step": 62050 + }, + { + "epoch": 26.988427057864712, + "grad_norm": 9.871424674987793, + "learning_rate": 4.642788250767208e-06, + "loss": 11.9371, + "step": 62100 + }, + { + "epoch": 27.009997283346916, + "grad_norm": 11.582411766052246, + "learning_rate": 4.638404208680404e-06, + "loss": 11.8118, + "step": 62150 + }, + { + "epoch": 27.031730508014128, + "grad_norm": 7.072801113128662, + "learning_rate": 4.6340201665935995e-06, + "loss": 11.8716, + "step": 62200 + }, + { + "epoch": 27.053463732681337, + "grad_norm": 12.715493202209473, + "learning_rate": 4.629636124506796e-06, + "loss": 11.9066, + "step": 62250 + }, + { + "epoch": 27.075196957348545, + "grad_norm": 13.285543441772461, + "learning_rate": 4.625252082419991e-06, + "loss": 11.9065, + "step": 62300 + }, + { + "epoch": 27.096930182015758, + "grad_norm": 14.948770523071289, + "learning_rate": 4.620868040333187e-06, + "loss": 11.8932, + "step": 62350 + }, + { + "epoch": 27.118663406682966, + "grad_norm": 7.0187296867370605, + "learning_rate": 4.616483998246384e-06, + "loss": 11.9063, + "step": 62400 + }, + { + "epoch": 27.14039663135018, + "grad_norm": 11.898140907287598, + "learning_rate": 4.61209995615958e-06, + "loss": 11.8861, + "step": 62450 + }, + { + "epoch": 27.162129856017387, + "grad_norm": 7.729825496673584, + "learning_rate": 4.607715914072775e-06, + "loss": 11.9102, + "step": 62500 + }, + { + "epoch": 27.183863080684596, + "grad_norm": 9.05493450164795, + "learning_rate": 4.6033318719859715e-06, + "loss": 11.901, + "step": 62550 + }, + { + "epoch": 27.205596305351808, + "grad_norm": 48.41245651245117, + "learning_rate": 4.598947829899168e-06, + "loss": 11.9217, + "step": 62600 + }, + { + "epoch": 27.227329530019016, + "grad_norm": 8.19921875, + "learning_rate": 4.594563787812363e-06, + "loss": 11.9103, + "step": 62650 + }, + { + "epoch": 27.249062754686225, + "grad_norm": 7.067399024963379, + "learning_rate": 4.590179745725559e-06, + "loss": 11.9102, + "step": 62700 + }, + { + "epoch": 27.270795979353437, + "grad_norm": 10.219547271728516, + "learning_rate": 4.5857957036387556e-06, + "loss": 11.9086, + "step": 62750 + }, + { + "epoch": 27.292529204020646, + "grad_norm": 11.2730073928833, + "learning_rate": 4.581411661551951e-06, + "loss": 11.8907, + "step": 62800 + }, + { + "epoch": 27.314262428687858, + "grad_norm": 23.644775390625, + "learning_rate": 4.577027619465147e-06, + "loss": 11.9194, + "step": 62850 + }, + { + "epoch": 27.335995653355067, + "grad_norm": 13.088956832885742, + "learning_rate": 4.572643577378343e-06, + "loss": 11.9178, + "step": 62900 + }, + { + "epoch": 27.357728878022275, + "grad_norm": 12.945446968078613, + "learning_rate": 4.56825953529154e-06, + "loss": 11.9127, + "step": 62950 + }, + { + "epoch": 27.379462102689487, + "grad_norm": 7.951735019683838, + "learning_rate": 4.563875493204735e-06, + "loss": 11.9237, + "step": 63000 + }, + { + "epoch": 27.401195327356696, + "grad_norm": 13.66278076171875, + "learning_rate": 4.559491451117931e-06, + "loss": 11.8985, + "step": 63050 + }, + { + "epoch": 27.422928552023908, + "grad_norm": 6.567673683166504, + "learning_rate": 4.555107409031127e-06, + "loss": 11.9311, + "step": 63100 + }, + { + "epoch": 27.444661776691117, + "grad_norm": 11.139328956604004, + "learning_rate": 4.550723366944323e-06, + "loss": 11.9207, + "step": 63150 + }, + { + "epoch": 27.466395001358325, + "grad_norm": 18.506877899169922, + "learning_rate": 4.546339324857519e-06, + "loss": 11.9053, + "step": 63200 + }, + { + "epoch": 27.488128226025538, + "grad_norm": 16.45941925048828, + "learning_rate": 4.5419552827707145e-06, + "loss": 11.9132, + "step": 63250 + }, + { + "epoch": 27.509861450692746, + "grad_norm": 13.74703311920166, + "learning_rate": 4.537571240683912e-06, + "loss": 11.9032, + "step": 63300 + }, + { + "epoch": 27.531594675359955, + "grad_norm": 5.686723232269287, + "learning_rate": 4.533187198597107e-06, + "loss": 11.9135, + "step": 63350 + }, + { + "epoch": 27.553327900027167, + "grad_norm": 47.760013580322266, + "learning_rate": 4.528803156510303e-06, + "loss": 11.9263, + "step": 63400 + }, + { + "epoch": 27.575061124694376, + "grad_norm": 13.832674026489258, + "learning_rate": 4.524419114423499e-06, + "loss": 11.9114, + "step": 63450 + }, + { + "epoch": 27.596794349361588, + "grad_norm": 22.621736526489258, + "learning_rate": 4.520035072336695e-06, + "loss": 11.9134, + "step": 63500 + }, + { + "epoch": 27.618527574028796, + "grad_norm": 13.379792213439941, + "learning_rate": 4.515651030249891e-06, + "loss": 11.9058, + "step": 63550 + }, + { + "epoch": 27.640260798696005, + "grad_norm": 12.987919807434082, + "learning_rate": 4.5112669881630865e-06, + "loss": 11.9083, + "step": 63600 + }, + { + "epoch": 27.661994023363217, + "grad_norm": 16.87094497680664, + "learning_rate": 4.506882946076283e-06, + "loss": 11.908, + "step": 63650 + }, + { + "epoch": 27.683727248030426, + "grad_norm": 9.978212356567383, + "learning_rate": 4.502498903989478e-06, + "loss": 11.8919, + "step": 63700 + }, + { + "epoch": 27.705460472697638, + "grad_norm": 13.08248519897461, + "learning_rate": 4.498114861902675e-06, + "loss": 11.9, + "step": 63750 + }, + { + "epoch": 27.727193697364847, + "grad_norm": 14.08407974243164, + "learning_rate": 4.4937308198158706e-06, + "loss": 11.9366, + "step": 63800 + }, + { + "epoch": 27.748926922032055, + "grad_norm": 11.779139518737793, + "learning_rate": 4.489346777729067e-06, + "loss": 11.9216, + "step": 63850 + }, + { + "epoch": 27.770660146699267, + "grad_norm": 7.019837856292725, + "learning_rate": 4.484962735642262e-06, + "loss": 11.9144, + "step": 63900 + }, + { + "epoch": 27.792393371366476, + "grad_norm": 8.715902328491211, + "learning_rate": 4.480578693555458e-06, + "loss": 11.9335, + "step": 63950 + }, + { + "epoch": 27.814126596033688, + "grad_norm": 17.31736183166504, + "learning_rate": 4.476194651468655e-06, + "loss": 11.9164, + "step": 64000 + }, + { + "epoch": 27.835859820700897, + "grad_norm": 7.397292613983154, + "learning_rate": 4.47181060938185e-06, + "loss": 11.8935, + "step": 64050 + }, + { + "epoch": 27.857593045368105, + "grad_norm": 15.1404447555542, + "learning_rate": 4.467426567295046e-06, + "loss": 11.9156, + "step": 64100 + }, + { + "epoch": 27.879326270035317, + "grad_norm": 16.563631057739258, + "learning_rate": 4.4630425252082425e-06, + "loss": 11.8858, + "step": 64150 + }, + { + "epoch": 27.901059494702526, + "grad_norm": 10.400628089904785, + "learning_rate": 4.458658483121439e-06, + "loss": 11.9083, + "step": 64200 + }, + { + "epoch": 27.92279271936974, + "grad_norm": 8.129082679748535, + "learning_rate": 4.454274441034634e-06, + "loss": 11.9361, + "step": 64250 + }, + { + "epoch": 27.944525944036947, + "grad_norm": 22.946596145629883, + "learning_rate": 4.44989039894783e-06, + "loss": 11.8997, + "step": 64300 + }, + { + "epoch": 27.966259168704156, + "grad_norm": 17.139440536499023, + "learning_rate": 4.445506356861027e-06, + "loss": 11.9067, + "step": 64350 + }, + { + "epoch": 27.987992393371368, + "grad_norm": 8.700691223144531, + "learning_rate": 4.441122314774222e-06, + "loss": 11.8941, + "step": 64400 + }, + { + "epoch": 28.009562618853572, + "grad_norm": 9.634552001953125, + "learning_rate": 4.436738272687418e-06, + "loss": 11.7983, + "step": 64450 + }, + { + "epoch": 28.031295843520784, + "grad_norm": 12.564841270446777, + "learning_rate": 4.432354230600614e-06, + "loss": 11.8731, + "step": 64500 + }, + { + "epoch": 28.053029068187993, + "grad_norm": 10.420557022094727, + "learning_rate": 4.42797018851381e-06, + "loss": 11.896, + "step": 64550 + }, + { + "epoch": 28.0747622928552, + "grad_norm": 13.071510314941406, + "learning_rate": 4.423586146427006e-06, + "loss": 11.8855, + "step": 64600 + }, + { + "epoch": 28.096495517522413, + "grad_norm": 11.409537315368652, + "learning_rate": 4.419202104340202e-06, + "loss": 11.8987, + "step": 64650 + }, + { + "epoch": 28.118228742189622, + "grad_norm": 17.64859390258789, + "learning_rate": 4.4148180622533985e-06, + "loss": 11.8742, + "step": 64700 + }, + { + "epoch": 28.13996196685683, + "grad_norm": 8.101343154907227, + "learning_rate": 4.410434020166594e-06, + "loss": 11.8781, + "step": 64750 + }, + { + "epoch": 28.161695191524043, + "grad_norm": 11.35251522064209, + "learning_rate": 4.40604997807979e-06, + "loss": 11.8891, + "step": 64800 + }, + { + "epoch": 28.18342841619125, + "grad_norm": 19.521108627319336, + "learning_rate": 4.4016659359929855e-06, + "loss": 11.882, + "step": 64850 + }, + { + "epoch": 28.205161640858464, + "grad_norm": 14.904671669006348, + "learning_rate": 4.397281893906182e-06, + "loss": 11.8987, + "step": 64900 + }, + { + "epoch": 28.226894865525672, + "grad_norm": 11.82111644744873, + "learning_rate": 4.392897851819378e-06, + "loss": 11.8718, + "step": 64950 + }, + { + "epoch": 28.24862809019288, + "grad_norm": 7.986074924468994, + "learning_rate": 4.388513809732573e-06, + "loss": 11.8931, + "step": 65000 + }, + { + "epoch": 28.270361314860093, + "grad_norm": 10.135086059570312, + "learning_rate": 4.38412976764577e-06, + "loss": 11.8845, + "step": 65050 + }, + { + "epoch": 28.2920945395273, + "grad_norm": 9.275798797607422, + "learning_rate": 4.379745725558966e-06, + "loss": 11.8647, + "step": 65100 + }, + { + "epoch": 28.313827764194514, + "grad_norm": 7.864231586456299, + "learning_rate": 4.375361683472162e-06, + "loss": 11.9019, + "step": 65150 + }, + { + "epoch": 28.335560988861722, + "grad_norm": 37.51991653442383, + "learning_rate": 4.3709776413853575e-06, + "loss": 11.8779, + "step": 65200 + }, + { + "epoch": 28.35729421352893, + "grad_norm": 7.752624034881592, + "learning_rate": 4.366593599298554e-06, + "loss": 11.9023, + "step": 65250 + }, + { + "epoch": 28.379027438196143, + "grad_norm": 12.627674102783203, + "learning_rate": 4.36220955721175e-06, + "loss": 11.8921, + "step": 65300 + }, + { + "epoch": 28.40076066286335, + "grad_norm": 26.206846237182617, + "learning_rate": 4.357825515124945e-06, + "loss": 11.891, + "step": 65350 + }, + { + "epoch": 28.422493887530564, + "grad_norm": 18.58912467956543, + "learning_rate": 4.3534414730381416e-06, + "loss": 11.908, + "step": 65400 + }, + { + "epoch": 28.444227112197773, + "grad_norm": 16.89732551574707, + "learning_rate": 4.349057430951337e-06, + "loss": 11.8899, + "step": 65450 + }, + { + "epoch": 28.46596033686498, + "grad_norm": 7.8719964027404785, + "learning_rate": 4.344673388864534e-06, + "loss": 11.8946, + "step": 65500 + }, + { + "epoch": 28.487693561532193, + "grad_norm": 11.639144897460938, + "learning_rate": 4.3402893467777294e-06, + "loss": 11.9214, + "step": 65550 + }, + { + "epoch": 28.509426786199402, + "grad_norm": 29.2702579498291, + "learning_rate": 4.335905304690926e-06, + "loss": 11.9217, + "step": 65600 + }, + { + "epoch": 28.531160010866614, + "grad_norm": 48.321807861328125, + "learning_rate": 4.331521262604121e-06, + "loss": 11.889, + "step": 65650 + }, + { + "epoch": 28.552893235533823, + "grad_norm": 12.334220886230469, + "learning_rate": 4.327137220517317e-06, + "loss": 11.8814, + "step": 65700 + }, + { + "epoch": 28.57462646020103, + "grad_norm": 13.60355281829834, + "learning_rate": 4.3227531784305135e-06, + "loss": 11.9137, + "step": 65750 + }, + { + "epoch": 28.596359684868244, + "grad_norm": 12.374007225036621, + "learning_rate": 4.318369136343709e-06, + "loss": 11.9145, + "step": 65800 + }, + { + "epoch": 28.618092909535452, + "grad_norm": 15.23318862915039, + "learning_rate": 4.313985094256905e-06, + "loss": 11.8971, + "step": 65850 + }, + { + "epoch": 28.63982613420266, + "grad_norm": 8.697155952453613, + "learning_rate": 4.3096010521701005e-06, + "loss": 11.8899, + "step": 65900 + }, + { + "epoch": 28.661559358869873, + "grad_norm": 6.101230621337891, + "learning_rate": 4.305217010083298e-06, + "loss": 11.8835, + "step": 65950 + }, + { + "epoch": 28.68329258353708, + "grad_norm": 30.645008087158203, + "learning_rate": 4.300832967996493e-06, + "loss": 11.9118, + "step": 66000 + }, + { + "epoch": 28.705025808204294, + "grad_norm": 10.432790756225586, + "learning_rate": 4.296448925909689e-06, + "loss": 11.898, + "step": 66050 + }, + { + "epoch": 28.726759032871502, + "grad_norm": 22.726320266723633, + "learning_rate": 4.2920648838228855e-06, + "loss": 11.8829, + "step": 66100 + }, + { + "epoch": 28.74849225753871, + "grad_norm": 15.002222061157227, + "learning_rate": 4.287680841736081e-06, + "loss": 11.8922, + "step": 66150 + }, + { + "epoch": 28.770225482205923, + "grad_norm": 16.7822208404541, + "learning_rate": 4.283296799649277e-06, + "loss": 11.9015, + "step": 66200 + }, + { + "epoch": 28.79195870687313, + "grad_norm": 10.86782455444336, + "learning_rate": 4.2789127575624725e-06, + "loss": 11.9095, + "step": 66250 + }, + { + "epoch": 28.813691931540344, + "grad_norm": 14.24905776977539, + "learning_rate": 4.274528715475669e-06, + "loss": 11.8791, + "step": 66300 + }, + { + "epoch": 28.835425156207553, + "grad_norm": 8.511114120483398, + "learning_rate": 4.270144673388865e-06, + "loss": 11.8846, + "step": 66350 + }, + { + "epoch": 28.85715838087476, + "grad_norm": 10.261749267578125, + "learning_rate": 4.265760631302061e-06, + "loss": 11.9029, + "step": 66400 + }, + { + "epoch": 28.878891605541973, + "grad_norm": 48.72242736816406, + "learning_rate": 4.261376589215257e-06, + "loss": 11.9049, + "step": 66450 + }, + { + "epoch": 28.900624830209182, + "grad_norm": 10.668495178222656, + "learning_rate": 4.256992547128453e-06, + "loss": 11.8856, + "step": 66500 + }, + { + "epoch": 28.92235805487639, + "grad_norm": 7.709607124328613, + "learning_rate": 4.252608505041649e-06, + "loss": 11.8888, + "step": 66550 + }, + { + "epoch": 28.944091279543603, + "grad_norm": 30.70176124572754, + "learning_rate": 4.248224462954844e-06, + "loss": 11.9187, + "step": 66600 + }, + { + "epoch": 28.96582450421081, + "grad_norm": 13.879278182983398, + "learning_rate": 4.243840420868041e-06, + "loss": 11.8825, + "step": 66650 + }, + { + "epoch": 28.987557728878024, + "grad_norm": 7.8939714431762695, + "learning_rate": 4.239456378781237e-06, + "loss": 11.9042, + "step": 66700 + }, + { + "epoch": 29.009127954360228, + "grad_norm": 16.196550369262695, + "learning_rate": 4.235072336694432e-06, + "loss": 11.8091, + "step": 66750 + }, + { + "epoch": 29.03086117902744, + "grad_norm": 10.502305030822754, + "learning_rate": 4.2306882946076285e-06, + "loss": 11.8647, + "step": 66800 + }, + { + "epoch": 29.05259440369465, + "grad_norm": 28.054792404174805, + "learning_rate": 4.226304252520825e-06, + "loss": 11.8775, + "step": 66850 + }, + { + "epoch": 29.074327628361857, + "grad_norm": 5.852464199066162, + "learning_rate": 4.221920210434021e-06, + "loss": 11.8717, + "step": 66900 + }, + { + "epoch": 29.09606085302907, + "grad_norm": 10.438371658325195, + "learning_rate": 4.217536168347216e-06, + "loss": 11.8783, + "step": 66950 + }, + { + "epoch": 29.117794077696278, + "grad_norm": 5.391887664794922, + "learning_rate": 4.213152126260413e-06, + "loss": 11.8597, + "step": 67000 + }, + { + "epoch": 29.139527302363486, + "grad_norm": 15.71295166015625, + "learning_rate": 4.208768084173608e-06, + "loss": 11.8726, + "step": 67050 + }, + { + "epoch": 29.1612605270307, + "grad_norm": 15.637112617492676, + "learning_rate": 4.204384042086804e-06, + "loss": 11.8549, + "step": 67100 + }, + { + "epoch": 29.182993751697907, + "grad_norm": 16.201160430908203, + "learning_rate": 4.2000000000000004e-06, + "loss": 11.8751, + "step": 67150 + }, + { + "epoch": 29.20472697636512, + "grad_norm": 18.363697052001953, + "learning_rate": 4.195615957913196e-06, + "loss": 11.9021, + "step": 67200 + }, + { + "epoch": 29.226460201032328, + "grad_norm": 15.013435363769531, + "learning_rate": 4.191231915826393e-06, + "loss": 11.8747, + "step": 67250 + }, + { + "epoch": 29.248193425699537, + "grad_norm": 14.785465240478516, + "learning_rate": 4.186847873739588e-06, + "loss": 11.8775, + "step": 67300 + }, + { + "epoch": 29.26992665036675, + "grad_norm": 13.100189208984375, + "learning_rate": 4.1824638316527845e-06, + "loss": 11.8587, + "step": 67350 + }, + { + "epoch": 29.291659875033957, + "grad_norm": 9.864031791687012, + "learning_rate": 4.17807978956598e-06, + "loss": 11.9118, + "step": 67400 + }, + { + "epoch": 29.31339309970117, + "grad_norm": 19.341495513916016, + "learning_rate": 4.173695747479176e-06, + "loss": 11.8819, + "step": 67450 + }, + { + "epoch": 29.335126324368378, + "grad_norm": 7.35308837890625, + "learning_rate": 4.169311705392372e-06, + "loss": 11.8731, + "step": 67500 + }, + { + "epoch": 29.356859549035587, + "grad_norm": 8.811240196228027, + "learning_rate": 4.164927663305568e-06, + "loss": 11.8819, + "step": 67550 + }, + { + "epoch": 29.3785927737028, + "grad_norm": 9.851766586303711, + "learning_rate": 4.160543621218764e-06, + "loss": 11.8942, + "step": 67600 + }, + { + "epoch": 29.400325998370008, + "grad_norm": 14.708338737487793, + "learning_rate": 4.156159579131959e-06, + "loss": 11.8899, + "step": 67650 + }, + { + "epoch": 29.42205922303722, + "grad_norm": 11.063777923583984, + "learning_rate": 4.1517755370451565e-06, + "loss": 11.8602, + "step": 67700 + }, + { + "epoch": 29.44379244770443, + "grad_norm": 11.282812118530273, + "learning_rate": 4.147391494958352e-06, + "loss": 11.8651, + "step": 67750 + }, + { + "epoch": 29.465525672371637, + "grad_norm": 258.5189514160156, + "learning_rate": 4.143007452871548e-06, + "loss": 11.8813, + "step": 67800 + }, + { + "epoch": 29.48725889703885, + "grad_norm": 17.533771514892578, + "learning_rate": 4.138623410784744e-06, + "loss": 11.8777, + "step": 67850 + }, + { + "epoch": 29.508992121706058, + "grad_norm": 9.061328887939453, + "learning_rate": 4.13423936869794e-06, + "loss": 11.863, + "step": 67900 + }, + { + "epoch": 29.530725346373266, + "grad_norm": 14.129364013671875, + "learning_rate": 4.129855326611136e-06, + "loss": 11.8837, + "step": 67950 + }, + { + "epoch": 29.55245857104048, + "grad_norm": 21.77886390686035, + "learning_rate": 4.125471284524331e-06, + "loss": 11.8897, + "step": 68000 + }, + { + "epoch": 29.574191795707687, + "grad_norm": 8.441765785217285, + "learning_rate": 4.1210872424375276e-06, + "loss": 11.9048, + "step": 68050 + }, + { + "epoch": 29.5959250203749, + "grad_norm": 11.595650672912598, + "learning_rate": 4.116703200350724e-06, + "loss": 11.8899, + "step": 68100 + }, + { + "epoch": 29.617658245042108, + "grad_norm": 16.048147201538086, + "learning_rate": 4.11231915826392e-06, + "loss": 11.8787, + "step": 68150 + }, + { + "epoch": 29.639391469709317, + "grad_norm": 9.9227294921875, + "learning_rate": 4.1079351161771154e-06, + "loss": 11.8748, + "step": 68200 + }, + { + "epoch": 29.66112469437653, + "grad_norm": 9.97187614440918, + "learning_rate": 4.103551074090312e-06, + "loss": 11.8759, + "step": 68250 + }, + { + "epoch": 29.682857919043737, + "grad_norm": 18.43181610107422, + "learning_rate": 4.099167032003508e-06, + "loss": 11.8683, + "step": 68300 + }, + { + "epoch": 29.70459114371095, + "grad_norm": 18.20121192932129, + "learning_rate": 4.094782989916703e-06, + "loss": 11.8865, + "step": 68350 + }, + { + "epoch": 29.726324368378158, + "grad_norm": 6.934305667877197, + "learning_rate": 4.0903989478298995e-06, + "loss": 11.8502, + "step": 68400 + }, + { + "epoch": 29.748057593045367, + "grad_norm": 12.715697288513184, + "learning_rate": 4.086014905743096e-06, + "loss": 11.8733, + "step": 68450 + }, + { + "epoch": 29.76979081771258, + "grad_norm": 9.016664505004883, + "learning_rate": 4.081630863656291e-06, + "loss": 11.8775, + "step": 68500 + }, + { + "epoch": 29.791524042379788, + "grad_norm": 7.763296127319336, + "learning_rate": 4.077246821569487e-06, + "loss": 11.8733, + "step": 68550 + }, + { + "epoch": 29.813257267047, + "grad_norm": 10.350701332092285, + "learning_rate": 4.072862779482684e-06, + "loss": 11.8632, + "step": 68600 + }, + { + "epoch": 29.83499049171421, + "grad_norm": 6.480827331542969, + "learning_rate": 4.06847873739588e-06, + "loss": 11.8788, + "step": 68650 + }, + { + "epoch": 29.856723716381417, + "grad_norm": 20.947677612304688, + "learning_rate": 4.064094695309075e-06, + "loss": 11.8773, + "step": 68700 + }, + { + "epoch": 29.87845694104863, + "grad_norm": 10.931136131286621, + "learning_rate": 4.0597106532222715e-06, + "loss": 11.8612, + "step": 68750 + }, + { + "epoch": 29.900190165715838, + "grad_norm": 10.79286003112793, + "learning_rate": 4.055326611135467e-06, + "loss": 11.8702, + "step": 68800 + }, + { + "epoch": 29.921923390383046, + "grad_norm": 45.66188049316406, + "learning_rate": 4.050942569048663e-06, + "loss": 11.8613, + "step": 68850 + }, + { + "epoch": 29.94365661505026, + "grad_norm": 6.688445091247559, + "learning_rate": 4.046558526961859e-06, + "loss": 11.8739, + "step": 68900 + }, + { + "epoch": 29.965389839717467, + "grad_norm": 14.173410415649414, + "learning_rate": 4.042174484875055e-06, + "loss": 11.8595, + "step": 68950 + }, + { + "epoch": 29.98712306438468, + "grad_norm": 13.653775215148926, + "learning_rate": 4.037790442788251e-06, + "loss": 11.8927, + "step": 69000 + }, + { + "epoch": 30.008693289866883, + "grad_norm": 9.138008117675781, + "learning_rate": 4.033406400701447e-06, + "loss": 11.7683, + "step": 69050 + }, + { + "epoch": 30.030426514534096, + "grad_norm": 16.62093162536621, + "learning_rate": 4.029022358614643e-06, + "loss": 11.8934, + "step": 69100 + }, + { + "epoch": 30.052159739201304, + "grad_norm": 7.672760486602783, + "learning_rate": 4.024638316527839e-06, + "loss": 11.8631, + "step": 69150 + }, + { + "epoch": 30.073892963868513, + "grad_norm": 8.038310050964355, + "learning_rate": 4.020254274441035e-06, + "loss": 11.8599, + "step": 69200 + }, + { + "epoch": 30.095626188535725, + "grad_norm": 10.817283630371094, + "learning_rate": 4.015870232354231e-06, + "loss": 11.8627, + "step": 69250 + }, + { + "epoch": 30.117359413202934, + "grad_norm": 6.556225299835205, + "learning_rate": 4.011486190267427e-06, + "loss": 11.8671, + "step": 69300 + }, + { + "epoch": 30.139092637870142, + "grad_norm": 16.242650985717773, + "learning_rate": 4.007102148180623e-06, + "loss": 11.875, + "step": 69350 + }, + { + "epoch": 30.160825862537354, + "grad_norm": 5.174230575561523, + "learning_rate": 4.002718106093818e-06, + "loss": 11.8433, + "step": 69400 + }, + { + "epoch": 30.182559087204563, + "grad_norm": 7.197856426239014, + "learning_rate": 3.998334064007015e-06, + "loss": 11.8622, + "step": 69450 + }, + { + "epoch": 30.204292311871775, + "grad_norm": 21.63473892211914, + "learning_rate": 3.993950021920211e-06, + "loss": 11.8656, + "step": 69500 + }, + { + "epoch": 30.226025536538984, + "grad_norm": 17.78504753112793, + "learning_rate": 3.989565979833407e-06, + "loss": 11.8521, + "step": 69550 + }, + { + "epoch": 30.247758761206192, + "grad_norm": 18.68705940246582, + "learning_rate": 3.985181937746602e-06, + "loss": 11.8453, + "step": 69600 + }, + { + "epoch": 30.269491985873405, + "grad_norm": 7.354127407073975, + "learning_rate": 3.980797895659799e-06, + "loss": 11.8665, + "step": 69650 + }, + { + "epoch": 30.291225210540613, + "grad_norm": 7.651024341583252, + "learning_rate": 3.976413853572995e-06, + "loss": 11.853, + "step": 69700 + }, + { + "epoch": 30.312958435207825, + "grad_norm": 8.84490966796875, + "learning_rate": 3.97202981148619e-06, + "loss": 11.8501, + "step": 69750 + }, + { + "epoch": 30.334691659875034, + "grad_norm": 8.941247940063477, + "learning_rate": 3.9676457693993865e-06, + "loss": 11.8615, + "step": 69800 + }, + { + "epoch": 30.356424884542243, + "grad_norm": 13.154361724853516, + "learning_rate": 3.963261727312583e-06, + "loss": 11.8584, + "step": 69850 + }, + { + "epoch": 30.378158109209455, + "grad_norm": 13.795583724975586, + "learning_rate": 3.958877685225779e-06, + "loss": 11.8604, + "step": 69900 + }, + { + "epoch": 30.399891333876663, + "grad_norm": 8.268631935119629, + "learning_rate": 3.954493643138974e-06, + "loss": 11.859, + "step": 69950 + }, + { + "epoch": 30.421624558543876, + "grad_norm": 28.959548950195312, + "learning_rate": 3.9501096010521705e-06, + "loss": 11.8702, + "step": 70000 + }, + { + "epoch": 30.421624558543876, + "eval_cer": 0.0757471023169121, + "eval_loss": 2.39117431640625, + "eval_runtime": 396.909, + "eval_samples_per_second": 13.62, + "eval_steps_per_second": 3.406, + "eval_wer": 0.22849740932642487, + "step": 70000 + }, + { + "epoch": 30.443357783211084, + "grad_norm": 15.879110336303711, + "learning_rate": 3.945725558965367e-06, + "loss": 11.8687, + "step": 70050 + }, + { + "epoch": 30.465091007878293, + "grad_norm": 14.088164329528809, + "learning_rate": 3.941341516878562e-06, + "loss": 11.85, + "step": 70100 + }, + { + "epoch": 30.486824232545505, + "grad_norm": 5.0238752365112305, + "learning_rate": 3.936957474791758e-06, + "loss": 11.8533, + "step": 70150 + }, + { + "epoch": 30.508557457212714, + "grad_norm": 11.336899757385254, + "learning_rate": 3.932573432704954e-06, + "loss": 11.8699, + "step": 70200 + }, + { + "epoch": 30.530290681879922, + "grad_norm": 17.313730239868164, + "learning_rate": 3.92818939061815e-06, + "loss": 11.8562, + "step": 70250 + }, + { + "epoch": 30.552023906547134, + "grad_norm": 28.565584182739258, + "learning_rate": 3.923805348531346e-06, + "loss": 11.8547, + "step": 70300 + }, + { + "epoch": 30.573757131214343, + "grad_norm": 6.773772239685059, + "learning_rate": 3.9194213064445425e-06, + "loss": 11.8538, + "step": 70350 + }, + { + "epoch": 30.595490355881555, + "grad_norm": 15.116411209106445, + "learning_rate": 3.915037264357739e-06, + "loss": 11.8638, + "step": 70400 + }, + { + "epoch": 30.617223580548764, + "grad_norm": 9.379572868347168, + "learning_rate": 3.910653222270934e-06, + "loss": 11.8757, + "step": 70450 + }, + { + "epoch": 30.638956805215972, + "grad_norm": 12.259918212890625, + "learning_rate": 3.90626918018413e-06, + "loss": 11.876, + "step": 70500 + }, + { + "epoch": 30.660690029883185, + "grad_norm": 12.57608699798584, + "learning_rate": 3.901885138097326e-06, + "loss": 11.8507, + "step": 70550 + }, + { + "epoch": 30.682423254550393, + "grad_norm": 8.661283493041992, + "learning_rate": 3.897501096010522e-06, + "loss": 11.8491, + "step": 70600 + }, + { + "epoch": 30.704156479217605, + "grad_norm": 9.84383773803711, + "learning_rate": 3.893117053923718e-06, + "loss": 11.8489, + "step": 70650 + }, + { + "epoch": 30.725889703884814, + "grad_norm": 9.917572975158691, + "learning_rate": 3.888733011836914e-06, + "loss": 11.8785, + "step": 70700 + }, + { + "epoch": 30.747622928552023, + "grad_norm": 7.059745788574219, + "learning_rate": 3.88434896975011e-06, + "loss": 11.8737, + "step": 70750 + }, + { + "epoch": 30.769356153219235, + "grad_norm": 20.44463348388672, + "learning_rate": 3.879964927663306e-06, + "loss": 11.8699, + "step": 70800 + }, + { + "epoch": 30.791089377886443, + "grad_norm": 6.311903476715088, + "learning_rate": 3.875580885576502e-06, + "loss": 11.8542, + "step": 70850 + }, + { + "epoch": 30.812822602553656, + "grad_norm": 6.262167930603027, + "learning_rate": 3.871196843489698e-06, + "loss": 11.8626, + "step": 70900 + }, + { + "epoch": 30.834555827220864, + "grad_norm": 8.859283447265625, + "learning_rate": 3.866812801402894e-06, + "loss": 11.8909, + "step": 70950 + }, + { + "epoch": 30.856289051888073, + "grad_norm": 6.593499660491943, + "learning_rate": 3.86242875931609e-06, + "loss": 11.8474, + "step": 71000 + }, + { + "epoch": 30.878022276555285, + "grad_norm": 16.074264526367188, + "learning_rate": 3.8580447172292855e-06, + "loss": 11.8634, + "step": 71050 + }, + { + "epoch": 30.899755501222494, + "grad_norm": 16.934633255004883, + "learning_rate": 3.853660675142482e-06, + "loss": 11.8481, + "step": 71100 + }, + { + "epoch": 30.921488725889702, + "grad_norm": 11.176169395446777, + "learning_rate": 3.849276633055677e-06, + "loss": 11.8678, + "step": 71150 + }, + { + "epoch": 30.943221950556914, + "grad_norm": 13.823466300964355, + "learning_rate": 3.844892590968873e-06, + "loss": 11.8525, + "step": 71200 + }, + { + "epoch": 30.964955175224123, + "grad_norm": 12.757974624633789, + "learning_rate": 3.84050854888207e-06, + "loss": 11.8596, + "step": 71250 + }, + { + "epoch": 30.986688399891335, + "grad_norm": 6.2555108070373535, + "learning_rate": 3.836124506795266e-06, + "loss": 11.8729, + "step": 71300 + }, + { + "epoch": 31.00825862537354, + "grad_norm": 7.998335361480713, + "learning_rate": 3.831740464708461e-06, + "loss": 11.7557, + "step": 71350 + }, + { + "epoch": 31.02999185004075, + "grad_norm": 7.063460826873779, + "learning_rate": 3.8273564226216575e-06, + "loss": 11.8673, + "step": 71400 + }, + { + "epoch": 31.05172507470796, + "grad_norm": 7.559152126312256, + "learning_rate": 3.822972380534854e-06, + "loss": 11.8614, + "step": 71450 + }, + { + "epoch": 31.07345829937517, + "grad_norm": 9.765264511108398, + "learning_rate": 3.818588338448049e-06, + "loss": 11.8243, + "step": 71500 + }, + { + "epoch": 31.09519152404238, + "grad_norm": 8.741211891174316, + "learning_rate": 3.8142042963612453e-06, + "loss": 11.8631, + "step": 71550 + }, + { + "epoch": 31.11692474870959, + "grad_norm": 10.110342025756836, + "learning_rate": 3.809820254274441e-06, + "loss": 11.8624, + "step": 71600 + }, + { + "epoch": 31.138657973376798, + "grad_norm": 7.525726318359375, + "learning_rate": 3.805436212187637e-06, + "loss": 11.8511, + "step": 71650 + }, + { + "epoch": 31.16039119804401, + "grad_norm": 6.264368057250977, + "learning_rate": 3.8010521701008336e-06, + "loss": 11.8447, + "step": 71700 + }, + { + "epoch": 31.18212442271122, + "grad_norm": 6.522670745849609, + "learning_rate": 3.7966681280140294e-06, + "loss": 11.8619, + "step": 71750 + }, + { + "epoch": 31.20385764737843, + "grad_norm": 17.985116958618164, + "learning_rate": 3.7922840859272252e-06, + "loss": 11.8723, + "step": 71800 + }, + { + "epoch": 31.22559087204564, + "grad_norm": 10.094488143920898, + "learning_rate": 3.787900043840421e-06, + "loss": 11.8429, + "step": 71850 + }, + { + "epoch": 31.24732409671285, + "grad_norm": 12.937264442443848, + "learning_rate": 3.7835160017536173e-06, + "loss": 11.8569, + "step": 71900 + }, + { + "epoch": 31.26905732138006, + "grad_norm": 20.594358444213867, + "learning_rate": 3.779131959666813e-06, + "loss": 11.8438, + "step": 71950 + }, + { + "epoch": 31.29079054604727, + "grad_norm": 10.052034378051758, + "learning_rate": 3.774747917580009e-06, + "loss": 11.8419, + "step": 72000 + }, + { + "epoch": 31.31252377071448, + "grad_norm": 8.929048538208008, + "learning_rate": 3.7703638754932047e-06, + "loss": 11.8299, + "step": 72050 + }, + { + "epoch": 31.33425699538169, + "grad_norm": 9.807400703430176, + "learning_rate": 3.7659798334064014e-06, + "loss": 11.8243, + "step": 72100 + }, + { + "epoch": 31.3559902200489, + "grad_norm": 17.955623626708984, + "learning_rate": 3.761595791319597e-06, + "loss": 11.8255, + "step": 72150 + }, + { + "epoch": 31.37772344471611, + "grad_norm": 19.642745971679688, + "learning_rate": 3.757211749232793e-06, + "loss": 11.8498, + "step": 72200 + }, + { + "epoch": 31.39945666938332, + "grad_norm": 8.74807357788086, + "learning_rate": 3.752827707145989e-06, + "loss": 11.8492, + "step": 72250 + }, + { + "epoch": 31.42118989405053, + "grad_norm": 8.516878128051758, + "learning_rate": 3.748443665059185e-06, + "loss": 11.8481, + "step": 72300 + }, + { + "epoch": 31.44292311871774, + "grad_norm": 26.898788452148438, + "learning_rate": 3.744059622972381e-06, + "loss": 11.8371, + "step": 72350 + }, + { + "epoch": 31.46465634338495, + "grad_norm": 6.748674392700195, + "learning_rate": 3.7396755808855766e-06, + "loss": 11.8486, + "step": 72400 + }, + { + "epoch": 31.48638956805216, + "grad_norm": 10.551872253417969, + "learning_rate": 3.7352915387987725e-06, + "loss": 11.8533, + "step": 72450 + }, + { + "epoch": 31.50812279271937, + "grad_norm": 14.1845703125, + "learning_rate": 3.7309074967119687e-06, + "loss": 11.8414, + "step": 72500 + }, + { + "epoch": 31.529856017386578, + "grad_norm": 16.51775360107422, + "learning_rate": 3.726523454625165e-06, + "loss": 11.8535, + "step": 72550 + }, + { + "epoch": 31.55158924205379, + "grad_norm": 24.120222091674805, + "learning_rate": 3.7221394125383607e-06, + "loss": 11.8557, + "step": 72600 + }, + { + "epoch": 31.573322466721, + "grad_norm": 6.063103199005127, + "learning_rate": 3.717755370451557e-06, + "loss": 11.8375, + "step": 72650 + }, + { + "epoch": 31.59505569138821, + "grad_norm": 11.34897232055664, + "learning_rate": 3.7133713283647528e-06, + "loss": 11.8438, + "step": 72700 + }, + { + "epoch": 31.61678891605542, + "grad_norm": 9.746992111206055, + "learning_rate": 3.7089872862779486e-06, + "loss": 11.8471, + "step": 72750 + }, + { + "epoch": 31.63852214072263, + "grad_norm": 8.114310264587402, + "learning_rate": 3.7046032441911444e-06, + "loss": 11.8412, + "step": 72800 + }, + { + "epoch": 31.66025536538984, + "grad_norm": 8.393730163574219, + "learning_rate": 3.70021920210434e-06, + "loss": 11.837, + "step": 72850 + }, + { + "epoch": 31.68198859005705, + "grad_norm": 8.245162963867188, + "learning_rate": 3.6958351600175364e-06, + "loss": 11.8553, + "step": 72900 + }, + { + "epoch": 31.70372181472426, + "grad_norm": 7.575582981109619, + "learning_rate": 3.6914511179307323e-06, + "loss": 11.845, + "step": 72950 + }, + { + "epoch": 31.72545503939147, + "grad_norm": 7.178465366363525, + "learning_rate": 3.6870670758439285e-06, + "loss": 11.8327, + "step": 73000 + }, + { + "epoch": 31.74718826405868, + "grad_norm": 8.260749816894531, + "learning_rate": 3.6826830337571247e-06, + "loss": 11.8478, + "step": 73050 + }, + { + "epoch": 31.76892148872589, + "grad_norm": 45.3736457824707, + "learning_rate": 3.6782989916703205e-06, + "loss": 11.8445, + "step": 73100 + }, + { + "epoch": 31.7906547133931, + "grad_norm": 15.68336296081543, + "learning_rate": 3.6739149495835163e-06, + "loss": 11.8551, + "step": 73150 + }, + { + "epoch": 31.81238793806031, + "grad_norm": 5.821103572845459, + "learning_rate": 3.669530907496712e-06, + "loss": 11.8517, + "step": 73200 + }, + { + "epoch": 31.83412116272752, + "grad_norm": 12.418885231018066, + "learning_rate": 3.665146865409908e-06, + "loss": 11.8517, + "step": 73250 + }, + { + "epoch": 31.85585438739473, + "grad_norm": 8.705698013305664, + "learning_rate": 3.660762823323104e-06, + "loss": 11.8424, + "step": 73300 + }, + { + "epoch": 31.87758761206194, + "grad_norm": 9.667759895324707, + "learning_rate": 3.6563787812363e-06, + "loss": 11.8561, + "step": 73350 + }, + { + "epoch": 31.89932083672915, + "grad_norm": 14.76951789855957, + "learning_rate": 3.651994739149496e-06, + "loss": 11.8605, + "step": 73400 + }, + { + "epoch": 31.921054061396358, + "grad_norm": 14.691853523254395, + "learning_rate": 3.6476106970626925e-06, + "loss": 11.8595, + "step": 73450 + }, + { + "epoch": 31.94278728606357, + "grad_norm": 7.9246721267700195, + "learning_rate": 3.6432266549758883e-06, + "loss": 11.8473, + "step": 73500 + }, + { + "epoch": 31.96452051073078, + "grad_norm": 5.882972240447998, + "learning_rate": 3.638842612889084e-06, + "loss": 11.8418, + "step": 73550 + }, + { + "epoch": 31.98625373539799, + "grad_norm": 6.664644718170166, + "learning_rate": 3.63445857080228e-06, + "loss": 11.8388, + "step": 73600 + }, + { + "epoch": 32.007823960880195, + "grad_norm": 7.938138961791992, + "learning_rate": 3.630074528715476e-06, + "loss": 11.7538, + "step": 73650 + }, + { + "epoch": 32.029557185547404, + "grad_norm": 8.011933326721191, + "learning_rate": 3.625690486628672e-06, + "loss": 11.8182, + "step": 73700 + }, + { + "epoch": 32.05129041021461, + "grad_norm": 11.604764938354492, + "learning_rate": 3.6213064445418678e-06, + "loss": 11.8178, + "step": 73750 + }, + { + "epoch": 32.07302363488183, + "grad_norm": 13.241369247436523, + "learning_rate": 3.6169224024550636e-06, + "loss": 11.8383, + "step": 73800 + }, + { + "epoch": 32.09475685954904, + "grad_norm": 5.11595344543457, + "learning_rate": 3.6125383603682594e-06, + "loss": 11.8519, + "step": 73850 + }, + { + "epoch": 32.116490084216245, + "grad_norm": 17.570140838623047, + "learning_rate": 3.608154318281456e-06, + "loss": 11.8329, + "step": 73900 + }, + { + "epoch": 32.138223308883454, + "grad_norm": 10.764384269714355, + "learning_rate": 3.603770276194652e-06, + "loss": 11.8312, + "step": 73950 + }, + { + "epoch": 32.15995653355066, + "grad_norm": 21.758943557739258, + "learning_rate": 3.5993862341078477e-06, + "loss": 11.8513, + "step": 74000 + }, + { + "epoch": 32.18168975821788, + "grad_norm": 6.227720260620117, + "learning_rate": 3.595002192021044e-06, + "loss": 11.8468, + "step": 74050 + }, + { + "epoch": 32.20342298288509, + "grad_norm": 6.502994537353516, + "learning_rate": 3.5906181499342397e-06, + "loss": 11.8287, + "step": 74100 + }, + { + "epoch": 32.225156207552295, + "grad_norm": 8.124176025390625, + "learning_rate": 3.5862341078474355e-06, + "loss": 11.8244, + "step": 74150 + }, + { + "epoch": 32.246889432219504, + "grad_norm": 17.224422454833984, + "learning_rate": 3.5818500657606313e-06, + "loss": 11.8529, + "step": 74200 + }, + { + "epoch": 32.26862265688671, + "grad_norm": 16.075273513793945, + "learning_rate": 3.577466023673827e-06, + "loss": 11.8337, + "step": 74250 + }, + { + "epoch": 32.29035588155393, + "grad_norm": 10.724888801574707, + "learning_rate": 3.573081981587024e-06, + "loss": 11.8234, + "step": 74300 + }, + { + "epoch": 32.31208910622114, + "grad_norm": 13.913077354431152, + "learning_rate": 3.5686979395002196e-06, + "loss": 11.824, + "step": 74350 + }, + { + "epoch": 32.333822330888346, + "grad_norm": 5.98539924621582, + "learning_rate": 3.5643138974134154e-06, + "loss": 11.8469, + "step": 74400 + }, + { + "epoch": 32.355555555555554, + "grad_norm": 16.95889663696289, + "learning_rate": 3.5599298553266117e-06, + "loss": 11.8407, + "step": 74450 + }, + { + "epoch": 32.37728878022276, + "grad_norm": 11.7858304977417, + "learning_rate": 3.5555458132398075e-06, + "loss": 11.8458, + "step": 74500 + }, + { + "epoch": 32.39902200488998, + "grad_norm": 12.35476303100586, + "learning_rate": 3.5511617711530033e-06, + "loss": 11.8322, + "step": 74550 + }, + { + "epoch": 32.42075522955719, + "grad_norm": 8.592928886413574, + "learning_rate": 3.546777729066199e-06, + "loss": 11.8464, + "step": 74600 + }, + { + "epoch": 32.442488454224396, + "grad_norm": 15.99875259399414, + "learning_rate": 3.5423936869793953e-06, + "loss": 11.8412, + "step": 74650 + }, + { + "epoch": 32.464221678891604, + "grad_norm": 6.029876232147217, + "learning_rate": 3.538009644892591e-06, + "loss": 11.8529, + "step": 74700 + }, + { + "epoch": 32.48595490355881, + "grad_norm": 25.144210815429688, + "learning_rate": 3.5336256028057874e-06, + "loss": 11.8538, + "step": 74750 + }, + { + "epoch": 32.50768812822603, + "grad_norm": 4.607775688171387, + "learning_rate": 3.529241560718983e-06, + "loss": 11.8652, + "step": 74800 + }, + { + "epoch": 32.52942135289324, + "grad_norm": 9.385605812072754, + "learning_rate": 3.5248575186321794e-06, + "loss": 11.8256, + "step": 74850 + }, + { + "epoch": 32.551154577560446, + "grad_norm": 8.230783462524414, + "learning_rate": 3.5204734765453752e-06, + "loss": 11.8065, + "step": 74900 + }, + { + "epoch": 32.572887802227655, + "grad_norm": 38.624691009521484, + "learning_rate": 3.516089434458571e-06, + "loss": 11.8167, + "step": 74950 + }, + { + "epoch": 32.59462102689486, + "grad_norm": 17.61267852783203, + "learning_rate": 3.511705392371767e-06, + "loss": 11.8357, + "step": 75000 + }, + { + "epoch": 32.61635425156208, + "grad_norm": 6.209005355834961, + "learning_rate": 3.507321350284963e-06, + "loss": 11.8375, + "step": 75050 + }, + { + "epoch": 32.63808747622929, + "grad_norm": 14.121482849121094, + "learning_rate": 3.502937308198159e-06, + "loss": 11.8476, + "step": 75100 + }, + { + "epoch": 32.659820700896496, + "grad_norm": 28.74132537841797, + "learning_rate": 3.4985532661113547e-06, + "loss": 11.8317, + "step": 75150 + }, + { + "epoch": 32.681553925563705, + "grad_norm": 6.806987762451172, + "learning_rate": 3.4941692240245514e-06, + "loss": 11.8527, + "step": 75200 + }, + { + "epoch": 32.70328715023091, + "grad_norm": 7.174561500549316, + "learning_rate": 3.489785181937747e-06, + "loss": 11.8188, + "step": 75250 + }, + { + "epoch": 32.72502037489812, + "grad_norm": 13.119464874267578, + "learning_rate": 3.485401139850943e-06, + "loss": 11.8392, + "step": 75300 + }, + { + "epoch": 32.74675359956534, + "grad_norm": 8.41006851196289, + "learning_rate": 3.4810170977641388e-06, + "loss": 11.8283, + "step": 75350 + }, + { + "epoch": 32.768486824232546, + "grad_norm": 10.47354507446289, + "learning_rate": 3.4766330556773346e-06, + "loss": 11.8295, + "step": 75400 + }, + { + "epoch": 32.790220048899755, + "grad_norm": 7.730106353759766, + "learning_rate": 3.472249013590531e-06, + "loss": 11.8672, + "step": 75450 + }, + { + "epoch": 32.811953273566964, + "grad_norm": 6.337311744689941, + "learning_rate": 3.4678649715037266e-06, + "loss": 11.8289, + "step": 75500 + }, + { + "epoch": 32.83368649823417, + "grad_norm": 9.5441255569458, + "learning_rate": 3.4634809294169225e-06, + "loss": 11.8215, + "step": 75550 + }, + { + "epoch": 32.85541972290139, + "grad_norm": 8.01675796508789, + "learning_rate": 3.4590968873301183e-06, + "loss": 11.8303, + "step": 75600 + }, + { + "epoch": 32.8771529475686, + "grad_norm": 10.308701515197754, + "learning_rate": 3.454712845243315e-06, + "loss": 11.8337, + "step": 75650 + }, + { + "epoch": 32.898886172235805, + "grad_norm": 8.78437614440918, + "learning_rate": 3.4503288031565107e-06, + "loss": 11.8283, + "step": 75700 + }, + { + "epoch": 32.920619396903014, + "grad_norm": 12.1674222946167, + "learning_rate": 3.4459447610697065e-06, + "loss": 11.8378, + "step": 75750 + }, + { + "epoch": 32.94235262157022, + "grad_norm": 11.723808288574219, + "learning_rate": 3.4415607189829024e-06, + "loss": 11.8242, + "step": 75800 + }, + { + "epoch": 32.96408584623744, + "grad_norm": 18.23768424987793, + "learning_rate": 3.4371766768960986e-06, + "loss": 11.8577, + "step": 75850 + }, + { + "epoch": 32.98581907090465, + "grad_norm": 23.5877742767334, + "learning_rate": 3.4327926348092944e-06, + "loss": 11.8222, + "step": 75900 + }, + { + "epoch": 33.007389296386854, + "grad_norm": 6.948608875274658, + "learning_rate": 3.42840859272249e-06, + "loss": 11.7388, + "step": 75950 + }, + { + "epoch": 33.02912252105406, + "grad_norm": 4.4768476486206055, + "learning_rate": 3.424024550635686e-06, + "loss": 11.8317, + "step": 76000 + }, + { + "epoch": 33.05085574572127, + "grad_norm": 7.8470282554626465, + "learning_rate": 3.4196405085488823e-06, + "loss": 11.8416, + "step": 76050 + }, + { + "epoch": 33.07258897038848, + "grad_norm": 7.3259053230285645, + "learning_rate": 3.4152564664620785e-06, + "loss": 11.8322, + "step": 76100 + }, + { + "epoch": 33.09432219505569, + "grad_norm": 16.797231674194336, + "learning_rate": 3.4108724243752743e-06, + "loss": 11.8436, + "step": 76150 + }, + { + "epoch": 33.116055419722905, + "grad_norm": 4.982487201690674, + "learning_rate": 3.4064883822884705e-06, + "loss": 11.799, + "step": 76200 + }, + { + "epoch": 33.13778864439011, + "grad_norm": 8.252666473388672, + "learning_rate": 3.4021043402016663e-06, + "loss": 11.8154, + "step": 76250 + }, + { + "epoch": 33.15952186905732, + "grad_norm": 9.021413803100586, + "learning_rate": 3.397720298114862e-06, + "loss": 11.8364, + "step": 76300 + }, + { + "epoch": 33.18125509372453, + "grad_norm": 4.675612926483154, + "learning_rate": 3.393336256028058e-06, + "loss": 11.8131, + "step": 76350 + }, + { + "epoch": 33.20298831839174, + "grad_norm": 8.468708992004395, + "learning_rate": 3.3889522139412538e-06, + "loss": 11.8201, + "step": 76400 + }, + { + "epoch": 33.224721543058955, + "grad_norm": 21.99992561340332, + "learning_rate": 3.38456817185445e-06, + "loss": 11.8318, + "step": 76450 + }, + { + "epoch": 33.24645476772616, + "grad_norm": 5.2964301109313965, + "learning_rate": 3.3801841297676462e-06, + "loss": 11.8196, + "step": 76500 + }, + { + "epoch": 33.26818799239337, + "grad_norm": 12.34626579284668, + "learning_rate": 3.375800087680842e-06, + "loss": 11.8333, + "step": 76550 + }, + { + "epoch": 33.28992121706058, + "grad_norm": 12.113372802734375, + "learning_rate": 3.3714160455940383e-06, + "loss": 11.8187, + "step": 76600 + }, + { + "epoch": 33.31165444172779, + "grad_norm": 15.364481925964355, + "learning_rate": 3.367032003507234e-06, + "loss": 11.8173, + "step": 76650 + }, + { + "epoch": 33.333387666395, + "grad_norm": 4.8235063552856445, + "learning_rate": 3.36264796142043e-06, + "loss": 11.8306, + "step": 76700 + }, + { + "epoch": 33.355120891062214, + "grad_norm": 23.78803253173828, + "learning_rate": 3.3582639193336257e-06, + "loss": 11.8027, + "step": 76750 + }, + { + "epoch": 33.37685411572942, + "grad_norm": 9.344151496887207, + "learning_rate": 3.3538798772468215e-06, + "loss": 11.8113, + "step": 76800 + }, + { + "epoch": 33.39858734039663, + "grad_norm": 8.895915985107422, + "learning_rate": 3.3494958351600178e-06, + "loss": 11.8214, + "step": 76850 + }, + { + "epoch": 33.42032056506384, + "grad_norm": 33.99968719482422, + "learning_rate": 3.3451117930732136e-06, + "loss": 11.8217, + "step": 76900 + }, + { + "epoch": 33.44205378973105, + "grad_norm": 9.92707633972168, + "learning_rate": 3.34072775098641e-06, + "loss": 11.8012, + "step": 76950 + }, + { + "epoch": 33.463787014398264, + "grad_norm": 12.355010986328125, + "learning_rate": 3.336343708899606e-06, + "loss": 11.8093, + "step": 77000 + }, + { + "epoch": 33.48552023906547, + "grad_norm": 12.512097358703613, + "learning_rate": 3.331959666812802e-06, + "loss": 11.8357, + "step": 77050 + }, + { + "epoch": 33.50725346373268, + "grad_norm": 4.472128391265869, + "learning_rate": 3.3275756247259977e-06, + "loss": 11.8175, + "step": 77100 + }, + { + "epoch": 33.52898668839989, + "grad_norm": 12.460317611694336, + "learning_rate": 3.3231915826391935e-06, + "loss": 11.8219, + "step": 77150 + }, + { + "epoch": 33.5507199130671, + "grad_norm": 10.255359649658203, + "learning_rate": 3.3188075405523897e-06, + "loss": 11.8135, + "step": 77200 + }, + { + "epoch": 33.572453137734314, + "grad_norm": 9.60875415802002, + "learning_rate": 3.3144234984655855e-06, + "loss": 11.8244, + "step": 77250 + }, + { + "epoch": 33.59418636240152, + "grad_norm": 7.315709590911865, + "learning_rate": 3.3100394563787813e-06, + "loss": 11.8137, + "step": 77300 + }, + { + "epoch": 33.61591958706873, + "grad_norm": 16.642723083496094, + "learning_rate": 3.305655414291977e-06, + "loss": 11.8368, + "step": 77350 + }, + { + "epoch": 33.63765281173594, + "grad_norm": 4.400660991668701, + "learning_rate": 3.301271372205174e-06, + "loss": 11.8195, + "step": 77400 + }, + { + "epoch": 33.65938603640315, + "grad_norm": 8.862713813781738, + "learning_rate": 3.2968873301183696e-06, + "loss": 11.8168, + "step": 77450 + }, + { + "epoch": 33.681119261070364, + "grad_norm": 10.427742004394531, + "learning_rate": 3.2925032880315654e-06, + "loss": 11.8033, + "step": 77500 + }, + { + "epoch": 33.70285248573757, + "grad_norm": 6.926135540008545, + "learning_rate": 3.2881192459447612e-06, + "loss": 11.828, + "step": 77550 + }, + { + "epoch": 33.72458571040478, + "grad_norm": 5.068178176879883, + "learning_rate": 3.2837352038579575e-06, + "loss": 11.8273, + "step": 77600 + }, + { + "epoch": 33.74631893507199, + "grad_norm": 6.944793224334717, + "learning_rate": 3.2793511617711533e-06, + "loss": 11.813, + "step": 77650 + }, + { + "epoch": 33.7680521597392, + "grad_norm": 36.322383880615234, + "learning_rate": 3.274967119684349e-06, + "loss": 11.8141, + "step": 77700 + }, + { + "epoch": 33.789785384406414, + "grad_norm": 6.488020420074463, + "learning_rate": 3.270583077597545e-06, + "loss": 11.8322, + "step": 77750 + }, + { + "epoch": 33.81151860907362, + "grad_norm": 9.435515403747559, + "learning_rate": 3.2661990355107407e-06, + "loss": 11.8242, + "step": 77800 + }, + { + "epoch": 33.83325183374083, + "grad_norm": 4.060996055603027, + "learning_rate": 3.2618149934239374e-06, + "loss": 11.816, + "step": 77850 + }, + { + "epoch": 33.85498505840804, + "grad_norm": 13.589747428894043, + "learning_rate": 3.257430951337133e-06, + "loss": 11.8091, + "step": 77900 + }, + { + "epoch": 33.87671828307525, + "grad_norm": 11.052616119384766, + "learning_rate": 3.253046909250329e-06, + "loss": 11.8391, + "step": 77950 + }, + { + "epoch": 33.898451507742465, + "grad_norm": 10.746622085571289, + "learning_rate": 3.2486628671635252e-06, + "loss": 11.8432, + "step": 78000 + }, + { + "epoch": 33.92018473240967, + "grad_norm": 10.50125503540039, + "learning_rate": 3.244278825076721e-06, + "loss": 11.8408, + "step": 78050 + }, + { + "epoch": 33.94191795707688, + "grad_norm": 6.73277473449707, + "learning_rate": 3.239894782989917e-06, + "loss": 11.8296, + "step": 78100 + }, + { + "epoch": 33.96365118174409, + "grad_norm": 10.480985641479492, + "learning_rate": 3.2355107409031126e-06, + "loss": 11.8362, + "step": 78150 + }, + { + "epoch": 33.9853844064113, + "grad_norm": 7.480873107910156, + "learning_rate": 3.231126698816309e-06, + "loss": 11.8262, + "step": 78200 + }, + { + "epoch": 34.00695463189351, + "grad_norm": 9.988080024719238, + "learning_rate": 3.2267426567295047e-06, + "loss": 11.735, + "step": 78250 + }, + { + "epoch": 34.028687856560715, + "grad_norm": 10.169675827026367, + "learning_rate": 3.222358614642701e-06, + "loss": 11.8251, + "step": 78300 + }, + { + "epoch": 34.050421081227924, + "grad_norm": 13.815673828125, + "learning_rate": 3.217974572555897e-06, + "loss": 11.8091, + "step": 78350 + }, + { + "epoch": 34.07215430589514, + "grad_norm": 10.704404830932617, + "learning_rate": 3.213590530469093e-06, + "loss": 11.8009, + "step": 78400 + }, + { + "epoch": 34.09388753056235, + "grad_norm": 9.71978759765625, + "learning_rate": 3.2092064883822888e-06, + "loss": 11.8105, + "step": 78450 + }, + { + "epoch": 34.11562075522956, + "grad_norm": 18.075393676757812, + "learning_rate": 3.2048224462954846e-06, + "loss": 11.8083, + "step": 78500 + }, + { + "epoch": 34.137353979896766, + "grad_norm": 10.046432495117188, + "learning_rate": 3.2004384042086804e-06, + "loss": 11.7901, + "step": 78550 + }, + { + "epoch": 34.159087204563974, + "grad_norm": 11.01378345489502, + "learning_rate": 3.1960543621218766e-06, + "loss": 11.7937, + "step": 78600 + }, + { + "epoch": 34.18082042923119, + "grad_norm": 20.022729873657227, + "learning_rate": 3.1916703200350724e-06, + "loss": 11.8135, + "step": 78650 + }, + { + "epoch": 34.2025536538984, + "grad_norm": 6.636748790740967, + "learning_rate": 3.1872862779482687e-06, + "loss": 11.8014, + "step": 78700 + }, + { + "epoch": 34.22428687856561, + "grad_norm": 13.776731491088867, + "learning_rate": 3.182902235861465e-06, + "loss": 11.8353, + "step": 78750 + }, + { + "epoch": 34.246020103232816, + "grad_norm": 4.75822114944458, + "learning_rate": 3.1785181937746607e-06, + "loss": 11.7999, + "step": 78800 + }, + { + "epoch": 34.267753327900024, + "grad_norm": 11.153389930725098, + "learning_rate": 3.1741341516878565e-06, + "loss": 11.8159, + "step": 78850 + }, + { + "epoch": 34.28948655256724, + "grad_norm": 13.353851318359375, + "learning_rate": 3.1697501096010523e-06, + "loss": 11.807, + "step": 78900 + }, + { + "epoch": 34.31121977723445, + "grad_norm": 5.565258026123047, + "learning_rate": 3.165366067514248e-06, + "loss": 11.8092, + "step": 78950 + }, + { + "epoch": 34.33295300190166, + "grad_norm": 16.32341194152832, + "learning_rate": 3.1609820254274444e-06, + "loss": 11.8032, + "step": 79000 + }, + { + "epoch": 34.354686226568866, + "grad_norm": 8.501863479614258, + "learning_rate": 3.15659798334064e-06, + "loss": 11.8121, + "step": 79050 + }, + { + "epoch": 34.376419451236075, + "grad_norm": 5.864038467407227, + "learning_rate": 3.152213941253836e-06, + "loss": 11.8086, + "step": 79100 + }, + { + "epoch": 34.39815267590329, + "grad_norm": 12.040675163269043, + "learning_rate": 3.1478298991670327e-06, + "loss": 11.8134, + "step": 79150 + }, + { + "epoch": 34.4198859005705, + "grad_norm": 24.84530258178711, + "learning_rate": 3.1434458570802285e-06, + "loss": 11.8149, + "step": 79200 + }, + { + "epoch": 34.44161912523771, + "grad_norm": 11.11407470703125, + "learning_rate": 3.1390618149934243e-06, + "loss": 11.8105, + "step": 79250 + }, + { + "epoch": 34.463352349904916, + "grad_norm": 15.913960456848145, + "learning_rate": 3.13467777290662e-06, + "loss": 11.8004, + "step": 79300 + }, + { + "epoch": 34.485085574572125, + "grad_norm": 7.755058288574219, + "learning_rate": 3.1302937308198163e-06, + "loss": 11.8044, + "step": 79350 + }, + { + "epoch": 34.50681879923934, + "grad_norm": 5.433537006378174, + "learning_rate": 3.125909688733012e-06, + "loss": 11.8161, + "step": 79400 + }, + { + "epoch": 34.52855202390655, + "grad_norm": 6.0616912841796875, + "learning_rate": 3.121525646646208e-06, + "loss": 11.806, + "step": 79450 + }, + { + "epoch": 34.55028524857376, + "grad_norm": 8.498095512390137, + "learning_rate": 3.1171416045594038e-06, + "loss": 11.8407, + "step": 79500 + }, + { + "epoch": 34.572018473240966, + "grad_norm": 24.549198150634766, + "learning_rate": 3.1127575624725996e-06, + "loss": 11.8168, + "step": 79550 + }, + { + "epoch": 34.593751697908175, + "grad_norm": 11.136092185974121, + "learning_rate": 3.1083735203857962e-06, + "loss": 11.8184, + "step": 79600 + }, + { + "epoch": 34.61548492257539, + "grad_norm": 8.212324142456055, + "learning_rate": 3.103989478298992e-06, + "loss": 11.802, + "step": 79650 + }, + { + "epoch": 34.6372181472426, + "grad_norm": 4.912588596343994, + "learning_rate": 3.099605436212188e-06, + "loss": 11.8164, + "step": 79700 + }, + { + "epoch": 34.65895137190981, + "grad_norm": 5.911812782287598, + "learning_rate": 3.095221394125384e-06, + "loss": 11.8035, + "step": 79750 + }, + { + "epoch": 34.68068459657702, + "grad_norm": 13.612801551818848, + "learning_rate": 3.09083735203858e-06, + "loss": 11.8221, + "step": 79800 + }, + { + "epoch": 34.702417821244225, + "grad_norm": 7.984292030334473, + "learning_rate": 3.0864533099517757e-06, + "loss": 11.8073, + "step": 79850 + }, + { + "epoch": 34.724151045911434, + "grad_norm": 12.358894348144531, + "learning_rate": 3.0820692678649715e-06, + "loss": 11.793, + "step": 79900 + }, + { + "epoch": 34.74588427057865, + "grad_norm": 9.695011138916016, + "learning_rate": 3.0776852257781673e-06, + "loss": 11.8138, + "step": 79950 + }, + { + "epoch": 34.76761749524586, + "grad_norm": 13.982564926147461, + "learning_rate": 3.0733011836913636e-06, + "loss": 11.8171, + "step": 80000 + }, + { + "epoch": 34.76761749524586, + "eval_cer": 0.0757471023169121, + "eval_loss": 2.4033260345458984, + "eval_runtime": 399.2668, + "eval_samples_per_second": 13.54, + "eval_steps_per_second": 3.386, + "eval_wer": 0.22807348092322186, + "step": 80000 + }, + { + "epoch": 34.78935071991307, + "grad_norm": 12.179760932922363, + "learning_rate": 3.06891714160456e-06, + "loss": 11.8053, + "step": 80050 + }, + { + "epoch": 34.811083944580275, + "grad_norm": 11.413451194763184, + "learning_rate": 3.0645330995177556e-06, + "loss": 11.8123, + "step": 80100 + }, + { + "epoch": 34.832817169247484, + "grad_norm": 4.437108993530273, + "learning_rate": 3.060149057430952e-06, + "loss": 11.8026, + "step": 80150 + }, + { + "epoch": 34.8545503939147, + "grad_norm": 73.1333236694336, + "learning_rate": 3.0557650153441477e-06, + "loss": 11.8005, + "step": 80200 + }, + { + "epoch": 34.87628361858191, + "grad_norm": 8.468038558959961, + "learning_rate": 3.0513809732573435e-06, + "loss": 11.8035, + "step": 80250 + }, + { + "epoch": 34.89801684324912, + "grad_norm": 6.311350345611572, + "learning_rate": 3.0469969311705393e-06, + "loss": 11.8177, + "step": 80300 + }, + { + "epoch": 34.919750067916326, + "grad_norm": 6.435243606567383, + "learning_rate": 3.0426128890837355e-06, + "loss": 11.8085, + "step": 80350 + }, + { + "epoch": 34.941483292583534, + "grad_norm": 23.506589889526367, + "learning_rate": 3.0382288469969313e-06, + "loss": 11.8198, + "step": 80400 + }, + { + "epoch": 34.96321651725075, + "grad_norm": 14.792353630065918, + "learning_rate": 3.033844804910127e-06, + "loss": 11.8236, + "step": 80450 + }, + { + "epoch": 34.98494974191796, + "grad_norm": 9.948033332824707, + "learning_rate": 3.0294607628233234e-06, + "loss": 11.7999, + "step": 80500 + }, + { + "epoch": 35.006519967400166, + "grad_norm": 10.65179443359375, + "learning_rate": 3.0250767207365196e-06, + "loss": 11.7357, + "step": 80550 + }, + { + "epoch": 35.028253192067375, + "grad_norm": 17.818878173828125, + "learning_rate": 3.0206926786497154e-06, + "loss": 11.8027, + "step": 80600 + }, + { + "epoch": 35.04998641673458, + "grad_norm": 12.1105318069458, + "learning_rate": 3.0163086365629112e-06, + "loss": 11.8061, + "step": 80650 + }, + { + "epoch": 35.07171964140179, + "grad_norm": 9.265717506408691, + "learning_rate": 3.011924594476107e-06, + "loss": 11.8093, + "step": 80700 + }, + { + "epoch": 35.093452866069, + "grad_norm": 7.630993366241455, + "learning_rate": 3.0075405523893033e-06, + "loss": 11.8209, + "step": 80750 + }, + { + "epoch": 35.115186090736216, + "grad_norm": 9.022866249084473, + "learning_rate": 3.003156510302499e-06, + "loss": 11.8025, + "step": 80800 + }, + { + "epoch": 35.136919315403425, + "grad_norm": 7.761841297149658, + "learning_rate": 2.998772468215695e-06, + "loss": 11.8045, + "step": 80850 + }, + { + "epoch": 35.158652540070634, + "grad_norm": 5.690446853637695, + "learning_rate": 2.9943884261288915e-06, + "loss": 11.7887, + "step": 80900 + }, + { + "epoch": 35.18038576473784, + "grad_norm": 8.062559127807617, + "learning_rate": 2.9900043840420874e-06, + "loss": 11.8076, + "step": 80950 + }, + { + "epoch": 35.20211898940505, + "grad_norm": 11.13079833984375, + "learning_rate": 2.985620341955283e-06, + "loss": 11.791, + "step": 81000 + }, + { + "epoch": 35.22385221407227, + "grad_norm": 9.493050575256348, + "learning_rate": 2.981236299868479e-06, + "loss": 11.7975, + "step": 81050 + }, + { + "epoch": 35.245585438739475, + "grad_norm": 5.601952075958252, + "learning_rate": 2.9768522577816748e-06, + "loss": 11.8132, + "step": 81100 + }, + { + "epoch": 35.267318663406684, + "grad_norm": 16.74399757385254, + "learning_rate": 2.972468215694871e-06, + "loss": 11.8072, + "step": 81150 + }, + { + "epoch": 35.28905188807389, + "grad_norm": 14.785292625427246, + "learning_rate": 2.968084173608067e-06, + "loss": 11.7861, + "step": 81200 + }, + { + "epoch": 35.3107851127411, + "grad_norm": 4.938207149505615, + "learning_rate": 2.9637001315212626e-06, + "loss": 11.7976, + "step": 81250 + }, + { + "epoch": 35.33251833740831, + "grad_norm": 7.194094181060791, + "learning_rate": 2.9593160894344585e-06, + "loss": 11.8092, + "step": 81300 + }, + { + "epoch": 35.354251562075525, + "grad_norm": 8.084842681884766, + "learning_rate": 2.954932047347655e-06, + "loss": 11.8066, + "step": 81350 + }, + { + "epoch": 35.375984786742734, + "grad_norm": 13.50389289855957, + "learning_rate": 2.950548005260851e-06, + "loss": 11.8166, + "step": 81400 + }, + { + "epoch": 35.39771801140994, + "grad_norm": 34.29204559326172, + "learning_rate": 2.9461639631740467e-06, + "loss": 11.7957, + "step": 81450 + }, + { + "epoch": 35.41945123607715, + "grad_norm": 115.18916320800781, + "learning_rate": 2.9417799210872425e-06, + "loss": 11.7966, + "step": 81500 + }, + { + "epoch": 35.44118446074436, + "grad_norm": 6.231071949005127, + "learning_rate": 2.9373958790004388e-06, + "loss": 11.8096, + "step": 81550 + }, + { + "epoch": 35.462917685411576, + "grad_norm": 12.070874214172363, + "learning_rate": 2.9330118369136346e-06, + "loss": 11.7912, + "step": 81600 + }, + { + "epoch": 35.484650910078784, + "grad_norm": 11.014456748962402, + "learning_rate": 2.9286277948268304e-06, + "loss": 11.7999, + "step": 81650 + }, + { + "epoch": 35.50638413474599, + "grad_norm": 7.878298759460449, + "learning_rate": 2.924243752740026e-06, + "loss": 11.7924, + "step": 81700 + }, + { + "epoch": 35.5281173594132, + "grad_norm": 9.946538925170898, + "learning_rate": 2.9198597106532224e-06, + "loss": 11.7876, + "step": 81750 + }, + { + "epoch": 35.54985058408041, + "grad_norm": 9.082895278930664, + "learning_rate": 2.9154756685664187e-06, + "loss": 11.7947, + "step": 81800 + }, + { + "epoch": 35.571583808747626, + "grad_norm": 8.261942863464355, + "learning_rate": 2.9110916264796145e-06, + "loss": 11.8, + "step": 81850 + }, + { + "epoch": 35.593317033414834, + "grad_norm": 8.274785041809082, + "learning_rate": 2.9067075843928107e-06, + "loss": 11.7947, + "step": 81900 + }, + { + "epoch": 35.61505025808204, + "grad_norm": 12.555307388305664, + "learning_rate": 2.9023235423060065e-06, + "loss": 11.8046, + "step": 81950 + }, + { + "epoch": 35.63678348274925, + "grad_norm": 16.864561080932617, + "learning_rate": 2.8979395002192023e-06, + "loss": 11.8078, + "step": 82000 + }, + { + "epoch": 35.65851670741646, + "grad_norm": 7.3884782791137695, + "learning_rate": 2.893555458132398e-06, + "loss": 11.7997, + "step": 82050 + }, + { + "epoch": 35.680249932083676, + "grad_norm": 5.330382823944092, + "learning_rate": 2.889171416045594e-06, + "loss": 11.7931, + "step": 82100 + }, + { + "epoch": 35.701983156750885, + "grad_norm": 135.67330932617188, + "learning_rate": 2.88478737395879e-06, + "loss": 11.8205, + "step": 82150 + }, + { + "epoch": 35.72371638141809, + "grad_norm": 11.841288566589355, + "learning_rate": 2.880403331871986e-06, + "loss": 11.8093, + "step": 82200 + }, + { + "epoch": 35.7454496060853, + "grad_norm": 7.48586368560791, + "learning_rate": 2.8760192897851822e-06, + "loss": 11.7827, + "step": 82250 + }, + { + "epoch": 35.76718283075251, + "grad_norm": 8.122782707214355, + "learning_rate": 2.8716352476983785e-06, + "loss": 11.8002, + "step": 82300 + }, + { + "epoch": 35.788916055419726, + "grad_norm": 11.976639747619629, + "learning_rate": 2.8672512056115743e-06, + "loss": 11.7877, + "step": 82350 + }, + { + "epoch": 35.810649280086935, + "grad_norm": 4.260416507720947, + "learning_rate": 2.86286716352477e-06, + "loss": 11.8016, + "step": 82400 + }, + { + "epoch": 35.83238250475414, + "grad_norm": 6.422642230987549, + "learning_rate": 2.858483121437966e-06, + "loss": 11.7963, + "step": 82450 + }, + { + "epoch": 35.85411572942135, + "grad_norm": 17.52088165283203, + "learning_rate": 2.8540990793511617e-06, + "loss": 11.8099, + "step": 82500 + }, + { + "epoch": 35.87584895408856, + "grad_norm": 18.16527557373047, + "learning_rate": 2.849715037264358e-06, + "loss": 11.8045, + "step": 82550 + }, + { + "epoch": 35.897582178755776, + "grad_norm": 21.54142189025879, + "learning_rate": 2.8453309951775538e-06, + "loss": 11.8147, + "step": 82600 + }, + { + "epoch": 35.919315403422985, + "grad_norm": 10.738289833068848, + "learning_rate": 2.8409469530907496e-06, + "loss": 11.7977, + "step": 82650 + }, + { + "epoch": 35.94104862809019, + "grad_norm": 7.5517144203186035, + "learning_rate": 2.8365629110039462e-06, + "loss": 11.8223, + "step": 82700 + }, + { + "epoch": 35.9627818527574, + "grad_norm": 17.005064010620117, + "learning_rate": 2.832178868917142e-06, + "loss": 11.7941, + "step": 82750 + }, + { + "epoch": 35.98451507742461, + "grad_norm": 20.802410125732422, + "learning_rate": 2.827794826830338e-06, + "loss": 11.8144, + "step": 82800 + }, + { + "epoch": 36.00608530290682, + "grad_norm": 4.643016815185547, + "learning_rate": 2.8234107847435337e-06, + "loss": 11.7041, + "step": 82850 + }, + { + "epoch": 36.02781852757403, + "grad_norm": 5.0188398361206055, + "learning_rate": 2.81902674265673e-06, + "loss": 11.7857, + "step": 82900 + }, + { + "epoch": 36.049551752241236, + "grad_norm": 43.052833557128906, + "learning_rate": 2.8146427005699257e-06, + "loss": 11.7965, + "step": 82950 + }, + { + "epoch": 36.07128497690845, + "grad_norm": 5.6486382484436035, + "learning_rate": 2.8102586584831215e-06, + "loss": 11.7801, + "step": 83000 + }, + { + "epoch": 36.09301820157566, + "grad_norm": 9.257708549499512, + "learning_rate": 2.8058746163963173e-06, + "loss": 11.7884, + "step": 83050 + }, + { + "epoch": 36.11475142624287, + "grad_norm": 9.969672203063965, + "learning_rate": 2.801490574309514e-06, + "loss": 11.7893, + "step": 83100 + }, + { + "epoch": 36.13648465091008, + "grad_norm": 4.864919185638428, + "learning_rate": 2.79710653222271e-06, + "loss": 11.7891, + "step": 83150 + }, + { + "epoch": 36.158217875577286, + "grad_norm": 15.945795059204102, + "learning_rate": 2.7927224901359056e-06, + "loss": 11.8076, + "step": 83200 + }, + { + "epoch": 36.1799511002445, + "grad_norm": 8.471965789794922, + "learning_rate": 2.7883384480491014e-06, + "loss": 11.8079, + "step": 83250 + }, + { + "epoch": 36.20168432491171, + "grad_norm": 3.7765846252441406, + "learning_rate": 2.7839544059622976e-06, + "loss": 11.7893, + "step": 83300 + }, + { + "epoch": 36.22341754957892, + "grad_norm": 32.80738067626953, + "learning_rate": 2.7795703638754935e-06, + "loss": 11.8117, + "step": 83350 + }, + { + "epoch": 36.24515077424613, + "grad_norm": 11.759632110595703, + "learning_rate": 2.7751863217886893e-06, + "loss": 11.7887, + "step": 83400 + }, + { + "epoch": 36.266883998913336, + "grad_norm": 9.582806587219238, + "learning_rate": 2.770802279701885e-06, + "loss": 11.7988, + "step": 83450 + }, + { + "epoch": 36.28861722358055, + "grad_norm": 13.065892219543457, + "learning_rate": 2.7664182376150813e-06, + "loss": 11.7974, + "step": 83500 + }, + { + "epoch": 36.31035044824776, + "grad_norm": 25.009721755981445, + "learning_rate": 2.7620341955282775e-06, + "loss": 11.7938, + "step": 83550 + }, + { + "epoch": 36.33208367291497, + "grad_norm": 7.72334098815918, + "learning_rate": 2.7576501534414734e-06, + "loss": 11.7859, + "step": 83600 + }, + { + "epoch": 36.35381689758218, + "grad_norm": 8.665655136108398, + "learning_rate": 2.753266111354669e-06, + "loss": 11.7859, + "step": 83650 + }, + { + "epoch": 36.375550122249386, + "grad_norm": 19.630573272705078, + "learning_rate": 2.7488820692678654e-06, + "loss": 11.7842, + "step": 83700 + }, + { + "epoch": 36.3972833469166, + "grad_norm": 13.641834259033203, + "learning_rate": 2.7444980271810612e-06, + "loss": 11.8031, + "step": 83750 + }, + { + "epoch": 36.41901657158381, + "grad_norm": 5.9598917961120605, + "learning_rate": 2.740113985094257e-06, + "loss": 11.7813, + "step": 83800 + }, + { + "epoch": 36.44074979625102, + "grad_norm": 8.549332618713379, + "learning_rate": 2.735729943007453e-06, + "loss": 11.7959, + "step": 83850 + }, + { + "epoch": 36.46248302091823, + "grad_norm": 4.3795857429504395, + "learning_rate": 2.731345900920649e-06, + "loss": 11.7963, + "step": 83900 + }, + { + "epoch": 36.484216245585436, + "grad_norm": 7.300856113433838, + "learning_rate": 2.726961858833845e-06, + "loss": 11.7902, + "step": 83950 + }, + { + "epoch": 36.50594947025265, + "grad_norm": 7.026275157928467, + "learning_rate": 2.722577816747041e-06, + "loss": 11.7988, + "step": 84000 + }, + { + "epoch": 36.52768269491986, + "grad_norm": 12.537973403930664, + "learning_rate": 2.7181937746602373e-06, + "loss": 11.7891, + "step": 84050 + }, + { + "epoch": 36.54941591958707, + "grad_norm": 6.903670787811279, + "learning_rate": 2.713809732573433e-06, + "loss": 11.7984, + "step": 84100 + }, + { + "epoch": 36.57114914425428, + "grad_norm": 11.342251777648926, + "learning_rate": 2.709425690486629e-06, + "loss": 11.7915, + "step": 84150 + }, + { + "epoch": 36.59288236892149, + "grad_norm": 10.707886695861816, + "learning_rate": 2.7050416483998248e-06, + "loss": 11.7955, + "step": 84200 + }, + { + "epoch": 36.614615593588695, + "grad_norm": 7.921166896820068, + "learning_rate": 2.7006576063130206e-06, + "loss": 11.7868, + "step": 84250 + }, + { + "epoch": 36.63634881825591, + "grad_norm": 9.0649995803833, + "learning_rate": 2.696273564226217e-06, + "loss": 11.7949, + "step": 84300 + }, + { + "epoch": 36.65808204292312, + "grad_norm": 13.355379104614258, + "learning_rate": 2.6918895221394126e-06, + "loss": 11.7763, + "step": 84350 + }, + { + "epoch": 36.67981526759033, + "grad_norm": 12.289958953857422, + "learning_rate": 2.6875054800526084e-06, + "loss": 11.7861, + "step": 84400 + }, + { + "epoch": 36.70154849225754, + "grad_norm": 4.684927940368652, + "learning_rate": 2.683121437965805e-06, + "loss": 11.7803, + "step": 84450 + }, + { + "epoch": 36.723281716924745, + "grad_norm": 7.917582035064697, + "learning_rate": 2.678737395879001e-06, + "loss": 11.799, + "step": 84500 + }, + { + "epoch": 36.74501494159196, + "grad_norm": 5.413401126861572, + "learning_rate": 2.6743533537921967e-06, + "loss": 11.7873, + "step": 84550 + }, + { + "epoch": 36.76674816625917, + "grad_norm": 14.283327102661133, + "learning_rate": 2.6699693117053925e-06, + "loss": 11.7867, + "step": 84600 + }, + { + "epoch": 36.78848139092638, + "grad_norm": 8.163966178894043, + "learning_rate": 2.6655852696185883e-06, + "loss": 11.8112, + "step": 84650 + }, + { + "epoch": 36.81021461559359, + "grad_norm": 7.235820770263672, + "learning_rate": 2.6612012275317846e-06, + "loss": 11.781, + "step": 84700 + }, + { + "epoch": 36.831947840260796, + "grad_norm": 4.746747016906738, + "learning_rate": 2.6568171854449804e-06, + "loss": 11.7941, + "step": 84750 + }, + { + "epoch": 36.85368106492801, + "grad_norm": 4.514492511749268, + "learning_rate": 2.652433143358176e-06, + "loss": 11.7947, + "step": 84800 + }, + { + "epoch": 36.87541428959522, + "grad_norm": 10.46290111541748, + "learning_rate": 2.648049101271372e-06, + "loss": 11.7826, + "step": 84850 + }, + { + "epoch": 36.89714751426243, + "grad_norm": 8.848064422607422, + "learning_rate": 2.6436650591845687e-06, + "loss": 11.7802, + "step": 84900 + }, + { + "epoch": 36.91888073892964, + "grad_norm": 6.194151401519775, + "learning_rate": 2.6392810170977645e-06, + "loss": 11.7857, + "step": 84950 + }, + { + "epoch": 36.940613963596846, + "grad_norm": 8.114167213439941, + "learning_rate": 2.6348969750109603e-06, + "loss": 11.7934, + "step": 85000 + }, + { + "epoch": 36.96234718826406, + "grad_norm": 11.507193565368652, + "learning_rate": 2.6305129329241565e-06, + "loss": 11.7928, + "step": 85050 + }, + { + "epoch": 36.98408041293127, + "grad_norm": 12.524467468261719, + "learning_rate": 2.6261288908373523e-06, + "loss": 11.7915, + "step": 85100 + }, + { + "epoch": 37.00565063841348, + "grad_norm": 12.674412727355957, + "learning_rate": 2.621744848750548e-06, + "loss": 11.6996, + "step": 85150 + }, + { + "epoch": 37.027383863080686, + "grad_norm": 4.092529773712158, + "learning_rate": 2.617360806663744e-06, + "loss": 11.784, + "step": 85200 + }, + { + "epoch": 37.049117087747895, + "grad_norm": 4.772137641906738, + "learning_rate": 2.6129767645769398e-06, + "loss": 11.7883, + "step": 85250 + }, + { + "epoch": 37.070850312415104, + "grad_norm": 9.263222694396973, + "learning_rate": 2.608592722490136e-06, + "loss": 11.7655, + "step": 85300 + }, + { + "epoch": 37.09258353708231, + "grad_norm": 11.615614891052246, + "learning_rate": 2.6042086804033322e-06, + "loss": 11.7987, + "step": 85350 + }, + { + "epoch": 37.11431676174953, + "grad_norm": 3.8077142238616943, + "learning_rate": 2.599824638316528e-06, + "loss": 11.7786, + "step": 85400 + }, + { + "epoch": 37.13604998641674, + "grad_norm": 5.6897993087768555, + "learning_rate": 2.5954405962297243e-06, + "loss": 11.797, + "step": 85450 + }, + { + "epoch": 37.157783211083945, + "grad_norm": 5.3308305740356445, + "learning_rate": 2.59105655414292e-06, + "loss": 11.7697, + "step": 85500 + }, + { + "epoch": 37.179516435751154, + "grad_norm": 8.419775009155273, + "learning_rate": 2.586672512056116e-06, + "loss": 11.7771, + "step": 85550 + }, + { + "epoch": 37.20124966041836, + "grad_norm": 4.629072189331055, + "learning_rate": 2.5822884699693117e-06, + "loss": 11.7899, + "step": 85600 + }, + { + "epoch": 37.22298288508557, + "grad_norm": 16.38741683959961, + "learning_rate": 2.5779044278825075e-06, + "loss": 11.7799, + "step": 85650 + }, + { + "epoch": 37.24471610975279, + "grad_norm": 39.32244110107422, + "learning_rate": 2.5735203857957038e-06, + "loss": 11.791, + "step": 85700 + }, + { + "epoch": 37.266449334419995, + "grad_norm": 14.12863826751709, + "learning_rate": 2.5691363437089e-06, + "loss": 11.7906, + "step": 85750 + }, + { + "epoch": 37.288182559087204, + "grad_norm": 7.304044723510742, + "learning_rate": 2.564752301622096e-06, + "loss": 11.7865, + "step": 85800 + }, + { + "epoch": 37.30991578375441, + "grad_norm": 11.347620964050293, + "learning_rate": 2.560368259535292e-06, + "loss": 11.8021, + "step": 85850 + }, + { + "epoch": 37.33164900842162, + "grad_norm": 9.358373641967773, + "learning_rate": 2.555984217448488e-06, + "loss": 11.7979, + "step": 85900 + }, + { + "epoch": 37.35338223308884, + "grad_norm": 3.5885915756225586, + "learning_rate": 2.5516001753616837e-06, + "loss": 11.7782, + "step": 85950 + }, + { + "epoch": 37.375115457756046, + "grad_norm": 9.129725456237793, + "learning_rate": 2.5472161332748795e-06, + "loss": 11.783, + "step": 86000 + }, + { + "epoch": 37.396848682423254, + "grad_norm": 8.22261905670166, + "learning_rate": 2.5428320911880757e-06, + "loss": 11.7652, + "step": 86050 + }, + { + "epoch": 37.41858190709046, + "grad_norm": 6.796608924865723, + "learning_rate": 2.5384480491012715e-06, + "loss": 11.7897, + "step": 86100 + }, + { + "epoch": 37.44031513175767, + "grad_norm": 3.8288304805755615, + "learning_rate": 2.5340640070144673e-06, + "loss": 11.7803, + "step": 86150 + }, + { + "epoch": 37.46204835642489, + "grad_norm": 4.8984599113464355, + "learning_rate": 2.529679964927664e-06, + "loss": 11.7998, + "step": 86200 + }, + { + "epoch": 37.483781581092096, + "grad_norm": 11.828535079956055, + "learning_rate": 2.52529592284086e-06, + "loss": 11.7756, + "step": 86250 + }, + { + "epoch": 37.505514805759304, + "grad_norm": 7.632526397705078, + "learning_rate": 2.5209118807540556e-06, + "loss": 11.7816, + "step": 86300 + }, + { + "epoch": 37.52724803042651, + "grad_norm": 13.461671829223633, + "learning_rate": 2.5165278386672514e-06, + "loss": 11.7924, + "step": 86350 + }, + { + "epoch": 37.54898125509372, + "grad_norm": 5.831872940063477, + "learning_rate": 2.5121437965804472e-06, + "loss": 11.767, + "step": 86400 + }, + { + "epoch": 37.57071447976094, + "grad_norm": 15.29990005493164, + "learning_rate": 2.5077597544936435e-06, + "loss": 11.7853, + "step": 86450 + }, + { + "epoch": 37.592447704428146, + "grad_norm": 18.313222885131836, + "learning_rate": 2.5033757124068393e-06, + "loss": 11.7759, + "step": 86500 + }, + { + "epoch": 37.614180929095355, + "grad_norm": 7.100087642669678, + "learning_rate": 2.4989916703200355e-06, + "loss": 11.7861, + "step": 86550 + }, + { + "epoch": 37.63591415376256, + "grad_norm": 10.970837593078613, + "learning_rate": 2.4946076282332313e-06, + "loss": 11.7859, + "step": 86600 + }, + { + "epoch": 37.65764737842977, + "grad_norm": 17.918962478637695, + "learning_rate": 2.490223586146427e-06, + "loss": 11.781, + "step": 86650 + }, + { + "epoch": 37.67938060309699, + "grad_norm": 23.407426834106445, + "learning_rate": 2.485839544059623e-06, + "loss": 11.7763, + "step": 86700 + }, + { + "epoch": 37.701113827764196, + "grad_norm": 6.149535655975342, + "learning_rate": 2.481455501972819e-06, + "loss": 11.7819, + "step": 86750 + }, + { + "epoch": 37.722847052431405, + "grad_norm": 5.372469425201416, + "learning_rate": 2.477071459886015e-06, + "loss": 11.7762, + "step": 86800 + }, + { + "epoch": 37.74458027709861, + "grad_norm": 21.797292709350586, + "learning_rate": 2.472687417799211e-06, + "loss": 11.7822, + "step": 86850 + }, + { + "epoch": 37.76631350176582, + "grad_norm": 17.68259048461914, + "learning_rate": 2.468303375712407e-06, + "loss": 11.7753, + "step": 86900 + }, + { + "epoch": 37.78804672643304, + "grad_norm": 10.163092613220215, + "learning_rate": 2.463919333625603e-06, + "loss": 11.7662, + "step": 86950 + }, + { + "epoch": 37.809779951100246, + "grad_norm": 6.926383972167969, + "learning_rate": 2.459535291538799e-06, + "loss": 11.7897, + "step": 87000 + }, + { + "epoch": 37.831513175767455, + "grad_norm": 8.474647521972656, + "learning_rate": 2.455151249451995e-06, + "loss": 11.78, + "step": 87050 + }, + { + "epoch": 37.853246400434664, + "grad_norm": 16.252666473388672, + "learning_rate": 2.4507672073651907e-06, + "loss": 11.7683, + "step": 87100 + }, + { + "epoch": 37.87497962510187, + "grad_norm": 9.422881126403809, + "learning_rate": 2.446383165278387e-06, + "loss": 11.78, + "step": 87150 + }, + { + "epoch": 37.89671284976909, + "grad_norm": 13.145493507385254, + "learning_rate": 2.441999123191583e-06, + "loss": 11.7806, + "step": 87200 + }, + { + "epoch": 37.9184460744363, + "grad_norm": 9.204483985900879, + "learning_rate": 2.437615081104779e-06, + "loss": 11.7722, + "step": 87250 + }, + { + "epoch": 37.940179299103505, + "grad_norm": 9.611700057983398, + "learning_rate": 2.4332310390179748e-06, + "loss": 11.7957, + "step": 87300 + }, + { + "epoch": 37.961912523770714, + "grad_norm": 11.884017944335938, + "learning_rate": 2.4288469969311706e-06, + "loss": 11.7755, + "step": 87350 + }, + { + "epoch": 37.98364574843792, + "grad_norm": 5.692808151245117, + "learning_rate": 2.424462954844367e-06, + "loss": 11.7766, + "step": 87400 + }, + { + "epoch": 38.00521597392013, + "grad_norm": 66.74461364746094, + "learning_rate": 2.4200789127575626e-06, + "loss": 11.7051, + "step": 87450 + }, + { + "epoch": 38.02694919858734, + "grad_norm": 5.268041610717773, + "learning_rate": 2.4156948706707584e-06, + "loss": 11.7609, + "step": 87500 + }, + { + "epoch": 38.04868242325455, + "grad_norm": 8.159131050109863, + "learning_rate": 2.4113108285839547e-06, + "loss": 11.773, + "step": 87550 + }, + { + "epoch": 38.07041564792176, + "grad_norm": 8.749338150024414, + "learning_rate": 2.4069267864971505e-06, + "loss": 11.7782, + "step": 87600 + }, + { + "epoch": 38.09214887258897, + "grad_norm": 9.121374130249023, + "learning_rate": 2.4025427444103467e-06, + "loss": 11.7703, + "step": 87650 + }, + { + "epoch": 38.11388209725618, + "grad_norm": 10.743656158447266, + "learning_rate": 2.3981587023235425e-06, + "loss": 11.7798, + "step": 87700 + }, + { + "epoch": 38.13561532192339, + "grad_norm": 5.2683000564575195, + "learning_rate": 2.3937746602367383e-06, + "loss": 11.7791, + "step": 87750 + }, + { + "epoch": 38.1573485465906, + "grad_norm": 9.600702285766602, + "learning_rate": 2.389390618149934e-06, + "loss": 11.7658, + "step": 87800 + }, + { + "epoch": 38.17908177125781, + "grad_norm": 10.094902992248535, + "learning_rate": 2.3850065760631304e-06, + "loss": 11.7891, + "step": 87850 + }, + { + "epoch": 38.20081499592502, + "grad_norm": 8.227887153625488, + "learning_rate": 2.3806225339763266e-06, + "loss": 11.7706, + "step": 87900 + }, + { + "epoch": 38.22254822059223, + "grad_norm": 7.997677803039551, + "learning_rate": 2.3762384918895224e-06, + "loss": 11.772, + "step": 87950 + }, + { + "epoch": 38.24428144525944, + "grad_norm": 6.51764440536499, + "learning_rate": 2.3718544498027182e-06, + "loss": 11.7739, + "step": 88000 + }, + { + "epoch": 38.26601466992665, + "grad_norm": 4.949069499969482, + "learning_rate": 2.367470407715914e-06, + "loss": 11.778, + "step": 88050 + }, + { + "epoch": 38.28774789459386, + "grad_norm": 4.438246250152588, + "learning_rate": 2.3630863656291103e-06, + "loss": 11.7742, + "step": 88100 + }, + { + "epoch": 38.30948111926107, + "grad_norm": 11.066926956176758, + "learning_rate": 2.358702323542306e-06, + "loss": 11.7817, + "step": 88150 + }, + { + "epoch": 38.33121434392828, + "grad_norm": 6.765926837921143, + "learning_rate": 2.3543182814555023e-06, + "loss": 11.7767, + "step": 88200 + }, + { + "epoch": 38.35294756859549, + "grad_norm": 6.704973220825195, + "learning_rate": 2.349934239368698e-06, + "loss": 11.7671, + "step": 88250 + }, + { + "epoch": 38.3746807932627, + "grad_norm": 10.575370788574219, + "learning_rate": 2.3455501972818944e-06, + "loss": 11.771, + "step": 88300 + }, + { + "epoch": 38.396414017929914, + "grad_norm": 3.860527992248535, + "learning_rate": 2.34116615519509e-06, + "loss": 11.7685, + "step": 88350 + }, + { + "epoch": 38.41814724259712, + "grad_norm": 10.35341739654541, + "learning_rate": 2.336782113108286e-06, + "loss": 11.7715, + "step": 88400 + }, + { + "epoch": 38.43988046726433, + "grad_norm": 7.268162727355957, + "learning_rate": 2.332398071021482e-06, + "loss": 11.7878, + "step": 88450 + }, + { + "epoch": 38.46161369193154, + "grad_norm": 6.3647871017456055, + "learning_rate": 2.328014028934678e-06, + "loss": 11.7675, + "step": 88500 + }, + { + "epoch": 38.48334691659875, + "grad_norm": 27.453014373779297, + "learning_rate": 2.323629986847874e-06, + "loss": 11.7635, + "step": 88550 + }, + { + "epoch": 38.505080141265964, + "grad_norm": 12.105439186096191, + "learning_rate": 2.31924594476107e-06, + "loss": 11.7825, + "step": 88600 + }, + { + "epoch": 38.52681336593317, + "grad_norm": 12.992817878723145, + "learning_rate": 2.314861902674266e-06, + "loss": 11.76, + "step": 88650 + }, + { + "epoch": 38.54854659060038, + "grad_norm": 8.603561401367188, + "learning_rate": 2.3104778605874617e-06, + "loss": 11.7655, + "step": 88700 + }, + { + "epoch": 38.57027981526759, + "grad_norm": 4.036582946777344, + "learning_rate": 2.306093818500658e-06, + "loss": 11.7882, + "step": 88750 + }, + { + "epoch": 38.5920130399348, + "grad_norm": 5.422863483428955, + "learning_rate": 2.3017097764138538e-06, + "loss": 11.7683, + "step": 88800 + }, + { + "epoch": 38.61374626460201, + "grad_norm": 5.842752933502197, + "learning_rate": 2.2973257343270496e-06, + "loss": 11.7695, + "step": 88850 + }, + { + "epoch": 38.63547948926922, + "grad_norm": 9.653190612792969, + "learning_rate": 2.292941692240246e-06, + "loss": 11.7873, + "step": 88900 + }, + { + "epoch": 38.65721271393643, + "grad_norm": 5.730354309082031, + "learning_rate": 2.2885576501534416e-06, + "loss": 11.768, + "step": 88950 + }, + { + "epoch": 38.67894593860364, + "grad_norm": 17.826345443725586, + "learning_rate": 2.284173608066638e-06, + "loss": 11.776, + "step": 89000 + }, + { + "epoch": 38.70067916327085, + "grad_norm": 9.027566909790039, + "learning_rate": 2.2797895659798336e-06, + "loss": 11.755, + "step": 89050 + }, + { + "epoch": 38.72241238793806, + "grad_norm": 4.53999662399292, + "learning_rate": 2.2754055238930295e-06, + "loss": 11.765, + "step": 89100 + }, + { + "epoch": 38.74414561260527, + "grad_norm": 8.569631576538086, + "learning_rate": 2.2710214818062253e-06, + "loss": 11.7777, + "step": 89150 + }, + { + "epoch": 38.76587883727248, + "grad_norm": 24.965681076049805, + "learning_rate": 2.2666374397194215e-06, + "loss": 11.7738, + "step": 89200 + }, + { + "epoch": 38.78761206193969, + "grad_norm": 7.554117202758789, + "learning_rate": 2.2622533976326173e-06, + "loss": 11.7801, + "step": 89250 + }, + { + "epoch": 38.8093452866069, + "grad_norm": 16.02465057373047, + "learning_rate": 2.2578693555458135e-06, + "loss": 11.785, + "step": 89300 + }, + { + "epoch": 38.83107851127411, + "grad_norm": 9.892585754394531, + "learning_rate": 2.2534853134590094e-06, + "loss": 11.7812, + "step": 89350 + }, + { + "epoch": 38.85281173594132, + "grad_norm": 20.471792221069336, + "learning_rate": 2.2491012713722056e-06, + "loss": 11.774, + "step": 89400 + }, + { + "epoch": 38.87454496060853, + "grad_norm": 15.924908638000488, + "learning_rate": 2.2447172292854014e-06, + "loss": 11.7735, + "step": 89450 + }, + { + "epoch": 38.89627818527574, + "grad_norm": 9.257697105407715, + "learning_rate": 2.2403331871985972e-06, + "loss": 11.784, + "step": 89500 + }, + { + "epoch": 38.91801140994295, + "grad_norm": 8.59609317779541, + "learning_rate": 2.235949145111793e-06, + "loss": 11.7924, + "step": 89550 + }, + { + "epoch": 38.93974463461016, + "grad_norm": 5.643759727478027, + "learning_rate": 2.2315651030249893e-06, + "loss": 11.7727, + "step": 89600 + }, + { + "epoch": 38.96147785927737, + "grad_norm": 3.887133836746216, + "learning_rate": 2.227181060938185e-06, + "loss": 11.7728, + "step": 89650 + }, + { + "epoch": 38.98321108394458, + "grad_norm": 15.085949897766113, + "learning_rate": 2.2227970188513813e-06, + "loss": 11.7837, + "step": 89700 + }, + { + "epoch": 39.00478130942679, + "grad_norm": 4.734740257263184, + "learning_rate": 2.218412976764577e-06, + "loss": 11.6785, + "step": 89750 + }, + { + "epoch": 39.026514534094, + "grad_norm": 7.277717590332031, + "learning_rate": 2.214028934677773e-06, + "loss": 11.7636, + "step": 89800 + }, + { + "epoch": 39.04824775876121, + "grad_norm": 12.622576713562012, + "learning_rate": 2.209644892590969e-06, + "loss": 11.7632, + "step": 89850 + }, + { + "epoch": 39.069980983428415, + "grad_norm": 8.72470760345459, + "learning_rate": 2.205260850504165e-06, + "loss": 11.7806, + "step": 89900 + }, + { + "epoch": 39.091714208095624, + "grad_norm": 8.548195838928223, + "learning_rate": 2.2008768084173608e-06, + "loss": 11.7642, + "step": 89950 + }, + { + "epoch": 39.11344743276284, + "grad_norm": 5.267911911010742, + "learning_rate": 2.196492766330557e-06, + "loss": 11.7606, + "step": 90000 + }, + { + "epoch": 39.11344743276284, + "eval_cer": 0.07434609260242184, + "eval_loss": 2.4127438068389893, + "eval_runtime": 397.8168, + "eval_samples_per_second": 13.589, + "eval_steps_per_second": 3.399, + "eval_wer": 0.22556131260794474, + "step": 90000 + }, + { + "epoch": 39.13518065743005, + "grad_norm": 13.38624095916748, + "learning_rate": 2.1921087242437532e-06, + "loss": 11.7632, + "step": 90050 + }, + { + "epoch": 39.15691388209726, + "grad_norm": 13.824868202209473, + "learning_rate": 2.187724682156949e-06, + "loss": 11.7722, + "step": 90100 + }, + { + "epoch": 39.178647106764465, + "grad_norm": 10.600975036621094, + "learning_rate": 2.183340640070145e-06, + "loss": 11.7692, + "step": 90150 + }, + { + "epoch": 39.200380331431674, + "grad_norm": 10.806214332580566, + "learning_rate": 2.1789565979833407e-06, + "loss": 11.753, + "step": 90200 + }, + { + "epoch": 39.22211355609888, + "grad_norm": 7.800635814666748, + "learning_rate": 2.1745725558965365e-06, + "loss": 11.7616, + "step": 90250 + }, + { + "epoch": 39.2438467807661, + "grad_norm": 7.492881774902344, + "learning_rate": 2.1701885138097327e-06, + "loss": 11.7439, + "step": 90300 + }, + { + "epoch": 39.26558000543331, + "grad_norm": 12.061040878295898, + "learning_rate": 2.1658044717229285e-06, + "loss": 11.7567, + "step": 90350 + }, + { + "epoch": 39.287313230100516, + "grad_norm": 6.541141510009766, + "learning_rate": 2.1614204296361248e-06, + "loss": 11.775, + "step": 90400 + }, + { + "epoch": 39.309046454767724, + "grad_norm": 5.959283828735352, + "learning_rate": 2.1570363875493206e-06, + "loss": 11.7633, + "step": 90450 + }, + { + "epoch": 39.33077967943493, + "grad_norm": 9.226263046264648, + "learning_rate": 2.152652345462517e-06, + "loss": 11.7746, + "step": 90500 + }, + { + "epoch": 39.35251290410215, + "grad_norm": 15.486861228942871, + "learning_rate": 2.1482683033757126e-06, + "loss": 11.775, + "step": 90550 + }, + { + "epoch": 39.37424612876936, + "grad_norm": 6.420067310333252, + "learning_rate": 2.1438842612889084e-06, + "loss": 11.7812, + "step": 90600 + }, + { + "epoch": 39.395979353436566, + "grad_norm": 7.908544540405273, + "learning_rate": 2.1395002192021042e-06, + "loss": 11.7573, + "step": 90650 + }, + { + "epoch": 39.417712578103774, + "grad_norm": 7.737216472625732, + "learning_rate": 2.1351161771153005e-06, + "loss": 11.7621, + "step": 90700 + }, + { + "epoch": 39.43944580277098, + "grad_norm": 5.630201816558838, + "learning_rate": 2.1307321350284967e-06, + "loss": 11.7629, + "step": 90750 + }, + { + "epoch": 39.4611790274382, + "grad_norm": 5.563817024230957, + "learning_rate": 2.1263480929416925e-06, + "loss": 11.7663, + "step": 90800 + }, + { + "epoch": 39.48291225210541, + "grad_norm": 6.785152912139893, + "learning_rate": 2.1219640508548883e-06, + "loss": 11.7575, + "step": 90850 + }, + { + "epoch": 39.504645476772616, + "grad_norm": 5.261542797088623, + "learning_rate": 2.117580008768084e-06, + "loss": 11.7676, + "step": 90900 + }, + { + "epoch": 39.526378701439825, + "grad_norm": 8.951974868774414, + "learning_rate": 2.1131959666812804e-06, + "loss": 11.7626, + "step": 90950 + }, + { + "epoch": 39.54811192610703, + "grad_norm": 5.925467491149902, + "learning_rate": 2.108811924594476e-06, + "loss": 11.7652, + "step": 91000 + }, + { + "epoch": 39.56984515077425, + "grad_norm": 6.1618194580078125, + "learning_rate": 2.1044278825076724e-06, + "loss": 11.7526, + "step": 91050 + }, + { + "epoch": 39.59157837544146, + "grad_norm": 5.625064373016357, + "learning_rate": 2.1000438404208682e-06, + "loss": 11.7605, + "step": 91100 + }, + { + "epoch": 39.613311600108666, + "grad_norm": 12.690841674804688, + "learning_rate": 2.0956597983340645e-06, + "loss": 11.7649, + "step": 91150 + }, + { + "epoch": 39.635044824775875, + "grad_norm": 12.550384521484375, + "learning_rate": 2.0912757562472603e-06, + "loss": 11.792, + "step": 91200 + }, + { + "epoch": 39.65677804944308, + "grad_norm": 9.650748252868652, + "learning_rate": 2.086891714160456e-06, + "loss": 11.7733, + "step": 91250 + }, + { + "epoch": 39.6785112741103, + "grad_norm": 8.173276901245117, + "learning_rate": 2.082507672073652e-06, + "loss": 11.7835, + "step": 91300 + }, + { + "epoch": 39.70024449877751, + "grad_norm": 8.434061050415039, + "learning_rate": 2.0781236299868477e-06, + "loss": 11.7474, + "step": 91350 + }, + { + "epoch": 39.721977723444716, + "grad_norm": 9.518670082092285, + "learning_rate": 2.073739587900044e-06, + "loss": 11.7687, + "step": 91400 + }, + { + "epoch": 39.743710948111925, + "grad_norm": 6.331693649291992, + "learning_rate": 2.06935554581324e-06, + "loss": 11.7578, + "step": 91450 + }, + { + "epoch": 39.765444172779134, + "grad_norm": 9.190653800964355, + "learning_rate": 2.064971503726436e-06, + "loss": 11.7756, + "step": 91500 + }, + { + "epoch": 39.78717739744635, + "grad_norm": 13.398709297180176, + "learning_rate": 2.060587461639632e-06, + "loss": 11.7704, + "step": 91550 + }, + { + "epoch": 39.80891062211356, + "grad_norm": 11.104494094848633, + "learning_rate": 2.056203419552828e-06, + "loss": 11.7623, + "step": 91600 + }, + { + "epoch": 39.83064384678077, + "grad_norm": 5.477586269378662, + "learning_rate": 2.051819377466024e-06, + "loss": 11.7647, + "step": 91650 + }, + { + "epoch": 39.852377071447975, + "grad_norm": 3.5749564170837402, + "learning_rate": 2.0474353353792197e-06, + "loss": 11.7695, + "step": 91700 + }, + { + "epoch": 39.874110296115184, + "grad_norm": 5.978471755981445, + "learning_rate": 2.043051293292416e-06, + "loss": 11.7648, + "step": 91750 + }, + { + "epoch": 39.89584352078239, + "grad_norm": 25.19099235534668, + "learning_rate": 2.0386672512056117e-06, + "loss": 11.7612, + "step": 91800 + }, + { + "epoch": 39.91757674544961, + "grad_norm": 5.882903099060059, + "learning_rate": 2.034283209118808e-06, + "loss": 11.7799, + "step": 91850 + }, + { + "epoch": 39.93930997011682, + "grad_norm": 27.048709869384766, + "learning_rate": 2.0298991670320037e-06, + "loss": 11.7737, + "step": 91900 + }, + { + "epoch": 39.961043194784025, + "grad_norm": 9.073412895202637, + "learning_rate": 2.0255151249451996e-06, + "loss": 11.7612, + "step": 91950 + }, + { + "epoch": 39.982776419451234, + "grad_norm": 13.002010345458984, + "learning_rate": 2.0211310828583954e-06, + "loss": 11.7782, + "step": 92000 + }, + { + "epoch": 40.00434664493344, + "grad_norm": 9.383039474487305, + "learning_rate": 2.0167470407715916e-06, + "loss": 11.6788, + "step": 92050 + }, + { + "epoch": 40.02607986960065, + "grad_norm": 9.476020812988281, + "learning_rate": 2.0123629986847874e-06, + "loss": 11.7597, + "step": 92100 + }, + { + "epoch": 40.04781309426786, + "grad_norm": 8.409124374389648, + "learning_rate": 2.0079789565979836e-06, + "loss": 11.7492, + "step": 92150 + }, + { + "epoch": 40.069546318935075, + "grad_norm": 6.030084609985352, + "learning_rate": 2.0035949145111795e-06, + "loss": 11.7751, + "step": 92200 + }, + { + "epoch": 40.09127954360228, + "grad_norm": 4.96845006942749, + "learning_rate": 1.9992108724243757e-06, + "loss": 11.7443, + "step": 92250 + }, + { + "epoch": 40.11301276826949, + "grad_norm": 11.096965789794922, + "learning_rate": 1.9948268303375715e-06, + "loss": 11.7718, + "step": 92300 + }, + { + "epoch": 40.1347459929367, + "grad_norm": 6.45682430267334, + "learning_rate": 1.9904427882507673e-06, + "loss": 11.7592, + "step": 92350 + }, + { + "epoch": 40.15647921760391, + "grad_norm": 7.665036678314209, + "learning_rate": 1.986058746163963e-06, + "loss": 11.7715, + "step": 92400 + }, + { + "epoch": 40.178212442271125, + "grad_norm": 4.477396011352539, + "learning_rate": 1.9816747040771594e-06, + "loss": 11.7572, + "step": 92450 + }, + { + "epoch": 40.19994566693833, + "grad_norm": 13.868246078491211, + "learning_rate": 1.977290661990355e-06, + "loss": 11.7656, + "step": 92500 + }, + { + "epoch": 40.22167889160554, + "grad_norm": 7.29514217376709, + "learning_rate": 1.9729066199035514e-06, + "loss": 11.7525, + "step": 92550 + }, + { + "epoch": 40.24341211627275, + "grad_norm": 5.126781463623047, + "learning_rate": 1.968522577816747e-06, + "loss": 11.7623, + "step": 92600 + }, + { + "epoch": 40.26514534093996, + "grad_norm": 13.333416938781738, + "learning_rate": 1.964138535729943e-06, + "loss": 11.7645, + "step": 92650 + }, + { + "epoch": 40.286878565607175, + "grad_norm": 4.38609504699707, + "learning_rate": 1.9597544936431393e-06, + "loss": 11.7471, + "step": 92700 + }, + { + "epoch": 40.308611790274384, + "grad_norm": 18.64853858947754, + "learning_rate": 1.955370451556335e-06, + "loss": 11.7614, + "step": 92750 + }, + { + "epoch": 40.33034501494159, + "grad_norm": 10.51586627960205, + "learning_rate": 1.950986409469531e-06, + "loss": 11.7572, + "step": 92800 + }, + { + "epoch": 40.3520782396088, + "grad_norm": 10.547462463378906, + "learning_rate": 1.946602367382727e-06, + "loss": 11.7609, + "step": 92850 + }, + { + "epoch": 40.37381146427601, + "grad_norm": 5.690642356872559, + "learning_rate": 1.9422183252959233e-06, + "loss": 11.7481, + "step": 92900 + }, + { + "epoch": 40.395544688943225, + "grad_norm": 3.9211175441741943, + "learning_rate": 1.937834283209119e-06, + "loss": 11.7461, + "step": 92950 + }, + { + "epoch": 40.417277913610434, + "grad_norm": 4.40631103515625, + "learning_rate": 1.933450241122315e-06, + "loss": 11.7587, + "step": 93000 + }, + { + "epoch": 40.43901113827764, + "grad_norm": 4.007054328918457, + "learning_rate": 1.9290661990355108e-06, + "loss": 11.7649, + "step": 93050 + }, + { + "epoch": 40.46074436294485, + "grad_norm": 9.994613647460938, + "learning_rate": 1.9246821569487066e-06, + "loss": 11.7632, + "step": 93100 + }, + { + "epoch": 40.48247758761206, + "grad_norm": 7.985722541809082, + "learning_rate": 1.920298114861903e-06, + "loss": 11.7661, + "step": 93150 + }, + { + "epoch": 40.50421081227927, + "grad_norm": 17.07448387145996, + "learning_rate": 1.9159140727750986e-06, + "loss": 11.7407, + "step": 93200 + }, + { + "epoch": 40.525944036946484, + "grad_norm": 5.9471211433410645, + "learning_rate": 1.911530030688295e-06, + "loss": 11.7688, + "step": 93250 + }, + { + "epoch": 40.54767726161369, + "grad_norm": 5.977828502655029, + "learning_rate": 1.9071459886014907e-06, + "loss": 11.7515, + "step": 93300 + }, + { + "epoch": 40.5694104862809, + "grad_norm": 37.1627082824707, + "learning_rate": 1.902761946514687e-06, + "loss": 11.7671, + "step": 93350 + }, + { + "epoch": 40.59114371094811, + "grad_norm": 6.538187503814697, + "learning_rate": 1.8983779044278827e-06, + "loss": 11.7546, + "step": 93400 + }, + { + "epoch": 40.61287693561532, + "grad_norm": 7.786463260650635, + "learning_rate": 1.8939938623410785e-06, + "loss": 11.7539, + "step": 93450 + }, + { + "epoch": 40.634610160282534, + "grad_norm": 9.64076042175293, + "learning_rate": 1.8896098202542746e-06, + "loss": 11.7615, + "step": 93500 + }, + { + "epoch": 40.65634338494974, + "grad_norm": 6.382667064666748, + "learning_rate": 1.8852257781674704e-06, + "loss": 11.749, + "step": 93550 + }, + { + "epoch": 40.67807660961695, + "grad_norm": 9.91306209564209, + "learning_rate": 1.8808417360806666e-06, + "loss": 11.7645, + "step": 93600 + }, + { + "epoch": 40.69980983428416, + "grad_norm": 5.685323715209961, + "learning_rate": 1.8764576939938626e-06, + "loss": 11.7511, + "step": 93650 + }, + { + "epoch": 40.72154305895137, + "grad_norm": 6.233023166656494, + "learning_rate": 1.8720736519070584e-06, + "loss": 11.7425, + "step": 93700 + }, + { + "epoch": 40.743276283618584, + "grad_norm": 7.542703628540039, + "learning_rate": 1.8676896098202542e-06, + "loss": 11.7679, + "step": 93750 + }, + { + "epoch": 40.76500950828579, + "grad_norm": 20.795351028442383, + "learning_rate": 1.8633055677334505e-06, + "loss": 11.7475, + "step": 93800 + }, + { + "epoch": 40.786742732953, + "grad_norm": 8.967228889465332, + "learning_rate": 1.8589215256466465e-06, + "loss": 11.7556, + "step": 93850 + }, + { + "epoch": 40.80847595762021, + "grad_norm": 9.180179595947266, + "learning_rate": 1.8545374835598423e-06, + "loss": 11.7549, + "step": 93900 + }, + { + "epoch": 40.83020918228742, + "grad_norm": 4.092516899108887, + "learning_rate": 1.8501534414730381e-06, + "loss": 11.7597, + "step": 93950 + }, + { + "epoch": 40.851942406954635, + "grad_norm": 4.368731498718262, + "learning_rate": 1.8457693993862344e-06, + "loss": 11.7655, + "step": 94000 + }, + { + "epoch": 40.87367563162184, + "grad_norm": 3.8829784393310547, + "learning_rate": 1.8413853572994304e-06, + "loss": 11.7653, + "step": 94050 + }, + { + "epoch": 40.89540885628905, + "grad_norm": 30.0755615234375, + "learning_rate": 1.8370013152126262e-06, + "loss": 11.7594, + "step": 94100 + }, + { + "epoch": 40.91714208095626, + "grad_norm": 5.881009578704834, + "learning_rate": 1.8326172731258222e-06, + "loss": 11.7536, + "step": 94150 + }, + { + "epoch": 40.93887530562347, + "grad_norm": 11.916665077209473, + "learning_rate": 1.828233231039018e-06, + "loss": 11.746, + "step": 94200 + }, + { + "epoch": 40.960608530290685, + "grad_norm": 4.125995635986328, + "learning_rate": 1.8238491889522143e-06, + "loss": 11.7549, + "step": 94250 + }, + { + "epoch": 40.98234175495789, + "grad_norm": 9.298914909362793, + "learning_rate": 1.81946514686541e-06, + "loss": 11.746, + "step": 94300 + }, + { + "epoch": 41.0039119804401, + "grad_norm": 6.543211460113525, + "learning_rate": 1.815081104778606e-06, + "loss": 11.6774, + "step": 94350 + }, + { + "epoch": 41.02564520510731, + "grad_norm": 10.013541221618652, + "learning_rate": 1.810697062691802e-06, + "loss": 11.747, + "step": 94400 + }, + { + "epoch": 41.04737842977452, + "grad_norm": 26.30128288269043, + "learning_rate": 1.8063130206049981e-06, + "loss": 11.7447, + "step": 94450 + }, + { + "epoch": 41.06911165444173, + "grad_norm": 6.381859302520752, + "learning_rate": 1.801928978518194e-06, + "loss": 11.7397, + "step": 94500 + }, + { + "epoch": 41.090844879108936, + "grad_norm": 6.652487754821777, + "learning_rate": 1.79754493643139e-06, + "loss": 11.7431, + "step": 94550 + }, + { + "epoch": 41.11257810377615, + "grad_norm": 5.449718952178955, + "learning_rate": 1.7931608943445858e-06, + "loss": 11.7549, + "step": 94600 + }, + { + "epoch": 41.13431132844336, + "grad_norm": 3.2079617977142334, + "learning_rate": 1.7887768522577818e-06, + "loss": 11.7596, + "step": 94650 + }, + { + "epoch": 41.15604455311057, + "grad_norm": 9.676004409790039, + "learning_rate": 1.7843928101709778e-06, + "loss": 11.7529, + "step": 94700 + }, + { + "epoch": 41.17777777777778, + "grad_norm": 9.563153266906738, + "learning_rate": 1.7800087680841738e-06, + "loss": 11.7469, + "step": 94750 + }, + { + "epoch": 41.199511002444986, + "grad_norm": 8.786322593688965, + "learning_rate": 1.7756247259973697e-06, + "loss": 11.7364, + "step": 94800 + }, + { + "epoch": 41.221244227112194, + "grad_norm": 8.319864273071289, + "learning_rate": 1.7712406839105657e-06, + "loss": 11.7545, + "step": 94850 + }, + { + "epoch": 41.24297745177941, + "grad_norm": 11.104187965393066, + "learning_rate": 1.7668566418237617e-06, + "loss": 11.7425, + "step": 94900 + }, + { + "epoch": 41.26471067644662, + "grad_norm": 5.2200026512146, + "learning_rate": 1.7624725997369577e-06, + "loss": 11.7511, + "step": 94950 + }, + { + "epoch": 41.28644390111383, + "grad_norm": 5.0673933029174805, + "learning_rate": 1.7580885576501535e-06, + "loss": 11.7597, + "step": 95000 + }, + { + "epoch": 41.308177125781036, + "grad_norm": 6.790633678436279, + "learning_rate": 1.7537045155633495e-06, + "loss": 11.7566, + "step": 95050 + }, + { + "epoch": 41.329910350448245, + "grad_norm": 5.625386714935303, + "learning_rate": 1.7493204734765456e-06, + "loss": 11.7483, + "step": 95100 + }, + { + "epoch": 41.35164357511546, + "grad_norm": 5.591269493103027, + "learning_rate": 1.7449364313897416e-06, + "loss": 11.7497, + "step": 95150 + }, + { + "epoch": 41.37337679978267, + "grad_norm": 5.52367639541626, + "learning_rate": 1.7405523893029374e-06, + "loss": 11.7637, + "step": 95200 + }, + { + "epoch": 41.39511002444988, + "grad_norm": 3.352064371109009, + "learning_rate": 1.7361683472161334e-06, + "loss": 11.7539, + "step": 95250 + }, + { + "epoch": 41.416843249117086, + "grad_norm": 10.966846466064453, + "learning_rate": 1.7317843051293292e-06, + "loss": 11.7587, + "step": 95300 + }, + { + "epoch": 41.438576473784295, + "grad_norm": 4.151219844818115, + "learning_rate": 1.7274002630425255e-06, + "loss": 11.7389, + "step": 95350 + }, + { + "epoch": 41.46030969845151, + "grad_norm": 7.952151775360107, + "learning_rate": 1.7230162209557213e-06, + "loss": 11.7477, + "step": 95400 + }, + { + "epoch": 41.48204292311872, + "grad_norm": 7.96391487121582, + "learning_rate": 1.7186321788689173e-06, + "loss": 11.7551, + "step": 95450 + }, + { + "epoch": 41.50377614778593, + "grad_norm": 5.120693206787109, + "learning_rate": 1.7142481367821131e-06, + "loss": 11.7426, + "step": 95500 + }, + { + "epoch": 41.525509372453136, + "grad_norm": 4.3286027908325195, + "learning_rate": 1.7098640946953093e-06, + "loss": 11.7461, + "step": 95550 + }, + { + "epoch": 41.547242597120345, + "grad_norm": 7.304409027099609, + "learning_rate": 1.7054800526085052e-06, + "loss": 11.7565, + "step": 95600 + }, + { + "epoch": 41.56897582178756, + "grad_norm": 5.609922885894775, + "learning_rate": 1.7010960105217012e-06, + "loss": 11.7654, + "step": 95650 + }, + { + "epoch": 41.59070904645477, + "grad_norm": 7.889837265014648, + "learning_rate": 1.696711968434897e-06, + "loss": 11.7447, + "step": 95700 + }, + { + "epoch": 41.61244227112198, + "grad_norm": 6.620103359222412, + "learning_rate": 1.692327926348093e-06, + "loss": 11.7406, + "step": 95750 + }, + { + "epoch": 41.63417549578919, + "grad_norm": 5.3006463050842285, + "learning_rate": 1.687943884261289e-06, + "loss": 11.7421, + "step": 95800 + }, + { + "epoch": 41.655908720456395, + "grad_norm": 6.590972423553467, + "learning_rate": 1.683559842174485e-06, + "loss": 11.7446, + "step": 95850 + }, + { + "epoch": 41.67764194512361, + "grad_norm": 9.739943504333496, + "learning_rate": 1.6791758000876809e-06, + "loss": 11.7506, + "step": 95900 + }, + { + "epoch": 41.69937516979082, + "grad_norm": 5.318305969238281, + "learning_rate": 1.6747917580008769e-06, + "loss": 11.7503, + "step": 95950 + }, + { + "epoch": 41.72110839445803, + "grad_norm": 4.048628807067871, + "learning_rate": 1.6704077159140731e-06, + "loss": 11.7579, + "step": 96000 + }, + { + "epoch": 41.74284161912524, + "grad_norm": 6.817996025085449, + "learning_rate": 1.666023673827269e-06, + "loss": 11.7427, + "step": 96050 + }, + { + "epoch": 41.764574843792445, + "grad_norm": 10.243191719055176, + "learning_rate": 1.6616396317404647e-06, + "loss": 11.7662, + "step": 96100 + }, + { + "epoch": 41.78630806845966, + "grad_norm": 17.077388763427734, + "learning_rate": 1.6572555896536608e-06, + "loss": 11.7471, + "step": 96150 + }, + { + "epoch": 41.80804129312687, + "grad_norm": 10.154135704040527, + "learning_rate": 1.652871547566857e-06, + "loss": 11.7526, + "step": 96200 + }, + { + "epoch": 41.82977451779408, + "grad_norm": 15.618302345275879, + "learning_rate": 1.6484875054800528e-06, + "loss": 11.7451, + "step": 96250 + }, + { + "epoch": 41.85150774246129, + "grad_norm": 5.547327041625977, + "learning_rate": 1.6441034633932486e-06, + "loss": 11.759, + "step": 96300 + }, + { + "epoch": 41.873240967128496, + "grad_norm": 4.5382304191589355, + "learning_rate": 1.6397194213064446e-06, + "loss": 11.7377, + "step": 96350 + }, + { + "epoch": 41.89497419179571, + "grad_norm": 4.469516277313232, + "learning_rate": 1.6353353792196405e-06, + "loss": 11.7555, + "step": 96400 + }, + { + "epoch": 41.91670741646292, + "grad_norm": 4.71604061126709, + "learning_rate": 1.6309513371328367e-06, + "loss": 11.745, + "step": 96450 + }, + { + "epoch": 41.93844064113013, + "grad_norm": 4.897347450256348, + "learning_rate": 1.6265672950460327e-06, + "loss": 11.7572, + "step": 96500 + }, + { + "epoch": 41.96017386579734, + "grad_norm": 19.85100746154785, + "learning_rate": 1.6221832529592285e-06, + "loss": 11.7499, + "step": 96550 + }, + { + "epoch": 41.981907090464546, + "grad_norm": 5.2028117179870605, + "learning_rate": 1.6177992108724243e-06, + "loss": 11.7622, + "step": 96600 + }, + { + "epoch": 42.00347731594675, + "grad_norm": 12.004639625549316, + "learning_rate": 1.6134151687856206e-06, + "loss": 11.6536, + "step": 96650 + }, + { + "epoch": 42.02521054061396, + "grad_norm": 5.1376848220825195, + "learning_rate": 1.6090311266988166e-06, + "loss": 11.7351, + "step": 96700 + }, + { + "epoch": 42.04694376528117, + "grad_norm": 5.914182662963867, + "learning_rate": 1.6046470846120124e-06, + "loss": 11.7339, + "step": 96750 + }, + { + "epoch": 42.068676989948386, + "grad_norm": 7.787753105163574, + "learning_rate": 1.6002630425252082e-06, + "loss": 11.7402, + "step": 96800 + }, + { + "epoch": 42.090410214615595, + "grad_norm": 6.375885009765625, + "learning_rate": 1.5958790004384042e-06, + "loss": 11.7447, + "step": 96850 + }, + { + "epoch": 42.112143439282804, + "grad_norm": 24.318653106689453, + "learning_rate": 1.5914949583516005e-06, + "loss": 11.7561, + "step": 96900 + }, + { + "epoch": 42.13387666395001, + "grad_norm": 3.9973437786102295, + "learning_rate": 1.5871109162647963e-06, + "loss": 11.7402, + "step": 96950 + }, + { + "epoch": 42.15560988861722, + "grad_norm": 7.005822658538818, + "learning_rate": 1.5827268741779923e-06, + "loss": 11.739, + "step": 97000 + }, + { + "epoch": 42.17734311328444, + "grad_norm": 7.374983787536621, + "learning_rate": 1.5783428320911881e-06, + "loss": 11.7416, + "step": 97050 + }, + { + "epoch": 42.199076337951645, + "grad_norm": 7.227538108825684, + "learning_rate": 1.5739587900043843e-06, + "loss": 11.7344, + "step": 97100 + }, + { + "epoch": 42.220809562618854, + "grad_norm": 16.485198974609375, + "learning_rate": 1.5695747479175802e-06, + "loss": 11.7412, + "step": 97150 + }, + { + "epoch": 42.24254278728606, + "grad_norm": 8.366280555725098, + "learning_rate": 1.5651907058307762e-06, + "loss": 11.7441, + "step": 97200 + }, + { + "epoch": 42.26427601195327, + "grad_norm": 4.64595365524292, + "learning_rate": 1.560806663743972e-06, + "loss": 11.7412, + "step": 97250 + }, + { + "epoch": 42.28600923662049, + "grad_norm": 8.422093391418457, + "learning_rate": 1.5564226216571678e-06, + "loss": 11.7501, + "step": 97300 + }, + { + "epoch": 42.307742461287695, + "grad_norm": 7.655717849731445, + "learning_rate": 1.552038579570364e-06, + "loss": 11.7379, + "step": 97350 + }, + { + "epoch": 42.329475685954904, + "grad_norm": 5.5334272384643555, + "learning_rate": 1.54765453748356e-06, + "loss": 11.7498, + "step": 97400 + }, + { + "epoch": 42.35120891062211, + "grad_norm": 23.848054885864258, + "learning_rate": 1.5432704953967559e-06, + "loss": 11.7518, + "step": 97450 + }, + { + "epoch": 42.37294213528932, + "grad_norm": 12.059696197509766, + "learning_rate": 1.5388864533099519e-06, + "loss": 11.7456, + "step": 97500 + }, + { + "epoch": 42.39467535995654, + "grad_norm": 5.756223678588867, + "learning_rate": 1.534502411223148e-06, + "loss": 11.7399, + "step": 97550 + }, + { + "epoch": 42.416408584623746, + "grad_norm": 3.3854639530181885, + "learning_rate": 1.530118369136344e-06, + "loss": 11.7399, + "step": 97600 + }, + { + "epoch": 42.438141809290954, + "grad_norm": 33.99950408935547, + "learning_rate": 1.5257343270495397e-06, + "loss": 11.7448, + "step": 97650 + }, + { + "epoch": 42.45987503395816, + "grad_norm": 12.245558738708496, + "learning_rate": 1.5213502849627358e-06, + "loss": 11.7508, + "step": 97700 + }, + { + "epoch": 42.48160825862537, + "grad_norm": 13.875435829162598, + "learning_rate": 1.5169662428759318e-06, + "loss": 11.7354, + "step": 97750 + }, + { + "epoch": 42.50334148329259, + "grad_norm": 4.234302997589111, + "learning_rate": 1.5125822007891278e-06, + "loss": 11.7334, + "step": 97800 + }, + { + "epoch": 42.525074707959796, + "grad_norm": 6.025676727294922, + "learning_rate": 1.5081981587023236e-06, + "loss": 11.7598, + "step": 97850 + }, + { + "epoch": 42.546807932627004, + "grad_norm": 6.693691253662109, + "learning_rate": 1.5038141166155196e-06, + "loss": 11.7353, + "step": 97900 + }, + { + "epoch": 42.56854115729421, + "grad_norm": 15.162924766540527, + "learning_rate": 1.4994300745287155e-06, + "loss": 11.7358, + "step": 97950 + }, + { + "epoch": 42.59027438196142, + "grad_norm": 14.031683921813965, + "learning_rate": 1.4950460324419117e-06, + "loss": 11.7534, + "step": 98000 + }, + { + "epoch": 42.61200760662863, + "grad_norm": 5.764448642730713, + "learning_rate": 1.4906619903551075e-06, + "loss": 11.7335, + "step": 98050 + }, + { + "epoch": 42.633740831295846, + "grad_norm": 4.945387840270996, + "learning_rate": 1.4862779482683035e-06, + "loss": 11.7459, + "step": 98100 + }, + { + "epoch": 42.655474055963055, + "grad_norm": 5.07288932800293, + "learning_rate": 1.4818939061814993e-06, + "loss": 11.7434, + "step": 98150 + }, + { + "epoch": 42.67720728063026, + "grad_norm": 5.957432270050049, + "learning_rate": 1.4775098640946956e-06, + "loss": 11.7353, + "step": 98200 + }, + { + "epoch": 42.69894050529747, + "grad_norm": 11.405596733093262, + "learning_rate": 1.4731258220078914e-06, + "loss": 11.7419, + "step": 98250 + }, + { + "epoch": 42.72067372996468, + "grad_norm": 5.61083984375, + "learning_rate": 1.4687417799210874e-06, + "loss": 11.7435, + "step": 98300 + }, + { + "epoch": 42.742406954631896, + "grad_norm": 5.795932292938232, + "learning_rate": 1.4643577378342832e-06, + "loss": 11.7536, + "step": 98350 + }, + { + "epoch": 42.764140179299105, + "grad_norm": 14.968274116516113, + "learning_rate": 1.4599736957474792e-06, + "loss": 11.7532, + "step": 98400 + }, + { + "epoch": 42.78587340396631, + "grad_norm": 4.567286491394043, + "learning_rate": 1.4555896536606753e-06, + "loss": 11.7412, + "step": 98450 + }, + { + "epoch": 42.80760662863352, + "grad_norm": 3.640240430831909, + "learning_rate": 1.4512056115738713e-06, + "loss": 11.7526, + "step": 98500 + }, + { + "epoch": 42.82933985330073, + "grad_norm": 7.847878456115723, + "learning_rate": 1.446821569487067e-06, + "loss": 11.7427, + "step": 98550 + }, + { + "epoch": 42.851073077967946, + "grad_norm": 9.818808555603027, + "learning_rate": 1.4424375274002631e-06, + "loss": 11.7491, + "step": 98600 + }, + { + "epoch": 42.872806302635155, + "grad_norm": 6.466832637786865, + "learning_rate": 1.4380534853134591e-06, + "loss": 11.7487, + "step": 98650 + }, + { + "epoch": 42.89453952730236, + "grad_norm": 12.53541374206543, + "learning_rate": 1.4336694432266552e-06, + "loss": 11.7451, + "step": 98700 + }, + { + "epoch": 42.91627275196957, + "grad_norm": 9.651721000671387, + "learning_rate": 1.429285401139851e-06, + "loss": 11.7455, + "step": 98750 + }, + { + "epoch": 42.93800597663678, + "grad_norm": 7.363193511962891, + "learning_rate": 1.424901359053047e-06, + "loss": 11.7442, + "step": 98800 + }, + { + "epoch": 42.959739201304, + "grad_norm": 6.456315040588379, + "learning_rate": 1.4205173169662432e-06, + "loss": 11.7378, + "step": 98850 + }, + { + "epoch": 42.981472425971205, + "grad_norm": 4.924785137176514, + "learning_rate": 1.416133274879439e-06, + "loss": 11.7338, + "step": 98900 + }, + { + "epoch": 43.00304265145341, + "grad_norm": 12.623988151550293, + "learning_rate": 1.4117492327926348e-06, + "loss": 11.6662, + "step": 98950 + }, + { + "epoch": 43.02477587612062, + "grad_norm": 9.521653175354004, + "learning_rate": 1.4073651907058309e-06, + "loss": 11.7329, + "step": 99000 + }, + { + "epoch": 43.04650910078783, + "grad_norm": 4.056196212768555, + "learning_rate": 1.4029811486190267e-06, + "loss": 11.7467, + "step": 99050 + }, + { + "epoch": 43.06824232545504, + "grad_norm": 3.722097396850586, + "learning_rate": 1.398597106532223e-06, + "loss": 11.735, + "step": 99100 + }, + { + "epoch": 43.08997555012225, + "grad_norm": 3.6029582023620605, + "learning_rate": 1.3942130644454187e-06, + "loss": 11.7395, + "step": 99150 + }, + { + "epoch": 43.111708774789456, + "grad_norm": 4.464113712310791, + "learning_rate": 1.3898290223586147e-06, + "loss": 11.7417, + "step": 99200 + }, + { + "epoch": 43.13344199945667, + "grad_norm": 7.3181986808776855, + "learning_rate": 1.3854449802718106e-06, + "loss": 11.7556, + "step": 99250 + }, + { + "epoch": 43.15517522412388, + "grad_norm": 12.537137031555176, + "learning_rate": 1.3810609381850068e-06, + "loss": 11.7485, + "step": 99300 + }, + { + "epoch": 43.17690844879109, + "grad_norm": 14.284486770629883, + "learning_rate": 1.3766768960982028e-06, + "loss": 11.7374, + "step": 99350 + }, + { + "epoch": 43.1986416734583, + "grad_norm": 3.9522204399108887, + "learning_rate": 1.3722928540113986e-06, + "loss": 11.7394, + "step": 99400 + }, + { + "epoch": 43.220374898125506, + "grad_norm": 3.287987232208252, + "learning_rate": 1.3679088119245944e-06, + "loss": 11.7358, + "step": 99450 + }, + { + "epoch": 43.24210812279272, + "grad_norm": 5.351846694946289, + "learning_rate": 1.3635247698377905e-06, + "loss": 11.7402, + "step": 99500 + }, + { + "epoch": 43.26384134745993, + "grad_norm": 13.18363094329834, + "learning_rate": 1.3591407277509867e-06, + "loss": 11.7343, + "step": 99550 + }, + { + "epoch": 43.28557457212714, + "grad_norm": 2.8352560997009277, + "learning_rate": 1.3547566856641825e-06, + "loss": 11.7408, + "step": 99600 + }, + { + "epoch": 43.30730779679435, + "grad_norm": 4.404722690582275, + "learning_rate": 1.3503726435773783e-06, + "loss": 11.7488, + "step": 99650 + }, + { + "epoch": 43.329041021461556, + "grad_norm": 5.89795446395874, + "learning_rate": 1.3459886014905743e-06, + "loss": 11.7417, + "step": 99700 + }, + { + "epoch": 43.35077424612877, + "grad_norm": 3.792579412460327, + "learning_rate": 1.3416045594037706e-06, + "loss": 11.742, + "step": 99750 + }, + { + "epoch": 43.37250747079598, + "grad_norm": 6.295860290527344, + "learning_rate": 1.3372205173169664e-06, + "loss": 11.7419, + "step": 99800 + }, + { + "epoch": 43.39424069546319, + "grad_norm": 4.617618560791016, + "learning_rate": 1.3328364752301624e-06, + "loss": 11.7388, + "step": 99850 + }, + { + "epoch": 43.4159739201304, + "grad_norm": 3.46701717376709, + "learning_rate": 1.3284524331433582e-06, + "loss": 11.739, + "step": 99900 + }, + { + "epoch": 43.437707144797606, + "grad_norm": 5.266394138336182, + "learning_rate": 1.3240683910565544e-06, + "loss": 11.7336, + "step": 99950 + }, + { + "epoch": 43.45944036946482, + "grad_norm": 4.314075946807861, + "learning_rate": 1.3196843489697503e-06, + "loss": 11.7505, + "step": 100000 + }, + { + "epoch": 43.45944036946482, + "eval_cer": 0.07396960981570859, + "eval_loss": 2.4215219020843506, + "eval_runtime": 398.4535, + "eval_samples_per_second": 13.567, + "eval_steps_per_second": 3.393, + "eval_wer": 0.22507457999685979, + "step": 100000 + }, + { + "epoch": 43.48117359413203, + "grad_norm": 4.137760639190674, + "learning_rate": 1.3153003068829463e-06, + "loss": 11.7369, + "step": 100050 + }, + { + "epoch": 43.50290681879924, + "grad_norm": 16.71613311767578, + "learning_rate": 1.310916264796142e-06, + "loss": 11.7308, + "step": 100100 + }, + { + "epoch": 43.52464004346645, + "grad_norm": 3.2521612644195557, + "learning_rate": 1.306532222709338e-06, + "loss": 11.7266, + "step": 100150 + }, + { + "epoch": 43.54637326813366, + "grad_norm": 5.732780456542969, + "learning_rate": 1.3021481806225341e-06, + "loss": 11.7389, + "step": 100200 + }, + { + "epoch": 43.56810649280087, + "grad_norm": 6.189255714416504, + "learning_rate": 1.2977641385357302e-06, + "loss": 11.7341, + "step": 100250 + }, + { + "epoch": 43.58983971746808, + "grad_norm": 4.808958053588867, + "learning_rate": 1.293380096448926e-06, + "loss": 11.739, + "step": 100300 + }, + { + "epoch": 43.61157294213529, + "grad_norm": 13.771133422851562, + "learning_rate": 1.288996054362122e-06, + "loss": 11.734, + "step": 100350 + }, + { + "epoch": 43.6333061668025, + "grad_norm": 5.9765400886535645, + "learning_rate": 1.284612012275318e-06, + "loss": 11.7398, + "step": 100400 + }, + { + "epoch": 43.65503939146971, + "grad_norm": 4.513848304748535, + "learning_rate": 1.280227970188514e-06, + "loss": 11.7321, + "step": 100450 + }, + { + "epoch": 43.67677261613692, + "grad_norm": 3.8837485313415527, + "learning_rate": 1.2758439281017098e-06, + "loss": 11.7257, + "step": 100500 + }, + { + "epoch": 43.69850584080413, + "grad_norm": 5.340435028076172, + "learning_rate": 1.2714598860149059e-06, + "loss": 11.7429, + "step": 100550 + }, + { + "epoch": 43.72023906547134, + "grad_norm": 5.877197742462158, + "learning_rate": 1.2670758439281017e-06, + "loss": 11.728, + "step": 100600 + }, + { + "epoch": 43.74197229013855, + "grad_norm": 11.274967193603516, + "learning_rate": 1.262691801841298e-06, + "loss": 11.7314, + "step": 100650 + }, + { + "epoch": 43.76370551480576, + "grad_norm": 7.218222141265869, + "learning_rate": 1.2583077597544937e-06, + "loss": 11.726, + "step": 100700 + }, + { + "epoch": 43.78543873947297, + "grad_norm": 3.588624954223633, + "learning_rate": 1.2539237176676897e-06, + "loss": 11.7394, + "step": 100750 + }, + { + "epoch": 43.80717196414018, + "grad_norm": 10.603730201721191, + "learning_rate": 1.2495396755808858e-06, + "loss": 11.7442, + "step": 100800 + }, + { + "epoch": 43.82890518880739, + "grad_norm": 4.9444379806518555, + "learning_rate": 1.2451556334940816e-06, + "loss": 11.7318, + "step": 100850 + }, + { + "epoch": 43.8506384134746, + "grad_norm": 16.76546287536621, + "learning_rate": 1.2407715914072776e-06, + "loss": 11.731, + "step": 100900 + }, + { + "epoch": 43.87237163814181, + "grad_norm": 3.7398102283477783, + "learning_rate": 1.2363875493204736e-06, + "loss": 11.7348, + "step": 100950 + }, + { + "epoch": 43.894104862809016, + "grad_norm": 4.954889297485352, + "learning_rate": 1.2320035072336696e-06, + "loss": 11.7333, + "step": 101000 + }, + { + "epoch": 43.91583808747623, + "grad_norm": 3.635148286819458, + "learning_rate": 1.2276194651468654e-06, + "loss": 11.7388, + "step": 101050 + }, + { + "epoch": 43.93757131214344, + "grad_norm": 9.620790481567383, + "learning_rate": 1.2232354230600615e-06, + "loss": 11.7254, + "step": 101100 + }, + { + "epoch": 43.95930453681065, + "grad_norm": 4.13824462890625, + "learning_rate": 1.2188513809732575e-06, + "loss": 11.7366, + "step": 101150 + }, + { + "epoch": 43.98103776147786, + "grad_norm": 6.983582973480225, + "learning_rate": 1.2144673388864533e-06, + "loss": 11.7287, + "step": 101200 + }, + { + "epoch": 44.002607986960065, + "grad_norm": 4.2465691566467285, + "learning_rate": 1.2100832967996493e-06, + "loss": 11.649, + "step": 101250 + }, + { + "epoch": 44.024341211627274, + "grad_norm": 17.737117767333984, + "learning_rate": 1.2056992547128453e-06, + "loss": 11.7302, + "step": 101300 + }, + { + "epoch": 44.04607443629448, + "grad_norm": 4.739099502563477, + "learning_rate": 1.2013152126260414e-06, + "loss": 11.75, + "step": 101350 + }, + { + "epoch": 44.0678076609617, + "grad_norm": 3.695364236831665, + "learning_rate": 1.1969311705392372e-06, + "loss": 11.7399, + "step": 101400 + }, + { + "epoch": 44.08954088562891, + "grad_norm": 6.371554851531982, + "learning_rate": 1.1925471284524332e-06, + "loss": 11.7314, + "step": 101450 + }, + { + "epoch": 44.111274110296115, + "grad_norm": 24.45339584350586, + "learning_rate": 1.1881630863656292e-06, + "loss": 11.7215, + "step": 101500 + }, + { + "epoch": 44.133007334963324, + "grad_norm": 5.760914325714111, + "learning_rate": 1.1837790442788252e-06, + "loss": 11.7376, + "step": 101550 + }, + { + "epoch": 44.15474055963053, + "grad_norm": 4.051183700561523, + "learning_rate": 1.179395002192021e-06, + "loss": 11.7265, + "step": 101600 + }, + { + "epoch": 44.17647378429775, + "grad_norm": 5.363418102264404, + "learning_rate": 1.175010960105217e-06, + "loss": 11.7367, + "step": 101650 + }, + { + "epoch": 44.19820700896496, + "grad_norm": 3.471618890762329, + "learning_rate": 1.170626918018413e-06, + "loss": 11.7384, + "step": 101700 + }, + { + "epoch": 44.219940233632165, + "grad_norm": 7.958500385284424, + "learning_rate": 1.166242875931609e-06, + "loss": 11.7332, + "step": 101750 + }, + { + "epoch": 44.241673458299374, + "grad_norm": 25.589679718017578, + "learning_rate": 1.161858833844805e-06, + "loss": 11.7298, + "step": 101800 + }, + { + "epoch": 44.26340668296658, + "grad_norm": 11.511533737182617, + "learning_rate": 1.157474791758001e-06, + "loss": 11.7272, + "step": 101850 + }, + { + "epoch": 44.2851399076338, + "grad_norm": 4.467309474945068, + "learning_rate": 1.153090749671197e-06, + "loss": 11.7184, + "step": 101900 + }, + { + "epoch": 44.30687313230101, + "grad_norm": 4.863615989685059, + "learning_rate": 1.1487067075843928e-06, + "loss": 11.7395, + "step": 101950 + }, + { + "epoch": 44.328606356968216, + "grad_norm": 6.230271816253662, + "learning_rate": 1.1443226654975888e-06, + "loss": 11.7373, + "step": 102000 + }, + { + "epoch": 44.350339581635424, + "grad_norm": 4.7530517578125, + "learning_rate": 1.1399386234107848e-06, + "loss": 11.7472, + "step": 102050 + }, + { + "epoch": 44.37207280630263, + "grad_norm": 22.007238388061523, + "learning_rate": 1.1355545813239809e-06, + "loss": 11.7212, + "step": 102100 + }, + { + "epoch": 44.39380603096985, + "grad_norm": 6.660682678222656, + "learning_rate": 1.1311705392371767e-06, + "loss": 11.7316, + "step": 102150 + }, + { + "epoch": 44.41553925563706, + "grad_norm": 5.25778865814209, + "learning_rate": 1.126786497150373e-06, + "loss": 11.7305, + "step": 102200 + }, + { + "epoch": 44.437272480304266, + "grad_norm": 5.097360134124756, + "learning_rate": 1.1224024550635687e-06, + "loss": 11.7314, + "step": 102250 + }, + { + "epoch": 44.459005704971474, + "grad_norm": 4.272281169891357, + "learning_rate": 1.1180184129767645e-06, + "loss": 11.7419, + "step": 102300 + }, + { + "epoch": 44.48073892963868, + "grad_norm": 6.060675144195557, + "learning_rate": 1.1136343708899605e-06, + "loss": 11.7367, + "step": 102350 + }, + { + "epoch": 44.50247215430589, + "grad_norm": 5.883248329162598, + "learning_rate": 1.1092503288031566e-06, + "loss": 11.7368, + "step": 102400 + }, + { + "epoch": 44.52420537897311, + "grad_norm": 6.329914093017578, + "learning_rate": 1.1048662867163526e-06, + "loss": 11.7303, + "step": 102450 + }, + { + "epoch": 44.545938603640316, + "grad_norm": 6.62354850769043, + "learning_rate": 1.1004822446295484e-06, + "loss": 11.7375, + "step": 102500 + }, + { + "epoch": 44.567671828307525, + "grad_norm": 10.634700775146484, + "learning_rate": 1.0960982025427446e-06, + "loss": 11.7295, + "step": 102550 + }, + { + "epoch": 44.58940505297473, + "grad_norm": 2.787297487258911, + "learning_rate": 1.0917141604559404e-06, + "loss": 11.7311, + "step": 102600 + }, + { + "epoch": 44.61113827764194, + "grad_norm": 4.915313720703125, + "learning_rate": 1.0873301183691365e-06, + "loss": 11.7368, + "step": 102650 + }, + { + "epoch": 44.63287150230916, + "grad_norm": 13.359769821166992, + "learning_rate": 1.0829460762823325e-06, + "loss": 11.7417, + "step": 102700 + }, + { + "epoch": 44.654604726976366, + "grad_norm": 3.9991888999938965, + "learning_rate": 1.0785620341955285e-06, + "loss": 11.7392, + "step": 102750 + }, + { + "epoch": 44.676337951643575, + "grad_norm": 3.797086238861084, + "learning_rate": 1.0741779921087243e-06, + "loss": 11.7242, + "step": 102800 + }, + { + "epoch": 44.69807117631078, + "grad_norm": 6.608884811401367, + "learning_rate": 1.0697939500219203e-06, + "loss": 11.7231, + "step": 102850 + }, + { + "epoch": 44.71980440097799, + "grad_norm": 10.230695724487305, + "learning_rate": 1.0654099079351164e-06, + "loss": 11.7325, + "step": 102900 + }, + { + "epoch": 44.74153762564521, + "grad_norm": 4.2929301261901855, + "learning_rate": 1.0610258658483122e-06, + "loss": 11.7303, + "step": 102950 + }, + { + "epoch": 44.763270850312416, + "grad_norm": 5.952197074890137, + "learning_rate": 1.0566418237615082e-06, + "loss": 11.7291, + "step": 103000 + }, + { + "epoch": 44.785004074979625, + "grad_norm": 10.304634094238281, + "learning_rate": 1.0522577816747042e-06, + "loss": 11.7452, + "step": 103050 + }, + { + "epoch": 44.806737299646834, + "grad_norm": 4.195600509643555, + "learning_rate": 1.0478737395879002e-06, + "loss": 11.744, + "step": 103100 + }, + { + "epoch": 44.82847052431404, + "grad_norm": 3.8358092308044434, + "learning_rate": 1.043489697501096e-06, + "loss": 11.7301, + "step": 103150 + }, + { + "epoch": 44.85020374898126, + "grad_norm": 13.731127738952637, + "learning_rate": 1.039105655414292e-06, + "loss": 11.7318, + "step": 103200 + }, + { + "epoch": 44.87193697364847, + "grad_norm": 4.197743892669678, + "learning_rate": 1.034721613327488e-06, + "loss": 11.7318, + "step": 103250 + }, + { + "epoch": 44.893670198315675, + "grad_norm": 8.494996070861816, + "learning_rate": 1.030337571240684e-06, + "loss": 11.7357, + "step": 103300 + }, + { + "epoch": 44.915403422982884, + "grad_norm": 3.6425983905792236, + "learning_rate": 1.02595352915388e-06, + "loss": 11.729, + "step": 103350 + }, + { + "epoch": 44.93713664765009, + "grad_norm": 4.333567142486572, + "learning_rate": 1.021569487067076e-06, + "loss": 11.7339, + "step": 103400 + }, + { + "epoch": 44.95886987231731, + "grad_norm": 4.8357930183410645, + "learning_rate": 1.017185444980272e-06, + "loss": 11.735, + "step": 103450 + }, + { + "epoch": 44.98060309698452, + "grad_norm": 8.409868240356445, + "learning_rate": 1.0128014028934678e-06, + "loss": 11.7336, + "step": 103500 + }, + { + "epoch": 45.002173322466724, + "grad_norm": 3.9856626987457275, + "learning_rate": 1.0084173608066638e-06, + "loss": 11.653, + "step": 103550 + }, + { + "epoch": 45.02390654713393, + "grad_norm": 4.529376029968262, + "learning_rate": 1.0040333187198598e-06, + "loss": 11.7279, + "step": 103600 + }, + { + "epoch": 45.04563977180114, + "grad_norm": 4.276280403137207, + "learning_rate": 9.996492766330559e-07, + "loss": 11.7362, + "step": 103650 + }, + { + "epoch": 45.06737299646835, + "grad_norm": 4.415678024291992, + "learning_rate": 9.952652345462517e-07, + "loss": 11.7194, + "step": 103700 + }, + { + "epoch": 45.08910622113556, + "grad_norm": 4.513889789581299, + "learning_rate": 9.908811924594477e-07, + "loss": 11.7178, + "step": 103750 + }, + { + "epoch": 45.11083944580277, + "grad_norm": 5.300011157989502, + "learning_rate": 9.864971503726437e-07, + "loss": 11.727, + "step": 103800 + }, + { + "epoch": 45.13257267046998, + "grad_norm": 3.8258767127990723, + "learning_rate": 9.821131082858395e-07, + "loss": 11.7328, + "step": 103850 + }, + { + "epoch": 45.15430589513719, + "grad_norm": 7.767271995544434, + "learning_rate": 9.777290661990355e-07, + "loss": 11.7202, + "step": 103900 + }, + { + "epoch": 45.1760391198044, + "grad_norm": 3.230754852294922, + "learning_rate": 9.733450241122316e-07, + "loss": 11.7268, + "step": 103950 + }, + { + "epoch": 45.19777234447161, + "grad_norm": 3.8269119262695312, + "learning_rate": 9.689609820254276e-07, + "loss": 11.7249, + "step": 104000 + }, + { + "epoch": 45.21950556913882, + "grad_norm": 5.030121803283691, + "learning_rate": 9.645769399386234e-07, + "loss": 11.7181, + "step": 104050 + }, + { + "epoch": 45.24123879380603, + "grad_norm": 4.850019931793213, + "learning_rate": 9.601928978518194e-07, + "loss": 11.7272, + "step": 104100 + }, + { + "epoch": 45.26297201847324, + "grad_norm": 6.58116340637207, + "learning_rate": 9.558088557650154e-07, + "loss": 11.7237, + "step": 104150 + }, + { + "epoch": 45.28470524314045, + "grad_norm": 20.67346954345703, + "learning_rate": 9.514248136782115e-07, + "loss": 11.7302, + "step": 104200 + }, + { + "epoch": 45.30643846780766, + "grad_norm": 3.362128973007202, + "learning_rate": 9.470407715914074e-07, + "loss": 11.7209, + "step": 104250 + }, + { + "epoch": 45.32817169247487, + "grad_norm": 7.51302433013916, + "learning_rate": 9.426567295046034e-07, + "loss": 11.7303, + "step": 104300 + }, + { + "epoch": 45.349904917142084, + "grad_norm": 4.610814094543457, + "learning_rate": 9.382726874177993e-07, + "loss": 11.7177, + "step": 104350 + }, + { + "epoch": 45.37163814180929, + "grad_norm": 13.158862113952637, + "learning_rate": 9.338886453309952e-07, + "loss": 11.7227, + "step": 104400 + }, + { + "epoch": 45.3933713664765, + "grad_norm": 4.248621940612793, + "learning_rate": 9.295046032441913e-07, + "loss": 11.7428, + "step": 104450 + }, + { + "epoch": 45.41510459114371, + "grad_norm": 3.553060531616211, + "learning_rate": 9.251205611573872e-07, + "loss": 11.7294, + "step": 104500 + }, + { + "epoch": 45.43683781581092, + "grad_norm": 5.807036399841309, + "learning_rate": 9.207365190705832e-07, + "loss": 11.7284, + "step": 104550 + }, + { + "epoch": 45.458571040478134, + "grad_norm": 13.629132270812988, + "learning_rate": 9.163524769837791e-07, + "loss": 11.7301, + "step": 104600 + }, + { + "epoch": 45.48030426514534, + "grad_norm": 4.1011857986450195, + "learning_rate": 9.119684348969751e-07, + "loss": 11.7319, + "step": 104650 + }, + { + "epoch": 45.50203748981255, + "grad_norm": 10.649341583251953, + "learning_rate": 9.075843928101711e-07, + "loss": 11.7333, + "step": 104700 + }, + { + "epoch": 45.52377071447976, + "grad_norm": 20.217660903930664, + "learning_rate": 9.032003507233671e-07, + "loss": 11.7319, + "step": 104750 + }, + { + "epoch": 45.54550393914697, + "grad_norm": 7.371703624725342, + "learning_rate": 8.98816308636563e-07, + "loss": 11.7223, + "step": 104800 + }, + { + "epoch": 45.567237163814184, + "grad_norm": 6.1061110496521, + "learning_rate": 8.94432266549759e-07, + "loss": 11.7134, + "step": 104850 + }, + { + "epoch": 45.58897038848139, + "grad_norm": 3.3697314262390137, + "learning_rate": 8.900482244629549e-07, + "loss": 11.7206, + "step": 104900 + }, + { + "epoch": 45.6107036131486, + "grad_norm": 5.704832077026367, + "learning_rate": 8.856641823761508e-07, + "loss": 11.7343, + "step": 104950 + }, + { + "epoch": 45.63243683781581, + "grad_norm": 5.612690448760986, + "learning_rate": 8.812801402893469e-07, + "loss": 11.7228, + "step": 105000 + }, + { + "epoch": 45.65417006248302, + "grad_norm": 4.661070823669434, + "learning_rate": 8.768960982025428e-07, + "loss": 11.7344, + "step": 105050 + }, + { + "epoch": 45.675903287150234, + "grad_norm": 4.922998905181885, + "learning_rate": 8.725120561157388e-07, + "loss": 11.7161, + "step": 105100 + }, + { + "epoch": 45.69763651181744, + "grad_norm": 6.320181369781494, + "learning_rate": 8.681280140289347e-07, + "loss": 11.7451, + "step": 105150 + }, + { + "epoch": 45.71936973648465, + "grad_norm": 6.543067455291748, + "learning_rate": 8.637439719421307e-07, + "loss": 11.7187, + "step": 105200 + }, + { + "epoch": 45.74110296115186, + "grad_norm": 7.506560802459717, + "learning_rate": 8.593599298553267e-07, + "loss": 11.7368, + "step": 105250 + }, + { + "epoch": 45.76283618581907, + "grad_norm": 6.871926307678223, + "learning_rate": 8.549758877685227e-07, + "loss": 11.7347, + "step": 105300 + }, + { + "epoch": 45.784569410486284, + "grad_norm": 4.491659641265869, + "learning_rate": 8.505918456817186e-07, + "loss": 11.7379, + "step": 105350 + }, + { + "epoch": 45.80630263515349, + "grad_norm": 21.81031036376953, + "learning_rate": 8.462078035949146e-07, + "loss": 11.7307, + "step": 105400 + }, + { + "epoch": 45.8280358598207, + "grad_norm": 20.492307662963867, + "learning_rate": 8.418237615081105e-07, + "loss": 11.7268, + "step": 105450 + }, + { + "epoch": 45.84976908448791, + "grad_norm": 7.620596408843994, + "learning_rate": 8.374397194213065e-07, + "loss": 11.7384, + "step": 105500 + }, + { + "epoch": 45.87150230915512, + "grad_norm": 4.937099456787109, + "learning_rate": 8.330556773345025e-07, + "loss": 11.7266, + "step": 105550 + }, + { + "epoch": 45.89323553382233, + "grad_norm": 3.815049409866333, + "learning_rate": 8.286716352476984e-07, + "loss": 11.7281, + "step": 105600 + }, + { + "epoch": 45.91496875848954, + "grad_norm": 9.32738208770752, + "learning_rate": 8.242875931608944e-07, + "loss": 11.7424, + "step": 105650 + }, + { + "epoch": 45.93670198315675, + "grad_norm": 12.112308502197266, + "learning_rate": 8.199035510740903e-07, + "loss": 11.729, + "step": 105700 + }, + { + "epoch": 45.95843520782396, + "grad_norm": 4.76987361907959, + "learning_rate": 8.155195089872864e-07, + "loss": 11.7292, + "step": 105750 + }, + { + "epoch": 45.98016843249117, + "grad_norm": 11.38598346710205, + "learning_rate": 8.111354669004823e-07, + "loss": 11.7356, + "step": 105800 + }, + { + "epoch": 46.00173865797338, + "grad_norm": 18.5734806060791, + "learning_rate": 8.067514248136783e-07, + "loss": 11.6526, + "step": 105850 + }, + { + "epoch": 46.023471882640585, + "grad_norm": 3.3094968795776367, + "learning_rate": 8.023673827268742e-07, + "loss": 11.7246, + "step": 105900 + }, + { + "epoch": 46.045205107307794, + "grad_norm": 10.625943183898926, + "learning_rate": 7.979833406400702e-07, + "loss": 11.7197, + "step": 105950 + }, + { + "epoch": 46.06693833197501, + "grad_norm": 11.11587142944336, + "learning_rate": 7.935992985532662e-07, + "loss": 11.712, + "step": 106000 + }, + { + "epoch": 46.08867155664222, + "grad_norm": 5.9816083908081055, + "learning_rate": 7.892152564664621e-07, + "loss": 11.7291, + "step": 106050 + }, + { + "epoch": 46.11040478130943, + "grad_norm": 5.810311317443848, + "learning_rate": 7.848312143796581e-07, + "loss": 11.72, + "step": 106100 + }, + { + "epoch": 46.132138005976635, + "grad_norm": 9.10987377166748, + "learning_rate": 7.80447172292854e-07, + "loss": 11.7341, + "step": 106150 + }, + { + "epoch": 46.153871230643844, + "grad_norm": 3.9713680744171143, + "learning_rate": 7.7606313020605e-07, + "loss": 11.728, + "step": 106200 + }, + { + "epoch": 46.17560445531106, + "grad_norm": 10.883817672729492, + "learning_rate": 7.716790881192459e-07, + "loss": 11.7321, + "step": 106250 + }, + { + "epoch": 46.19733767997827, + "grad_norm": 3.261399745941162, + "learning_rate": 7.67295046032442e-07, + "loss": 11.7197, + "step": 106300 + }, + { + "epoch": 46.21907090464548, + "grad_norm": 3.867229461669922, + "learning_rate": 7.629110039456379e-07, + "loss": 11.7311, + "step": 106350 + }, + { + "epoch": 46.240804129312686, + "grad_norm": 5.125184059143066, + "learning_rate": 7.585269618588339e-07, + "loss": 11.7357, + "step": 106400 + }, + { + "epoch": 46.262537353979894, + "grad_norm": 3.271857500076294, + "learning_rate": 7.541429197720298e-07, + "loss": 11.7182, + "step": 106450 + }, + { + "epoch": 46.28427057864711, + "grad_norm": 2.972466230392456, + "learning_rate": 7.49758877685226e-07, + "loss": 11.7244, + "step": 106500 + }, + { + "epoch": 46.30600380331432, + "grad_norm": 10.320878028869629, + "learning_rate": 7.453748355984218e-07, + "loss": 11.7296, + "step": 106550 + }, + { + "epoch": 46.32773702798153, + "grad_norm": 7.7540483474731445, + "learning_rate": 7.409907935116177e-07, + "loss": 11.7175, + "step": 106600 + }, + { + "epoch": 46.349470252648736, + "grad_norm": 5.142116546630859, + "learning_rate": 7.366067514248137e-07, + "loss": 11.7324, + "step": 106650 + }, + { + "epoch": 46.371203477315944, + "grad_norm": 3.158510446548462, + "learning_rate": 7.322227093380096e-07, + "loss": 11.7292, + "step": 106700 + }, + { + "epoch": 46.39293670198316, + "grad_norm": 3.982985258102417, + "learning_rate": 7.278386672512057e-07, + "loss": 11.7214, + "step": 106750 + }, + { + "epoch": 46.41466992665037, + "grad_norm": 3.4331562519073486, + "learning_rate": 7.234546251644016e-07, + "loss": 11.7263, + "step": 106800 + }, + { + "epoch": 46.43640315131758, + "grad_norm": 5.8017401695251465, + "learning_rate": 7.190705830775977e-07, + "loss": 11.7349, + "step": 106850 + }, + { + "epoch": 46.458136375984786, + "grad_norm": 4.63163948059082, + "learning_rate": 7.146865409907935e-07, + "loss": 11.7241, + "step": 106900 + }, + { + "epoch": 46.479869600651995, + "grad_norm": 4.267096996307373, + "learning_rate": 7.103024989039896e-07, + "loss": 11.7189, + "step": 106950 + }, + { + "epoch": 46.5016028253192, + "grad_norm": 4.0522871017456055, + "learning_rate": 7.059184568171855e-07, + "loss": 11.7209, + "step": 107000 + }, + { + "epoch": 46.52333604998642, + "grad_norm": 5.3363142013549805, + "learning_rate": 7.015344147303816e-07, + "loss": 11.7084, + "step": 107050 + }, + { + "epoch": 46.54506927465363, + "grad_norm": 4.059858322143555, + "learning_rate": 6.971503726435775e-07, + "loss": 11.7125, + "step": 107100 + }, + { + "epoch": 46.566802499320836, + "grad_norm": 3.252812623977661, + "learning_rate": 6.927663305567733e-07, + "loss": 11.7268, + "step": 107150 + }, + { + "epoch": 46.588535723988045, + "grad_norm": 4.82172966003418, + "learning_rate": 6.883822884699694e-07, + "loss": 11.7139, + "step": 107200 + }, + { + "epoch": 46.61026894865525, + "grad_norm": 8.201459884643555, + "learning_rate": 6.839982463831653e-07, + "loss": 11.7298, + "step": 107250 + }, + { + "epoch": 46.63200217332247, + "grad_norm": 3.159785747528076, + "learning_rate": 6.796142042963614e-07, + "loss": 11.7209, + "step": 107300 + }, + { + "epoch": 46.65373539798968, + "grad_norm": 11.2830171585083, + "learning_rate": 6.752301622095573e-07, + "loss": 11.7288, + "step": 107350 + }, + { + "epoch": 46.67546862265689, + "grad_norm": 4.074632167816162, + "learning_rate": 6.708461201227533e-07, + "loss": 11.7174, + "step": 107400 + }, + { + "epoch": 46.697201847324095, + "grad_norm": 12.465502738952637, + "learning_rate": 6.664620780359492e-07, + "loss": 11.7186, + "step": 107450 + }, + { + "epoch": 46.718935071991304, + "grad_norm": 4.864065647125244, + "learning_rate": 6.620780359491452e-07, + "loss": 11.7249, + "step": 107500 + }, + { + "epoch": 46.74066829665852, + "grad_norm": 7.3475341796875, + "learning_rate": 6.576939938623411e-07, + "loss": 11.7212, + "step": 107550 + }, + { + "epoch": 46.76240152132573, + "grad_norm": 6.0634565353393555, + "learning_rate": 6.533099517755372e-07, + "loss": 11.7342, + "step": 107600 + }, + { + "epoch": 46.78413474599294, + "grad_norm": 6.251104831695557, + "learning_rate": 6.489259096887331e-07, + "loss": 11.7225, + "step": 107650 + }, + { + "epoch": 46.805867970660145, + "grad_norm": 5.822422027587891, + "learning_rate": 6.44541867601929e-07, + "loss": 11.7272, + "step": 107700 + }, + { + "epoch": 46.827601195327354, + "grad_norm": 8.700297355651855, + "learning_rate": 6.40157825515125e-07, + "loss": 11.7297, + "step": 107750 + }, + { + "epoch": 46.84933441999457, + "grad_norm": 5.136385917663574, + "learning_rate": 6.357737834283209e-07, + "loss": 11.726, + "step": 107800 + }, + { + "epoch": 46.87106764466178, + "grad_norm": 5.658658981323242, + "learning_rate": 6.31389741341517e-07, + "loss": 11.7237, + "step": 107850 + }, + { + "epoch": 46.89280086932899, + "grad_norm": 6.1630353927612305, + "learning_rate": 6.270056992547129e-07, + "loss": 11.728, + "step": 107900 + }, + { + "epoch": 46.914534093996195, + "grad_norm": 11.851746559143066, + "learning_rate": 6.226216571679088e-07, + "loss": 11.7218, + "step": 107950 + }, + { + "epoch": 46.936267318663404, + "grad_norm": 3.989478588104248, + "learning_rate": 6.182376150811048e-07, + "loss": 11.7246, + "step": 108000 + }, + { + "epoch": 46.95800054333062, + "grad_norm": 10.78637981414795, + "learning_rate": 6.138535729943007e-07, + "loss": 11.7238, + "step": 108050 + }, + { + "epoch": 46.97973376799783, + "grad_norm": 11.566459655761719, + "learning_rate": 6.094695309074968e-07, + "loss": 11.7193, + "step": 108100 + }, + { + "epoch": 47.001303993480036, + "grad_norm": 9.378003120422363, + "learning_rate": 6.050854888206927e-07, + "loss": 11.6363, + "step": 108150 + }, + { + "epoch": 47.023037218147245, + "grad_norm": 5.3512091636657715, + "learning_rate": 6.007014467338887e-07, + "loss": 11.7135, + "step": 108200 + }, + { + "epoch": 47.04477044281445, + "grad_norm": 10.854682922363281, + "learning_rate": 5.963174046470846e-07, + "loss": 11.7218, + "step": 108250 + }, + { + "epoch": 47.06650366748166, + "grad_norm": 3.557173728942871, + "learning_rate": 5.919333625602806e-07, + "loss": 11.7216, + "step": 108300 + }, + { + "epoch": 47.08823689214887, + "grad_norm": 4.374483585357666, + "learning_rate": 5.875493204734767e-07, + "loss": 11.722, + "step": 108350 + }, + { + "epoch": 47.10997011681608, + "grad_norm": 4.371666431427002, + "learning_rate": 5.831652783866726e-07, + "loss": 11.7127, + "step": 108400 + }, + { + "epoch": 47.131703341483295, + "grad_norm": 6.458693504333496, + "learning_rate": 5.787812362998686e-07, + "loss": 11.7278, + "step": 108450 + }, + { + "epoch": 47.1534365661505, + "grad_norm": 20.349096298217773, + "learning_rate": 5.743971942130644e-07, + "loss": 11.7243, + "step": 108500 + }, + { + "epoch": 47.17516979081771, + "grad_norm": 24.32076072692871, + "learning_rate": 5.700131521262604e-07, + "loss": 11.7229, + "step": 108550 + }, + { + "epoch": 47.19690301548492, + "grad_norm": 3.82700252532959, + "learning_rate": 5.656291100394565e-07, + "loss": 11.7084, + "step": 108600 + }, + { + "epoch": 47.21863624015213, + "grad_norm": 3.007939338684082, + "learning_rate": 5.612450679526524e-07, + "loss": 11.7192, + "step": 108650 + }, + { + "epoch": 47.240369464819345, + "grad_norm": 4.3855299949646, + "learning_rate": 5.568610258658484e-07, + "loss": 11.7216, + "step": 108700 + }, + { + "epoch": 47.262102689486554, + "grad_norm": 3.7610890865325928, + "learning_rate": 5.524769837790443e-07, + "loss": 11.7107, + "step": 108750 + }, + { + "epoch": 47.28383591415376, + "grad_norm": 5.21887731552124, + "learning_rate": 5.480929416922403e-07, + "loss": 11.7165, + "step": 108800 + }, + { + "epoch": 47.30556913882097, + "grad_norm": 4.387558460235596, + "learning_rate": 5.437088996054362e-07, + "loss": 11.7145, + "step": 108850 + }, + { + "epoch": 47.32730236348818, + "grad_norm": 2.78105092048645, + "learning_rate": 5.393248575186323e-07, + "loss": 11.7132, + "step": 108900 + }, + { + "epoch": 47.349035588155395, + "grad_norm": 6.782215595245361, + "learning_rate": 5.349408154318282e-07, + "loss": 11.7313, + "step": 108950 + }, + { + "epoch": 47.370768812822604, + "grad_norm": 4.510542392730713, + "learning_rate": 5.305567733450242e-07, + "loss": 11.7191, + "step": 109000 + }, + { + "epoch": 47.39250203748981, + "grad_norm": 5.735984802246094, + "learning_rate": 5.261727312582201e-07, + "loss": 11.7178, + "step": 109050 + }, + { + "epoch": 47.41423526215702, + "grad_norm": 9.908329963684082, + "learning_rate": 5.21788689171416e-07, + "loss": 11.7219, + "step": 109100 + }, + { + "epoch": 47.43596848682423, + "grad_norm": 5.137279987335205, + "learning_rate": 5.174046470846121e-07, + "loss": 11.7231, + "step": 109150 + }, + { + "epoch": 47.457701711491445, + "grad_norm": 15.530988693237305, + "learning_rate": 5.13020604997808e-07, + "loss": 11.7219, + "step": 109200 + }, + { + "epoch": 47.479434936158654, + "grad_norm": 5.565670490264893, + "learning_rate": 5.08636562911004e-07, + "loss": 11.7231, + "step": 109250 + }, + { + "epoch": 47.50116816082586, + "grad_norm": 3.390558958053589, + "learning_rate": 5.042525208241999e-07, + "loss": 11.7143, + "step": 109300 + }, + { + "epoch": 47.52290138549307, + "grad_norm": 3.168869972229004, + "learning_rate": 4.998684787373959e-07, + "loss": 11.7107, + "step": 109350 + }, + { + "epoch": 47.54463461016028, + "grad_norm": 4.391485691070557, + "learning_rate": 4.954844366505919e-07, + "loss": 11.7171, + "step": 109400 + }, + { + "epoch": 47.566367834827496, + "grad_norm": 10.428187370300293, + "learning_rate": 4.911003945637879e-07, + "loss": 11.7214, + "step": 109450 + }, + { + "epoch": 47.588101059494704, + "grad_norm": 8.480759620666504, + "learning_rate": 4.867163524769838e-07, + "loss": 11.7284, + "step": 109500 + }, + { + "epoch": 47.60983428416191, + "grad_norm": 8.282448768615723, + "learning_rate": 4.823323103901798e-07, + "loss": 11.723, + "step": 109550 + }, + { + "epoch": 47.63156750882912, + "grad_norm": 3.495969295501709, + "learning_rate": 4.779482683033757e-07, + "loss": 11.7213, + "step": 109600 + }, + { + "epoch": 47.65330073349633, + "grad_norm": 4.484890937805176, + "learning_rate": 4.735642262165717e-07, + "loss": 11.7149, + "step": 109650 + }, + { + "epoch": 47.675033958163546, + "grad_norm": 11.390790939331055, + "learning_rate": 4.6918018412976767e-07, + "loss": 11.7138, + "step": 109700 + }, + { + "epoch": 47.696767182830754, + "grad_norm": 3.9627888202667236, + "learning_rate": 4.6479614204296364e-07, + "loss": 11.722, + "step": 109750 + }, + { + "epoch": 47.71850040749796, + "grad_norm": 5.796283721923828, + "learning_rate": 4.604120999561596e-07, + "loss": 11.7182, + "step": 109800 + }, + { + "epoch": 47.74023363216517, + "grad_norm": 8.347150802612305, + "learning_rate": 4.560280578693556e-07, + "loss": 11.7257, + "step": 109850 + }, + { + "epoch": 47.76196685683238, + "grad_norm": 18.176475524902344, + "learning_rate": 4.5164401578255155e-07, + "loss": 11.7289, + "step": 109900 + }, + { + "epoch": 47.78370008149959, + "grad_norm": 4.672854900360107, + "learning_rate": 4.472599736957475e-07, + "loss": 11.7134, + "step": 109950 + }, + { + "epoch": 47.805433306166805, + "grad_norm": 4.16023588180542, + "learning_rate": 4.428759316089435e-07, + "loss": 11.7233, + "step": 110000 + }, + { + "epoch": 47.805433306166805, + "eval_cer": 0.07368261883895177, + "eval_loss": 2.426945209503174, + "eval_runtime": 398.6555, + "eval_samples_per_second": 13.561, + "eval_steps_per_second": 3.391, + "eval_wer": 0.224462238970011, + "step": 110000 + }, + { + "epoch": 47.82716653083401, + "grad_norm": 6.881287097930908, + "learning_rate": 4.3849188952213946e-07, + "loss": 11.7274, + "step": 110050 + }, + { + "epoch": 47.84889975550122, + "grad_norm": 22.432987213134766, + "learning_rate": 4.3410784743533543e-07, + "loss": 11.7251, + "step": 110100 + }, + { + "epoch": 47.87063298016843, + "grad_norm": 5.372289180755615, + "learning_rate": 4.2972380534853134e-07, + "loss": 11.7208, + "step": 110150 + }, + { + "epoch": 47.89236620483564, + "grad_norm": 7.969860553741455, + "learning_rate": 4.253397632617273e-07, + "loss": 11.714, + "step": 110200 + }, + { + "epoch": 47.914099429502855, + "grad_norm": 4.329230308532715, + "learning_rate": 4.209557211749233e-07, + "loss": 11.7203, + "step": 110250 + }, + { + "epoch": 47.93583265417006, + "grad_norm": 5.077062606811523, + "learning_rate": 4.1657167908811925e-07, + "loss": 11.7147, + "step": 110300 + }, + { + "epoch": 47.95756587883727, + "grad_norm": 10.139983177185059, + "learning_rate": 4.121876370013152e-07, + "loss": 11.7224, + "step": 110350 + }, + { + "epoch": 47.97929910350448, + "grad_norm": 7.735840320587158, + "learning_rate": 4.078035949145112e-07, + "loss": 11.7244, + "step": 110400 + }, + { + "epoch": 48.00086932898669, + "grad_norm": 3.7973625659942627, + "learning_rate": 4.0341955282770716e-07, + "loss": 11.6379, + "step": 110450 + }, + { + "epoch": 48.0226025536539, + "grad_norm": 30.568002700805664, + "learning_rate": 3.990355107409032e-07, + "loss": 11.7251, + "step": 110500 + }, + { + "epoch": 48.044335778321106, + "grad_norm": 4.356334209442139, + "learning_rate": 3.9465146865409915e-07, + "loss": 11.7165, + "step": 110550 + }, + { + "epoch": 48.06606900298832, + "grad_norm": 2.87776780128479, + "learning_rate": 3.902674265672951e-07, + "loss": 11.7094, + "step": 110600 + }, + { + "epoch": 48.08780222765553, + "grad_norm": 7.523682117462158, + "learning_rate": 3.858833844804911e-07, + "loss": 11.7341, + "step": 110650 + }, + { + "epoch": 48.10953545232274, + "grad_norm": 4.565247535705566, + "learning_rate": 3.8149934239368695e-07, + "loss": 11.7107, + "step": 110700 + }, + { + "epoch": 48.13126867698995, + "grad_norm": 8.14168643951416, + "learning_rate": 3.77115300306883e-07, + "loss": 11.708, + "step": 110750 + }, + { + "epoch": 48.153001901657156, + "grad_norm": 9.361733436584473, + "learning_rate": 3.7273125822007895e-07, + "loss": 11.7357, + "step": 110800 + }, + { + "epoch": 48.17473512632437, + "grad_norm": 3.413947105407715, + "learning_rate": 3.683472161332749e-07, + "loss": 11.7106, + "step": 110850 + }, + { + "epoch": 48.19646835099158, + "grad_norm": 3.5155258178710938, + "learning_rate": 3.639631740464709e-07, + "loss": 11.7122, + "step": 110900 + }, + { + "epoch": 48.21820157565879, + "grad_norm": 3.1602470874786377, + "learning_rate": 3.5957913195966685e-07, + "loss": 11.7176, + "step": 110950 + }, + { + "epoch": 48.239934800326, + "grad_norm": 9.386332511901855, + "learning_rate": 3.551950898728628e-07, + "loss": 11.724, + "step": 111000 + }, + { + "epoch": 48.261668024993206, + "grad_norm": 4.479788780212402, + "learning_rate": 3.508110477860588e-07, + "loss": 11.7121, + "step": 111050 + }, + { + "epoch": 48.28340124966042, + "grad_norm": 5.899557590484619, + "learning_rate": 3.4642700569925476e-07, + "loss": 11.7212, + "step": 111100 + }, + { + "epoch": 48.30513447432763, + "grad_norm": 11.247237205505371, + "learning_rate": 3.4204296361245073e-07, + "loss": 11.7196, + "step": 111150 + }, + { + "epoch": 48.32686769899484, + "grad_norm": 5.190598487854004, + "learning_rate": 3.376589215256467e-07, + "loss": 11.7078, + "step": 111200 + }, + { + "epoch": 48.34860092366205, + "grad_norm": 4.109508037567139, + "learning_rate": 3.332748794388426e-07, + "loss": 11.7155, + "step": 111250 + }, + { + "epoch": 48.370334148329256, + "grad_norm": 3.0162370204925537, + "learning_rate": 3.288908373520386e-07, + "loss": 11.7168, + "step": 111300 + }, + { + "epoch": 48.39206737299647, + "grad_norm": 11.546656608581543, + "learning_rate": 3.2450679526523456e-07, + "loss": 11.7252, + "step": 111350 + }, + { + "epoch": 48.41380059766368, + "grad_norm": 3.388889789581299, + "learning_rate": 3.201227531784305e-07, + "loss": 11.7268, + "step": 111400 + }, + { + "epoch": 48.43553382233089, + "grad_norm": 6.033073902130127, + "learning_rate": 3.157387110916265e-07, + "loss": 11.7127, + "step": 111450 + }, + { + "epoch": 48.4572670469981, + "grad_norm": 4.543982028961182, + "learning_rate": 3.1135466900482246e-07, + "loss": 11.7272, + "step": 111500 + }, + { + "epoch": 48.479000271665306, + "grad_norm": 9.906218528747559, + "learning_rate": 3.0697062691801843e-07, + "loss": 11.7268, + "step": 111550 + }, + { + "epoch": 48.500733496332515, + "grad_norm": 7.095948696136475, + "learning_rate": 3.025865848312144e-07, + "loss": 11.712, + "step": 111600 + }, + { + "epoch": 48.52246672099973, + "grad_norm": 9.97701644897461, + "learning_rate": 2.9820254274441037e-07, + "loss": 11.722, + "step": 111650 + }, + { + "epoch": 48.54419994566694, + "grad_norm": 4.398223876953125, + "learning_rate": 2.9381850065760634e-07, + "loss": 11.7101, + "step": 111700 + }, + { + "epoch": 48.56593317033415, + "grad_norm": 3.77424693107605, + "learning_rate": 2.894344585708023e-07, + "loss": 11.7272, + "step": 111750 + }, + { + "epoch": 48.58766639500136, + "grad_norm": 17.39592933654785, + "learning_rate": 2.850504164839983e-07, + "loss": 11.7154, + "step": 111800 + }, + { + "epoch": 48.609399619668565, + "grad_norm": 3.8219528198242188, + "learning_rate": 2.8066637439719425e-07, + "loss": 11.7156, + "step": 111850 + }, + { + "epoch": 48.63113284433578, + "grad_norm": 9.067111015319824, + "learning_rate": 2.7628233231039017e-07, + "loss": 11.7133, + "step": 111900 + }, + { + "epoch": 48.65286606900299, + "grad_norm": 15.224953651428223, + "learning_rate": 2.7189829022358614e-07, + "loss": 11.7166, + "step": 111950 + }, + { + "epoch": 48.6745992936702, + "grad_norm": 4.944436073303223, + "learning_rate": 2.675142481367821e-07, + "loss": 11.7152, + "step": 112000 + }, + { + "epoch": 48.69633251833741, + "grad_norm": 11.312178611755371, + "learning_rate": 2.6313020604997813e-07, + "loss": 11.7157, + "step": 112050 + }, + { + "epoch": 48.718065743004615, + "grad_norm": 5.6469011306762695, + "learning_rate": 2.587461639631741e-07, + "loss": 11.7188, + "step": 112100 + }, + { + "epoch": 48.73979896767183, + "grad_norm": 3.34533429145813, + "learning_rate": 2.5436212187637007e-07, + "loss": 11.7275, + "step": 112150 + }, + { + "epoch": 48.76153219233904, + "grad_norm": 9.967689514160156, + "learning_rate": 2.49978079789566e-07, + "loss": 11.717, + "step": 112200 + }, + { + "epoch": 48.78326541700625, + "grad_norm": 5.482551574707031, + "learning_rate": 2.4559403770276195e-07, + "loss": 11.7111, + "step": 112250 + }, + { + "epoch": 48.80499864167346, + "grad_norm": 4.191429615020752, + "learning_rate": 2.412099956159579e-07, + "loss": 11.7112, + "step": 112300 + }, + { + "epoch": 48.826731866340666, + "grad_norm": 4.112410068511963, + "learning_rate": 2.3682595352915392e-07, + "loss": 11.7252, + "step": 112350 + }, + { + "epoch": 48.84846509100788, + "grad_norm": 4.255959510803223, + "learning_rate": 2.3244191144234989e-07, + "loss": 11.7149, + "step": 112400 + }, + { + "epoch": 48.87019831567509, + "grad_norm": 8.20151424407959, + "learning_rate": 2.2805786935554583e-07, + "loss": 11.7073, + "step": 112450 + }, + { + "epoch": 48.8919315403423, + "grad_norm": 4.13128137588501, + "learning_rate": 2.236738272687418e-07, + "loss": 11.72, + "step": 112500 + }, + { + "epoch": 48.91366476500951, + "grad_norm": 6.540150165557861, + "learning_rate": 2.1928978518193777e-07, + "loss": 11.707, + "step": 112550 + }, + { + "epoch": 48.935397989676716, + "grad_norm": 10.835039138793945, + "learning_rate": 2.1490574309513374e-07, + "loss": 11.7185, + "step": 112600 + }, + { + "epoch": 48.95713121434393, + "grad_norm": 11.767996788024902, + "learning_rate": 2.105217010083297e-07, + "loss": 11.7273, + "step": 112650 + }, + { + "epoch": 48.97886443901114, + "grad_norm": 7.164200305938721, + "learning_rate": 2.0613765892152568e-07, + "loss": 11.7255, + "step": 112700 + }, + { + "epoch": 49.00043466449335, + "grad_norm": 3.889307737350464, + "learning_rate": 2.0175361683472162e-07, + "loss": 11.6305, + "step": 112750 + }, + { + "epoch": 49.022167889160556, + "grad_norm": 3.3905019760131836, + "learning_rate": 1.973695747479176e-07, + "loss": 11.7146, + "step": 112800 + }, + { + "epoch": 49.043901113827765, + "grad_norm": 10.843219757080078, + "learning_rate": 1.9298553266111356e-07, + "loss": 11.7074, + "step": 112850 + }, + { + "epoch": 49.065634338494974, + "grad_norm": 6.1026082038879395, + "learning_rate": 1.8860149057430953e-07, + "loss": 11.7187, + "step": 112900 + }, + { + "epoch": 49.08736756316218, + "grad_norm": 12.958758354187012, + "learning_rate": 1.842174484875055e-07, + "loss": 11.7198, + "step": 112950 + }, + { + "epoch": 49.10910078782939, + "grad_norm": 7.045960426330566, + "learning_rate": 1.7983340640070144e-07, + "loss": 11.7171, + "step": 113000 + }, + { + "epoch": 49.13083401249661, + "grad_norm": 8.215546607971191, + "learning_rate": 1.754493643138974e-07, + "loss": 11.7161, + "step": 113050 + }, + { + "epoch": 49.152567237163815, + "grad_norm": 3.4392971992492676, + "learning_rate": 1.710653222270934e-07, + "loss": 11.7033, + "step": 113100 + }, + { + "epoch": 49.174300461831024, + "grad_norm": 4.333184242248535, + "learning_rate": 1.6668128014028937e-07, + "loss": 11.7221, + "step": 113150 + }, + { + "epoch": 49.19603368649823, + "grad_norm": 8.148516654968262, + "learning_rate": 1.6229723805348534e-07, + "loss": 11.7099, + "step": 113200 + }, + { + "epoch": 49.21776691116544, + "grad_norm": 10.723722457885742, + "learning_rate": 1.579131959666813e-07, + "loss": 11.7253, + "step": 113250 + }, + { + "epoch": 49.23950013583266, + "grad_norm": 5.778897285461426, + "learning_rate": 1.5352915387987726e-07, + "loss": 11.7058, + "step": 113300 + }, + { + "epoch": 49.261233360499865, + "grad_norm": 5.398443698883057, + "learning_rate": 1.4914511179307322e-07, + "loss": 11.7219, + "step": 113350 + }, + { + "epoch": 49.282966585167074, + "grad_norm": 3.614530324935913, + "learning_rate": 1.447610697062692e-07, + "loss": 11.7105, + "step": 113400 + }, + { + "epoch": 49.30469980983428, + "grad_norm": 4.205718040466309, + "learning_rate": 1.4037702761946516e-07, + "loss": 11.7128, + "step": 113450 + }, + { + "epoch": 49.32643303450149, + "grad_norm": 5.203486442565918, + "learning_rate": 1.3599298553266113e-07, + "loss": 11.7145, + "step": 113500 + }, + { + "epoch": 49.34816625916871, + "grad_norm": 3.4985852241516113, + "learning_rate": 1.316089434458571e-07, + "loss": 11.7212, + "step": 113550 + }, + { + "epoch": 49.369899483835916, + "grad_norm": 9.43883991241455, + "learning_rate": 1.2722490135905305e-07, + "loss": 11.7124, + "step": 113600 + }, + { + "epoch": 49.391632708503124, + "grad_norm": 7.489180088043213, + "learning_rate": 1.2284085927224901e-07, + "loss": 11.7207, + "step": 113650 + }, + { + "epoch": 49.41336593317033, + "grad_norm": 9.499123573303223, + "learning_rate": 1.18456817185445e-07, + "loss": 11.7147, + "step": 113700 + }, + { + "epoch": 49.43509915783754, + "grad_norm": 7.789849758148193, + "learning_rate": 1.1407277509864095e-07, + "loss": 11.7171, + "step": 113750 + }, + { + "epoch": 49.45683238250476, + "grad_norm": 9.22687816619873, + "learning_rate": 1.0968873301183692e-07, + "loss": 11.7071, + "step": 113800 + }, + { + "epoch": 49.478565607171966, + "grad_norm": 5.999274253845215, + "learning_rate": 1.0530469092503289e-07, + "loss": 11.7137, + "step": 113850 + }, + { + "epoch": 49.500298831839174, + "grad_norm": 9.73884391784668, + "learning_rate": 1.0092064883822885e-07, + "loss": 11.7201, + "step": 113900 + }, + { + "epoch": 49.52203205650638, + "grad_norm": 9.630657196044922, + "learning_rate": 9.653660675142482e-08, + "loss": 11.7212, + "step": 113950 + }, + { + "epoch": 49.54376528117359, + "grad_norm": 4.612308979034424, + "learning_rate": 9.21525646646208e-08, + "loss": 11.7247, + "step": 114000 + }, + { + "epoch": 49.56549850584081, + "grad_norm": 2.9876091480255127, + "learning_rate": 8.776852257781676e-08, + "loss": 11.7146, + "step": 114050 + }, + { + "epoch": 49.587231730508016, + "grad_norm": 4.555498123168945, + "learning_rate": 8.338448049101273e-08, + "loss": 11.7073, + "step": 114100 + }, + { + "epoch": 49.608964955175225, + "grad_norm": 5.015764236450195, + "learning_rate": 7.900043840420868e-08, + "loss": 11.7119, + "step": 114150 + }, + { + "epoch": 49.63069817984243, + "grad_norm": 5.208141326904297, + "learning_rate": 7.461639631740465e-08, + "loss": 11.7178, + "step": 114200 + }, + { + "epoch": 49.65243140450964, + "grad_norm": 5.420982837677002, + "learning_rate": 7.023235423060062e-08, + "loss": 11.7187, + "step": 114250 + }, + { + "epoch": 49.67416462917686, + "grad_norm": 6.694780349731445, + "learning_rate": 6.584831214379659e-08, + "loss": 11.7102, + "step": 114300 + }, + { + "epoch": 49.695897853844066, + "grad_norm": 4.203577995300293, + "learning_rate": 6.146427005699255e-08, + "loss": 11.7142, + "step": 114350 + }, + { + "epoch": 49.717631078511275, + "grad_norm": 3.1716277599334717, + "learning_rate": 5.7080227970188515e-08, + "loss": 11.7139, + "step": 114400 + }, + { + "epoch": 49.73936430317848, + "grad_norm": 3.897326946258545, + "learning_rate": 5.269618588338448e-08, + "loss": 11.712, + "step": 114450 + }, + { + "epoch": 49.76109752784569, + "grad_norm": 13.347712516784668, + "learning_rate": 4.8312143796580454e-08, + "loss": 11.7155, + "step": 114500 + }, + { + "epoch": 49.78283075251291, + "grad_norm": 6.420513153076172, + "learning_rate": 4.392810170977642e-08, + "loss": 11.7286, + "step": 114550 + }, + { + "epoch": 49.804563977180116, + "grad_norm": 5.6966447830200195, + "learning_rate": 3.954405962297238e-08, + "loss": 11.7078, + "step": 114600 + }, + { + "epoch": 49.826297201847325, + "grad_norm": 5.481497287750244, + "learning_rate": 3.516001753616835e-08, + "loss": 11.7061, + "step": 114650 + }, + { + "epoch": 49.84803042651453, + "grad_norm": 6.5728840827941895, + "learning_rate": 3.077597544936432e-08, + "loss": 11.7121, + "step": 114700 + }, + { + "epoch": 49.86976365118174, + "grad_norm": 7.013606071472168, + "learning_rate": 2.639193336256028e-08, + "loss": 11.7165, + "step": 114750 + }, + { + "epoch": 49.89149687584895, + "grad_norm": 8.17546272277832, + "learning_rate": 2.200789127575625e-08, + "loss": 11.7226, + "step": 114800 + }, + { + "epoch": 49.91323010051617, + "grad_norm": 11.53906536102295, + "learning_rate": 1.7623849188952217e-08, + "loss": 11.7159, + "step": 114850 + }, + { + "epoch": 49.934963325183375, + "grad_norm": 3.259451389312744, + "learning_rate": 1.3239807102148183e-08, + "loss": 11.7256, + "step": 114900 + }, + { + "epoch": 49.956696549850584, + "grad_norm": 9.84170913696289, + "learning_rate": 8.855765015344147e-09, + "loss": 11.7094, + "step": 114950 + }, + { + "epoch": 49.97842977451779, + "grad_norm": 3.992011070251465, + "learning_rate": 4.471722928540115e-09, + "loss": 11.7083, + "step": 115000 + }, + { + "epoch": 50.0, + "grad_norm": 7.805567264556885, + "learning_rate": 8.768084173608068e-11, + "loss": 11.6432, + "step": 115050 + } + ], + "logging_steps": 50, + "max_steps": 115050, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 10000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}