{ "best_metric": 8.422011887887493e-06, "best_model_checkpoint": "/home/zls/schreifmaschinn/error_correction/g2p/model/lux_g2p/checkpoint-272000", "epoch": 50.0, "eval_steps": 1000, "global_step": 423800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005899008966493629, "grad_norm": 4.3086838722229, "learning_rate": 5e-06, "loss": 1.95, "step": 50 }, { "epoch": 0.011798017932987258, "grad_norm": 0.9673639535903931, "learning_rate": 1e-05, "loss": 0.8595, "step": 100 }, { "epoch": 0.017697026899480887, "grad_norm": 0.5772698521614075, "learning_rate": 1.5e-05, "loss": 0.2998, "step": 150 }, { "epoch": 0.023596035865974516, "grad_norm": 0.4547168016433716, "learning_rate": 2e-05, "loss": 0.2069, "step": 200 }, { "epoch": 0.029495044832468145, "grad_norm": 0.5016191005706787, "learning_rate": 2.5e-05, "loss": 0.1865, "step": 250 }, { "epoch": 0.03539405379896177, "grad_norm": 0.42556145787239075, "learning_rate": 3e-05, "loss": 0.1681, "step": 300 }, { "epoch": 0.041293062765455406, "grad_norm": 0.6279575824737549, "learning_rate": 3.5e-05, "loss": 0.1607, "step": 350 }, { "epoch": 0.04719207173194903, "grad_norm": 0.38945791125297546, "learning_rate": 4e-05, "loss": 0.1476, "step": 400 }, { "epoch": 0.053091080698442664, "grad_norm": 0.36734408140182495, "learning_rate": 4.5e-05, "loss": 0.1369, "step": 450 }, { "epoch": 0.05899008966493629, "grad_norm": 0.4662855565547943, "learning_rate": 5e-05, "loss": 0.1381, "step": 500 }, { "epoch": 0.06488909863142991, "grad_norm": 0.46729612350463867, "learning_rate": 5.500000000000001e-05, "loss": 0.1328, "step": 550 }, { "epoch": 0.07078810759792355, "grad_norm": 0.37650012969970703, "learning_rate": 6e-05, "loss": 0.1256, "step": 600 }, { "epoch": 0.07668711656441718, "grad_norm": 0.7119417190551758, "learning_rate": 6.500000000000001e-05, "loss": 0.1242, "step": 650 }, { "epoch": 0.08258612553091081, "grad_norm": 0.4225035607814789, "learning_rate": 7e-05, "loss": 0.1202, "step": 700 }, { "epoch": 0.08848513449740443, "grad_norm": 0.5111592411994934, "learning_rate": 7.500000000000001e-05, "loss": 0.1151, "step": 750 }, { "epoch": 0.09438414346389806, "grad_norm": 0.41761940717697144, "learning_rate": 8e-05, "loss": 0.106, "step": 800 }, { "epoch": 0.1002831524303917, "grad_norm": 0.29943224787712097, "learning_rate": 8.5e-05, "loss": 0.1129, "step": 850 }, { "epoch": 0.10618216139688533, "grad_norm": 0.3935537338256836, "learning_rate": 9e-05, "loss": 0.1066, "step": 900 }, { "epoch": 0.11208117036337895, "grad_norm": 0.5790311694145203, "learning_rate": 9.5e-05, "loss": 0.11, "step": 950 }, { "epoch": 0.11798017932987258, "grad_norm": 0.4721543788909912, "learning_rate": 0.0001, "loss": 0.1081, "step": 1000 }, { "epoch": 0.11798017932987258, "eval_cer": 0.1351118760757315, "eval_loss": 0.08026490360498428, "eval_runtime": 2.2795, "eval_samples_per_second": 43.87, "eval_steps_per_second": 1.755, "eval_wer": 0.48, "step": 1000 }, { "epoch": 0.12387918829636621, "grad_norm": 0.6494184136390686, "learning_rate": 9.999999654927938e-05, "loss": 0.1001, "step": 1050 }, { "epoch": 0.12977819726285983, "grad_norm": 0.5411050915718079, "learning_rate": 9.999998619711795e-05, "loss": 0.095, "step": 1100 }, { "epoch": 0.13567720622935348, "grad_norm": 0.326639860868454, "learning_rate": 9.999996894351718e-05, "loss": 0.0947, "step": 1150 }, { "epoch": 0.1415762151958471, "grad_norm": 0.3582422733306885, "learning_rate": 9.999994478847943e-05, "loss": 0.0984, "step": 1200 }, { "epoch": 0.14747522416234074, "grad_norm": 0.38673850893974304, "learning_rate": 9.999991373200803e-05, "loss": 0.0956, "step": 1250 }, { "epoch": 0.15337423312883436, "grad_norm": 0.2861815392971039, "learning_rate": 9.999987577410728e-05, "loss": 0.0904, "step": 1300 }, { "epoch": 0.15927324209532798, "grad_norm": 0.3025553822517395, "learning_rate": 9.999983091478242e-05, "loss": 0.0906, "step": 1350 }, { "epoch": 0.16517225106182162, "grad_norm": 0.3377445340156555, "learning_rate": 9.999977915403962e-05, "loss": 0.0876, "step": 1400 }, { "epoch": 0.17107126002831524, "grad_norm": 0.5099515318870544, "learning_rate": 9.999972049188606e-05, "loss": 0.0805, "step": 1450 }, { "epoch": 0.17697026899480886, "grad_norm": 0.34481361508369446, "learning_rate": 9.99996549283298e-05, "loss": 0.0864, "step": 1500 }, { "epoch": 0.1828692779613025, "grad_norm": 0.2774967551231384, "learning_rate": 9.999958246337991e-05, "loss": 0.0867, "step": 1550 }, { "epoch": 0.18876828692779613, "grad_norm": 0.40599194169044495, "learning_rate": 9.999950309704639e-05, "loss": 0.0849, "step": 1600 }, { "epoch": 0.19466729589428977, "grad_norm": 0.30036336183547974, "learning_rate": 9.99994168293402e-05, "loss": 0.0807, "step": 1650 }, { "epoch": 0.2005663048607834, "grad_norm": 0.3344661593437195, "learning_rate": 9.999932366027325e-05, "loss": 0.08, "step": 1700 }, { "epoch": 0.206465313827277, "grad_norm": 0.393759161233902, "learning_rate": 9.999922358985837e-05, "loss": 0.0771, "step": 1750 }, { "epoch": 0.21236432279377065, "grad_norm": 0.22220578789710999, "learning_rate": 9.99991166181094e-05, "loss": 0.0769, "step": 1800 }, { "epoch": 0.21826333176026427, "grad_norm": 0.2425561398267746, "learning_rate": 9.999900274504111e-05, "loss": 0.0757, "step": 1850 }, { "epoch": 0.2241623407267579, "grad_norm": 0.37354230880737305, "learning_rate": 9.99988819706692e-05, "loss": 0.0784, "step": 1900 }, { "epoch": 0.23006134969325154, "grad_norm": 0.27697110176086426, "learning_rate": 9.999875429501034e-05, "loss": 0.076, "step": 1950 }, { "epoch": 0.23596035865974516, "grad_norm": 0.2599647045135498, "learning_rate": 9.999861971808216e-05, "loss": 0.0737, "step": 2000 }, { "epoch": 0.23596035865974516, "eval_cer": 0.11617900172117039, "eval_loss": 0.0890830010175705, "eval_runtime": 2.2035, "eval_samples_per_second": 45.383, "eval_steps_per_second": 1.815, "eval_wer": 0.45, "step": 2000 }, { "epoch": 0.2418593676262388, "grad_norm": 0.23670758306980133, "learning_rate": 9.999847823990325e-05, "loss": 0.0713, "step": 2050 }, { "epoch": 0.24775837659273242, "grad_norm": 0.3982628881931305, "learning_rate": 9.999832986049311e-05, "loss": 0.0771, "step": 2100 }, { "epoch": 0.25365738555922607, "grad_norm": 0.21133634448051453, "learning_rate": 9.999817457987223e-05, "loss": 0.0701, "step": 2150 }, { "epoch": 0.25955639452571966, "grad_norm": 0.2520991563796997, "learning_rate": 9.999801239806208e-05, "loss": 0.0755, "step": 2200 }, { "epoch": 0.2654554034922133, "grad_norm": 0.2903538942337036, "learning_rate": 9.999784331508499e-05, "loss": 0.0709, "step": 2250 }, { "epoch": 0.27135441245870695, "grad_norm": 0.31319159269332886, "learning_rate": 9.999766733096432e-05, "loss": 0.0717, "step": 2300 }, { "epoch": 0.27725342142520054, "grad_norm": 0.6461114287376404, "learning_rate": 9.999748444572437e-05, "loss": 0.0701, "step": 2350 }, { "epoch": 0.2831524303916942, "grad_norm": 0.3441803455352783, "learning_rate": 9.999729465939036e-05, "loss": 0.0729, "step": 2400 }, { "epoch": 0.28905143935818783, "grad_norm": 0.366977721452713, "learning_rate": 9.999709797198851e-05, "loss": 0.0688, "step": 2450 }, { "epoch": 0.2949504483246815, "grad_norm": 0.4182654023170471, "learning_rate": 9.999689438354597e-05, "loss": 0.0655, "step": 2500 }, { "epoch": 0.30084945729117507, "grad_norm": 0.35922497510910034, "learning_rate": 9.999668389409082e-05, "loss": 0.0677, "step": 2550 }, { "epoch": 0.3067484662576687, "grad_norm": 0.5008236765861511, "learning_rate": 9.999646650365214e-05, "loss": 0.063, "step": 2600 }, { "epoch": 0.31264747522416236, "grad_norm": 0.23329763114452362, "learning_rate": 9.999624221225992e-05, "loss": 0.0658, "step": 2650 }, { "epoch": 0.31854648419065595, "grad_norm": 0.2723117172718048, "learning_rate": 9.99960110199451e-05, "loss": 0.0613, "step": 2700 }, { "epoch": 0.3244454931571496, "grad_norm": 0.3102664053440094, "learning_rate": 9.999577292673963e-05, "loss": 0.061, "step": 2750 }, { "epoch": 0.33034450212364325, "grad_norm": 0.2409907579421997, "learning_rate": 9.999552793267634e-05, "loss": 0.0654, "step": 2800 }, { "epoch": 0.33624351109013684, "grad_norm": 0.3023812472820282, "learning_rate": 9.999527603778907e-05, "loss": 0.0655, "step": 2850 }, { "epoch": 0.3421425200566305, "grad_norm": 0.44546541571617126, "learning_rate": 9.999501724211257e-05, "loss": 0.0683, "step": 2900 }, { "epoch": 0.34804152902312413, "grad_norm": 0.3413434326648712, "learning_rate": 9.999475154568258e-05, "loss": 0.0616, "step": 2950 }, { "epoch": 0.3539405379896177, "grad_norm": 0.48283523321151733, "learning_rate": 9.999447894853577e-05, "loss": 0.0641, "step": 3000 }, { "epoch": 0.3539405379896177, "eval_cer": 0.11876075731497418, "eval_loss": 0.07166880369186401, "eval_runtime": 2.1803, "eval_samples_per_second": 45.866, "eval_steps_per_second": 1.835, "eval_wer": 0.43, "step": 3000 }, { "epoch": 0.35983954695611137, "grad_norm": 1.0307611227035522, "learning_rate": 9.999419945070976e-05, "loss": 0.0584, "step": 3050 }, { "epoch": 0.365738555922605, "grad_norm": 0.29957911372184753, "learning_rate": 9.999391305224312e-05, "loss": 0.0631, "step": 3100 }, { "epoch": 0.3716375648890986, "grad_norm": 0.27810344099998474, "learning_rate": 9.99936197531754e-05, "loss": 0.0606, "step": 3150 }, { "epoch": 0.37753657385559225, "grad_norm": 0.2573353946208954, "learning_rate": 9.999331955354708e-05, "loss": 0.0578, "step": 3200 }, { "epoch": 0.3834355828220859, "grad_norm": 0.35335010290145874, "learning_rate": 9.999301245339957e-05, "loss": 0.0603, "step": 3250 }, { "epoch": 0.38933459178857954, "grad_norm": 0.23269380629062653, "learning_rate": 9.99926984527753e-05, "loss": 0.0545, "step": 3300 }, { "epoch": 0.39523360075507313, "grad_norm": 0.26433321833610535, "learning_rate": 9.999237755171759e-05, "loss": 0.0589, "step": 3350 }, { "epoch": 0.4011326097215668, "grad_norm": 0.3034251630306244, "learning_rate": 9.999204975027073e-05, "loss": 0.0569, "step": 3400 }, { "epoch": 0.4070316186880604, "grad_norm": 0.3338973820209503, "learning_rate": 9.999171504847997e-05, "loss": 0.0614, "step": 3450 }, { "epoch": 0.412930627654554, "grad_norm": 0.2721070349216461, "learning_rate": 9.999137344639151e-05, "loss": 0.0622, "step": 3500 }, { "epoch": 0.41882963662104766, "grad_norm": 0.3947318196296692, "learning_rate": 9.99910249440525e-05, "loss": 0.0613, "step": 3550 }, { "epoch": 0.4247286455875413, "grad_norm": 0.29970213770866394, "learning_rate": 9.999066954151104e-05, "loss": 0.0571, "step": 3600 }, { "epoch": 0.4306276545540349, "grad_norm": 0.32868343591690063, "learning_rate": 9.99903072388162e-05, "loss": 0.0565, "step": 3650 }, { "epoch": 0.43652666352052855, "grad_norm": 0.5285988450050354, "learning_rate": 9.998993803601797e-05, "loss": 0.056, "step": 3700 }, { "epoch": 0.4424256724870222, "grad_norm": 0.2713000774383545, "learning_rate": 9.998956193316731e-05, "loss": 0.0525, "step": 3750 }, { "epoch": 0.4483246814535158, "grad_norm": 0.4543825387954712, "learning_rate": 9.998917893031616e-05, "loss": 0.056, "step": 3800 }, { "epoch": 0.45422369042000943, "grad_norm": 0.30238986015319824, "learning_rate": 9.998878902751735e-05, "loss": 0.0529, "step": 3850 }, { "epoch": 0.4601226993865031, "grad_norm": 0.27346181869506836, "learning_rate": 9.998839222482473e-05, "loss": 0.0602, "step": 3900 }, { "epoch": 0.4660217083529967, "grad_norm": 0.3461918532848358, "learning_rate": 9.998798852229302e-05, "loss": 0.0577, "step": 3950 }, { "epoch": 0.4719207173194903, "grad_norm": 0.37500637769699097, "learning_rate": 9.998757791997802e-05, "loss": 0.0516, "step": 4000 }, { "epoch": 0.4719207173194903, "eval_cer": 0.11359724612736662, "eval_loss": 0.041855134069919586, "eval_runtime": 2.1633, "eval_samples_per_second": 46.226, "eval_steps_per_second": 1.849, "eval_wer": 0.42, "step": 4000 }, { "epoch": 0.47781972628598396, "grad_norm": 0.37757042050361633, "learning_rate": 9.998716041793633e-05, "loss": 0.0516, "step": 4050 }, { "epoch": 0.4837187352524776, "grad_norm": 0.3043478727340698, "learning_rate": 9.998673601622563e-05, "loss": 0.0538, "step": 4100 }, { "epoch": 0.4896177442189712, "grad_norm": 0.332469642162323, "learning_rate": 9.998630471490446e-05, "loss": 0.0522, "step": 4150 }, { "epoch": 0.49551675318546484, "grad_norm": 0.21647360920906067, "learning_rate": 9.998586651403239e-05, "loss": 0.0529, "step": 4200 }, { "epoch": 0.5014157621519585, "grad_norm": 0.32291537523269653, "learning_rate": 9.998542141366988e-05, "loss": 0.0552, "step": 4250 }, { "epoch": 0.5073147711184521, "grad_norm": 0.23638127744197845, "learning_rate": 9.998496941387836e-05, "loss": 0.0539, "step": 4300 }, { "epoch": 0.5132137800849458, "grad_norm": 0.9142728447914124, "learning_rate": 9.998451051472026e-05, "loss": 0.053, "step": 4350 }, { "epoch": 0.5191127890514393, "grad_norm": 0.4890826344490051, "learning_rate": 9.998404471625886e-05, "loss": 0.0502, "step": 4400 }, { "epoch": 0.525011798017933, "grad_norm": 0.15029209852218628, "learning_rate": 9.99835720185585e-05, "loss": 0.0513, "step": 4450 }, { "epoch": 0.5309108069844266, "grad_norm": 0.3534867465496063, "learning_rate": 9.998309242168442e-05, "loss": 0.056, "step": 4500 }, { "epoch": 0.5368098159509203, "grad_norm": 0.33083680272102356, "learning_rate": 9.99826059257028e-05, "loss": 0.0565, "step": 4550 }, { "epoch": 0.5427088249174139, "grad_norm": 0.2789449393749237, "learning_rate": 9.99821125306808e-05, "loss": 0.0524, "step": 4600 }, { "epoch": 0.5486078338839075, "grad_norm": 0.2851982116699219, "learning_rate": 9.998161223668652e-05, "loss": 0.0539, "step": 4650 }, { "epoch": 0.5545068428504011, "grad_norm": 0.29760098457336426, "learning_rate": 9.998110504378903e-05, "loss": 0.053, "step": 4700 }, { "epoch": 0.5604058518168947, "grad_norm": 0.3834746778011322, "learning_rate": 9.998059095205831e-05, "loss": 0.0511, "step": 4750 }, { "epoch": 0.5663048607833884, "grad_norm": 0.25649213790893555, "learning_rate": 9.998006996156536e-05, "loss": 0.0508, "step": 4800 }, { "epoch": 0.572203869749882, "grad_norm": 0.3015453517436981, "learning_rate": 9.997954207238204e-05, "loss": 0.0545, "step": 4850 }, { "epoch": 0.5781028787163757, "grad_norm": 0.2435445487499237, "learning_rate": 9.997900728458126e-05, "loss": 0.0476, "step": 4900 }, { "epoch": 0.5840018876828693, "grad_norm": 0.30065733194351196, "learning_rate": 9.99784655982368e-05, "loss": 0.0496, "step": 4950 }, { "epoch": 0.589900896649363, "grad_norm": 0.3495642840862274, "learning_rate": 9.997791701342347e-05, "loss": 0.0515, "step": 5000 }, { "epoch": 0.589900896649363, "eval_cer": 0.10327022375215146, "eval_loss": 0.024764902889728546, "eval_runtime": 2.1847, "eval_samples_per_second": 45.772, "eval_steps_per_second": 1.831, "eval_wer": 0.34, "step": 5000 }, { "epoch": 0.5957999056158565, "grad_norm": 0.2593576908111572, "learning_rate": 9.997736153021695e-05, "loss": 0.0487, "step": 5050 }, { "epoch": 0.6016989145823501, "grad_norm": 0.15432050824165344, "learning_rate": 9.997679914869392e-05, "loss": 0.053, "step": 5100 }, { "epoch": 0.6075979235488438, "grad_norm": 0.24346163868904114, "learning_rate": 9.997622986893203e-05, "loss": 0.0485, "step": 5150 }, { "epoch": 0.6134969325153374, "grad_norm": 0.48757168650627136, "learning_rate": 9.997565369100983e-05, "loss": 0.0536, "step": 5200 }, { "epoch": 0.6193959414818311, "grad_norm": 0.5282140970230103, "learning_rate": 9.997507061500686e-05, "loss": 0.0495, "step": 5250 }, { "epoch": 0.6252949504483247, "grad_norm": 0.22188489139080048, "learning_rate": 9.997448064100362e-05, "loss": 0.0477, "step": 5300 }, { "epoch": 0.6311939594148183, "grad_norm": 0.2522975206375122, "learning_rate": 9.997388376908149e-05, "loss": 0.0471, "step": 5350 }, { "epoch": 0.6370929683813119, "grad_norm": 0.24585746228694916, "learning_rate": 9.997327999932291e-05, "loss": 0.0435, "step": 5400 }, { "epoch": 0.6429919773478056, "grad_norm": 0.29621371626853943, "learning_rate": 9.997266933181119e-05, "loss": 0.047, "step": 5450 }, { "epoch": 0.6488909863142992, "grad_norm": 0.35991647839546204, "learning_rate": 9.997205176663063e-05, "loss": 0.0492, "step": 5500 }, { "epoch": 0.6547899952807928, "grad_norm": 0.2336813360452652, "learning_rate": 9.997142730386647e-05, "loss": 0.0449, "step": 5550 }, { "epoch": 0.6606890042472865, "grad_norm": 0.3465854823589325, "learning_rate": 9.99707959436049e-05, "loss": 0.0473, "step": 5600 }, { "epoch": 0.6665880132137801, "grad_norm": 0.23588506877422333, "learning_rate": 9.997015768593307e-05, "loss": 0.0468, "step": 5650 }, { "epoch": 0.6724870221802737, "grad_norm": 0.28135815262794495, "learning_rate": 9.996951253093907e-05, "loss": 0.0494, "step": 5700 }, { "epoch": 0.6783860311467673, "grad_norm": 0.3338300287723541, "learning_rate": 9.996886047871197e-05, "loss": 0.0501, "step": 5750 }, { "epoch": 0.684285040113261, "grad_norm": 0.37827351689338684, "learning_rate": 9.996820152934176e-05, "loss": 0.0466, "step": 5800 }, { "epoch": 0.6901840490797546, "grad_norm": 0.3812226355075836, "learning_rate": 9.996753568291938e-05, "loss": 0.0504, "step": 5850 }, { "epoch": 0.6960830580462483, "grad_norm": 0.31608110666275024, "learning_rate": 9.996686293953675e-05, "loss": 0.0391, "step": 5900 }, { "epoch": 0.7019820670127419, "grad_norm": 0.2291274219751358, "learning_rate": 9.996618329928673e-05, "loss": 0.0433, "step": 5950 }, { "epoch": 0.7078810759792354, "grad_norm": 0.3444606065750122, "learning_rate": 9.996549676226312e-05, "loss": 0.0446, "step": 6000 }, { "epoch": 0.7078810759792354, "eval_cer": 0.10843373493975904, "eval_loss": 0.02725200355052948, "eval_runtime": 2.2259, "eval_samples_per_second": 44.925, "eval_steps_per_second": 1.797, "eval_wer": 0.39, "step": 6000 }, { "epoch": 0.7137800849457291, "grad_norm": 0.26951658725738525, "learning_rate": 9.99648033285607e-05, "loss": 0.0467, "step": 6050 }, { "epoch": 0.7196790939122227, "grad_norm": 0.216227725148201, "learning_rate": 9.996410299827517e-05, "loss": 0.0455, "step": 6100 }, { "epoch": 0.7255781028787164, "grad_norm": 0.31747373938560486, "learning_rate": 9.99633957715032e-05, "loss": 0.0437, "step": 6150 }, { "epoch": 0.73147711184521, "grad_norm": 0.32416319847106934, "learning_rate": 9.996268164834239e-05, "loss": 0.0465, "step": 6200 }, { "epoch": 0.7373761208117037, "grad_norm": 0.3747732639312744, "learning_rate": 9.996196062889133e-05, "loss": 0.0408, "step": 6250 }, { "epoch": 0.7432751297781972, "grad_norm": 0.24955621361732483, "learning_rate": 9.996123271324955e-05, "loss": 0.0409, "step": 6300 }, { "epoch": 0.7491741387446909, "grad_norm": 0.3525790572166443, "learning_rate": 9.99604979015175e-05, "loss": 0.0435, "step": 6350 }, { "epoch": 0.7550731477111845, "grad_norm": 0.2634985148906708, "learning_rate": 9.995975619379662e-05, "loss": 0.0461, "step": 6400 }, { "epoch": 0.7609721566776781, "grad_norm": 0.47639200091362, "learning_rate": 9.995900759018928e-05, "loss": 0.044, "step": 6450 }, { "epoch": 0.7668711656441718, "grad_norm": 0.31180036067962646, "learning_rate": 9.99582520907988e-05, "loss": 0.0447, "step": 6500 }, { "epoch": 0.7727701746106654, "grad_norm": 0.13355787098407745, "learning_rate": 9.995748969572949e-05, "loss": 0.043, "step": 6550 }, { "epoch": 0.7786691835771591, "grad_norm": 0.18307660520076752, "learning_rate": 9.995672040508655e-05, "loss": 0.0422, "step": 6600 }, { "epoch": 0.7845681925436526, "grad_norm": 0.3828769028186798, "learning_rate": 9.995594421897619e-05, "loss": 0.042, "step": 6650 }, { "epoch": 0.7904672015101463, "grad_norm": 0.40888863801956177, "learning_rate": 9.995516113750553e-05, "loss": 0.0448, "step": 6700 }, { "epoch": 0.7963662104766399, "grad_norm": 0.22636084258556366, "learning_rate": 9.995437116078266e-05, "loss": 0.0435, "step": 6750 }, { "epoch": 0.8022652194431336, "grad_norm": 0.3601085841655731, "learning_rate": 9.995357428891663e-05, "loss": 0.039, "step": 6800 }, { "epoch": 0.8081642284096272, "grad_norm": 0.2372651994228363, "learning_rate": 9.995277052201741e-05, "loss": 0.0451, "step": 6850 }, { "epoch": 0.8140632373761209, "grad_norm": 0.21245214343070984, "learning_rate": 9.995195986019597e-05, "loss": 0.0454, "step": 6900 }, { "epoch": 0.8199622463426144, "grad_norm": 0.2504885792732239, "learning_rate": 9.995114230356419e-05, "loss": 0.0429, "step": 6950 }, { "epoch": 0.825861255309108, "grad_norm": 0.30643174052238464, "learning_rate": 9.995031785223492e-05, "loss": 0.0449, "step": 7000 }, { "epoch": 0.825861255309108, "eval_cer": 0.10327022375215146, "eval_loss": 0.023871298879384995, "eval_runtime": 2.235, "eval_samples_per_second": 44.743, "eval_steps_per_second": 1.79, "eval_wer": 0.37, "step": 7000 }, { "epoch": 0.8317602642756017, "grad_norm": 0.16818273067474365, "learning_rate": 9.994948650632194e-05, "loss": 0.0439, "step": 7050 }, { "epoch": 0.8376592732420953, "grad_norm": 0.3805319666862488, "learning_rate": 9.994864826594002e-05, "loss": 0.0434, "step": 7100 }, { "epoch": 0.843558282208589, "grad_norm": 0.24599145352840424, "learning_rate": 9.994780313120486e-05, "loss": 0.0417, "step": 7150 }, { "epoch": 0.8494572911750826, "grad_norm": 0.22905611991882324, "learning_rate": 9.994695110223311e-05, "loss": 0.044, "step": 7200 }, { "epoch": 0.8553563001415763, "grad_norm": 0.3210940659046173, "learning_rate": 9.994609217914238e-05, "loss": 0.0413, "step": 7250 }, { "epoch": 0.8612553091080698, "grad_norm": 0.24706777930259705, "learning_rate": 9.994522636205122e-05, "loss": 0.0419, "step": 7300 }, { "epoch": 0.8671543180745634, "grad_norm": 0.24649979174137115, "learning_rate": 9.994435365107913e-05, "loss": 0.0391, "step": 7350 }, { "epoch": 0.8730533270410571, "grad_norm": 0.35052457451820374, "learning_rate": 9.994347404634657e-05, "loss": 0.0437, "step": 7400 }, { "epoch": 0.8789523360075507, "grad_norm": 0.2783133387565613, "learning_rate": 9.994258754797497e-05, "loss": 0.0399, "step": 7450 }, { "epoch": 0.8848513449740444, "grad_norm": 0.3220801055431366, "learning_rate": 9.994169415608668e-05, "loss": 0.0367, "step": 7500 }, { "epoch": 0.890750353940538, "grad_norm": 0.5136809945106506, "learning_rate": 9.994079387080501e-05, "loss": 0.0397, "step": 7550 }, { "epoch": 0.8966493629070316, "grad_norm": 0.28629156947135925, "learning_rate": 9.993988669225423e-05, "loss": 0.0399, "step": 7600 }, { "epoch": 0.9025483718735252, "grad_norm": 0.23435695469379425, "learning_rate": 9.993897262055956e-05, "loss": 0.0414, "step": 7650 }, { "epoch": 0.9084473808400189, "grad_norm": 0.1372460573911667, "learning_rate": 9.993805165584715e-05, "loss": 0.0403, "step": 7700 }, { "epoch": 0.9143463898065125, "grad_norm": 0.19384533166885376, "learning_rate": 9.993712379824416e-05, "loss": 0.0399, "step": 7750 }, { "epoch": 0.9202453987730062, "grad_norm": 0.33051830530166626, "learning_rate": 9.993618904787862e-05, "loss": 0.0392, "step": 7800 }, { "epoch": 0.9261444077394998, "grad_norm": 0.2025747001171112, "learning_rate": 9.993524740487957e-05, "loss": 0.0361, "step": 7850 }, { "epoch": 0.9320434167059934, "grad_norm": 0.30955347418785095, "learning_rate": 9.993429886937697e-05, "loss": 0.0344, "step": 7900 }, { "epoch": 0.937942425672487, "grad_norm": 0.34803396463394165, "learning_rate": 9.993334344150177e-05, "loss": 0.04, "step": 7950 }, { "epoch": 0.9438414346389806, "grad_norm": 0.3648673892021179, "learning_rate": 9.993238112138583e-05, "loss": 0.0401, "step": 8000 }, { "epoch": 0.9438414346389806, "eval_cer": 0.10068846815834767, "eval_loss": 0.01909806579351425, "eval_runtime": 2.1815, "eval_samples_per_second": 45.84, "eval_steps_per_second": 1.834, "eval_wer": 0.36, "step": 8000 }, { "epoch": 0.9497404436054743, "grad_norm": 0.27408143877983093, "learning_rate": 9.993141190916197e-05, "loss": 0.0391, "step": 8050 }, { "epoch": 0.9556394525719679, "grad_norm": 0.2739901542663574, "learning_rate": 9.9930435804964e-05, "loss": 0.0394, "step": 8100 }, { "epoch": 0.9615384615384616, "grad_norm": 0.14460420608520508, "learning_rate": 9.992945280892662e-05, "loss": 0.0395, "step": 8150 }, { "epoch": 0.9674374705049552, "grad_norm": 0.32737722992897034, "learning_rate": 9.992846292118554e-05, "loss": 0.0395, "step": 8200 }, { "epoch": 0.9733364794714487, "grad_norm": 0.2795390188694, "learning_rate": 9.992746614187736e-05, "loss": 0.0393, "step": 8250 }, { "epoch": 0.9792354884379424, "grad_norm": 0.340096116065979, "learning_rate": 9.992646247113968e-05, "loss": 0.0406, "step": 8300 }, { "epoch": 0.985134497404436, "grad_norm": 0.21861062943935394, "learning_rate": 9.992545190911105e-05, "loss": 0.0365, "step": 8350 }, { "epoch": 0.9910335063709297, "grad_norm": 0.23980876803398132, "learning_rate": 9.992443445593092e-05, "loss": 0.0377, "step": 8400 }, { "epoch": 0.9969325153374233, "grad_norm": 0.17273221909999847, "learning_rate": 9.992341011173976e-05, "loss": 0.0416, "step": 8450 }, { "epoch": 1.002831524303917, "grad_norm": 0.288761705160141, "learning_rate": 9.992237887667894e-05, "loss": 0.0342, "step": 8500 }, { "epoch": 1.0087305332704106, "grad_norm": 0.2893179953098297, "learning_rate": 9.992134075089084e-05, "loss": 0.0329, "step": 8550 }, { "epoch": 1.0146295422369043, "grad_norm": 0.11127787828445435, "learning_rate": 9.99202957345187e-05, "loss": 0.0345, "step": 8600 }, { "epoch": 1.020528551203398, "grad_norm": 0.18411527574062347, "learning_rate": 9.991924382770678e-05, "loss": 0.0325, "step": 8650 }, { "epoch": 1.0264275601698916, "grad_norm": 0.2529762089252472, "learning_rate": 9.991818503060027e-05, "loss": 0.0369, "step": 8700 }, { "epoch": 1.032326569136385, "grad_norm": 0.32149606943130493, "learning_rate": 9.991711934334533e-05, "loss": 0.0352, "step": 8750 }, { "epoch": 1.0382255781028786, "grad_norm": 0.2691158950328827, "learning_rate": 9.991604676608906e-05, "loss": 0.0365, "step": 8800 }, { "epoch": 1.0441245870693723, "grad_norm": 0.19842961430549622, "learning_rate": 9.991496729897947e-05, "loss": 0.0367, "step": 8850 }, { "epoch": 1.050023596035866, "grad_norm": 0.2741820514202118, "learning_rate": 9.99138809421656e-05, "loss": 0.0366, "step": 8900 }, { "epoch": 1.0559226050023596, "grad_norm": 0.15229485929012299, "learning_rate": 9.991278769579737e-05, "loss": 0.0378, "step": 8950 }, { "epoch": 1.0618216139688532, "grad_norm": 0.23324362933635712, "learning_rate": 9.991168756002569e-05, "loss": 0.0358, "step": 9000 }, { "epoch": 1.0618216139688532, "eval_cer": 0.0981067125645439, "eval_loss": 0.01606176421046257, "eval_runtime": 2.2801, "eval_samples_per_second": 43.857, "eval_steps_per_second": 1.754, "eval_wer": 0.34, "step": 9000 }, { "epoch": 1.0677206229353469, "grad_norm": 0.3811362385749817, "learning_rate": 9.991058053500242e-05, "loss": 0.0318, "step": 9050 }, { "epoch": 1.0736196319018405, "grad_norm": 0.33233338594436646, "learning_rate": 9.990946662088033e-05, "loss": 0.0363, "step": 9100 }, { "epoch": 1.0795186408683342, "grad_norm": 0.3473774790763855, "learning_rate": 9.990834581781321e-05, "loss": 0.0365, "step": 9150 }, { "epoch": 1.0854176498348278, "grad_norm": 0.3394800126552582, "learning_rate": 9.990721812595574e-05, "loss": 0.0351, "step": 9200 }, { "epoch": 1.0913166588013214, "grad_norm": 0.1568611115217209, "learning_rate": 9.990608354546359e-05, "loss": 0.0335, "step": 9250 }, { "epoch": 1.097215667767815, "grad_norm": 0.3291234076023102, "learning_rate": 9.990494207649332e-05, "loss": 0.0345, "step": 9300 }, { "epoch": 1.1031146767343087, "grad_norm": 0.30404916405677795, "learning_rate": 9.990379371920255e-05, "loss": 0.0363, "step": 9350 }, { "epoch": 1.1090136857008022, "grad_norm": 0.33030709624290466, "learning_rate": 9.990263847374976e-05, "loss": 0.033, "step": 9400 }, { "epoch": 1.1149126946672958, "grad_norm": 0.27173009514808655, "learning_rate": 9.990147634029439e-05, "loss": 0.0346, "step": 9450 }, { "epoch": 1.1208117036337895, "grad_norm": 0.707815408706665, "learning_rate": 9.990030731899687e-05, "loss": 0.0382, "step": 9500 }, { "epoch": 1.126710712600283, "grad_norm": 0.3682543635368347, "learning_rate": 9.989913141001854e-05, "loss": 0.0327, "step": 9550 }, { "epoch": 1.1326097215667768, "grad_norm": 0.26110076904296875, "learning_rate": 9.989794861352173e-05, "loss": 0.0345, "step": 9600 }, { "epoch": 1.1385087305332704, "grad_norm": 0.14703848958015442, "learning_rate": 9.98967589296697e-05, "loss": 0.0351, "step": 9650 }, { "epoch": 1.144407739499764, "grad_norm": 0.21733184158802032, "learning_rate": 9.989556235862663e-05, "loss": 0.0333, "step": 9700 }, { "epoch": 1.1503067484662577, "grad_norm": 0.13245633244514465, "learning_rate": 9.989435890055771e-05, "loss": 0.0318, "step": 9750 }, { "epoch": 1.1562057574327513, "grad_norm": 0.2690601944923401, "learning_rate": 9.989314855562906e-05, "loss": 0.0309, "step": 9800 }, { "epoch": 1.162104766399245, "grad_norm": 0.3457838296890259, "learning_rate": 9.989193132400771e-05, "loss": 0.033, "step": 9850 }, { "epoch": 1.1680037753657386, "grad_norm": 0.20404575765132904, "learning_rate": 9.989070720586168e-05, "loss": 0.0324, "step": 9900 }, { "epoch": 1.1739027843322323, "grad_norm": 0.30778729915618896, "learning_rate": 9.988947620135995e-05, "loss": 0.0345, "step": 9950 }, { "epoch": 1.1798017932987257, "grad_norm": 0.763800323009491, "learning_rate": 9.988823831067244e-05, "loss": 0.0322, "step": 10000 }, { "epoch": 1.1798017932987257, "eval_cer": 0.09896729776247848, "eval_loss": 0.019596915692090988, "eval_runtime": 2.2855, "eval_samples_per_second": 43.753, "eval_steps_per_second": 1.75, "eval_wer": 0.33, "step": 10000 }, { "epoch": 1.1857008022652193, "grad_norm": 0.23348116874694824, "learning_rate": 9.988699353396999e-05, "loss": 0.0314, "step": 10050 }, { "epoch": 1.191599811231713, "grad_norm": 0.1548302173614502, "learning_rate": 9.988574187142444e-05, "loss": 0.0326, "step": 10100 }, { "epoch": 1.1974988201982066, "grad_norm": 0.13651826977729797, "learning_rate": 9.988448332320852e-05, "loss": 0.0328, "step": 10150 }, { "epoch": 1.2033978291647003, "grad_norm": 0.2875508964061737, "learning_rate": 9.988321788949598e-05, "loss": 0.032, "step": 10200 }, { "epoch": 1.209296838131194, "grad_norm": 0.207173153758049, "learning_rate": 9.988194557046146e-05, "loss": 0.0368, "step": 10250 }, { "epoch": 1.2151958470976876, "grad_norm": 0.3794412612915039, "learning_rate": 9.988066636628062e-05, "loss": 0.0319, "step": 10300 }, { "epoch": 1.2210948560641812, "grad_norm": 0.27503910660743713, "learning_rate": 9.987938027712997e-05, "loss": 0.0328, "step": 10350 }, { "epoch": 1.2269938650306749, "grad_norm": 0.34333643317222595, "learning_rate": 9.987808730318708e-05, "loss": 0.0366, "step": 10400 }, { "epoch": 1.2328928739971685, "grad_norm": 0.20849387347698212, "learning_rate": 9.987678744463038e-05, "loss": 0.0356, "step": 10450 }, { "epoch": 1.2387918829636622, "grad_norm": 0.25269147753715515, "learning_rate": 9.98754807016393e-05, "loss": 0.0326, "step": 10500 }, { "epoch": 1.2446908919301558, "grad_norm": 0.2711004316806793, "learning_rate": 9.987416707439422e-05, "loss": 0.0334, "step": 10550 }, { "epoch": 1.2505899008966495, "grad_norm": 0.21813423931598663, "learning_rate": 9.987284656307644e-05, "loss": 0.0335, "step": 10600 }, { "epoch": 1.256488909863143, "grad_norm": 0.26618748903274536, "learning_rate": 9.987151916786823e-05, "loss": 0.0307, "step": 10650 }, { "epoch": 1.2623879188296367, "grad_norm": 0.21977819502353668, "learning_rate": 9.987018488895284e-05, "loss": 0.0361, "step": 10700 }, { "epoch": 1.2682869277961302, "grad_norm": 0.08298060297966003, "learning_rate": 9.98688437265144e-05, "loss": 0.0341, "step": 10750 }, { "epoch": 1.2741859367626238, "grad_norm": 0.1592985838651657, "learning_rate": 9.986749568073803e-05, "loss": 0.0348, "step": 10800 }, { "epoch": 1.2800849457291175, "grad_norm": 0.2745717763900757, "learning_rate": 9.986614075180982e-05, "loss": 0.0319, "step": 10850 }, { "epoch": 1.285983954695611, "grad_norm": 0.33985647559165955, "learning_rate": 9.986477893991679e-05, "loss": 0.0315, "step": 10900 }, { "epoch": 1.2918829636621048, "grad_norm": 0.43426215648651123, "learning_rate": 9.986341024524689e-05, "loss": 0.0328, "step": 10950 }, { "epoch": 1.2977819726285984, "grad_norm": 0.28979501128196716, "learning_rate": 9.986203466798906e-05, "loss": 0.032, "step": 11000 }, { "epoch": 1.2977819726285984, "eval_cer": 0.09208261617900172, "eval_loss": 0.013813009485602379, "eval_runtime": 2.2053, "eval_samples_per_second": 45.346, "eval_steps_per_second": 1.814, "eval_wer": 0.32, "step": 11000 }, { "epoch": 1.303680981595092, "grad_norm": 0.4027746319770813, "learning_rate": 9.986065220833317e-05, "loss": 0.0288, "step": 11050 }, { "epoch": 1.3095799905615857, "grad_norm": 0.19136713445186615, "learning_rate": 9.985926286647e-05, "loss": 0.0293, "step": 11100 }, { "epoch": 1.3154789995280793, "grad_norm": 0.23453480005264282, "learning_rate": 9.985786664259135e-05, "loss": 0.0347, "step": 11150 }, { "epoch": 1.321378008494573, "grad_norm": 0.401072233915329, "learning_rate": 9.985646353688996e-05, "loss": 0.0332, "step": 11200 }, { "epoch": 1.3272770174610664, "grad_norm": 0.19985853135585785, "learning_rate": 9.985505354955947e-05, "loss": 0.0336, "step": 11250 }, { "epoch": 1.33317602642756, "grad_norm": 0.28821101784706116, "learning_rate": 9.98536366807945e-05, "loss": 0.0335, "step": 11300 }, { "epoch": 1.3390750353940537, "grad_norm": 0.14771975576877594, "learning_rate": 9.985221293079064e-05, "loss": 0.0297, "step": 11350 }, { "epoch": 1.3449740443605473, "grad_norm": 0.3232012689113617, "learning_rate": 9.985078229974437e-05, "loss": 0.0321, "step": 11400 }, { "epoch": 1.350873053327041, "grad_norm": 0.18989653885364532, "learning_rate": 9.984934478785319e-05, "loss": 0.0303, "step": 11450 }, { "epoch": 1.3567720622935346, "grad_norm": 0.2724584639072418, "learning_rate": 9.984790039531551e-05, "loss": 0.0347, "step": 11500 }, { "epoch": 1.3626710712600283, "grad_norm": 0.3047110140323639, "learning_rate": 9.984644912233068e-05, "loss": 0.0318, "step": 11550 }, { "epoch": 1.368570080226522, "grad_norm": 0.6176072359085083, "learning_rate": 9.984499096909904e-05, "loss": 0.0347, "step": 11600 }, { "epoch": 1.3744690891930156, "grad_norm": 0.27084144949913025, "learning_rate": 9.984352593582188e-05, "loss": 0.029, "step": 11650 }, { "epoch": 1.3803680981595092, "grad_norm": 0.32600638270378113, "learning_rate": 9.984205402270138e-05, "loss": 0.0296, "step": 11700 }, { "epoch": 1.3862671071260029, "grad_norm": 0.2904922068119049, "learning_rate": 9.984057522994069e-05, "loss": 0.0287, "step": 11750 }, { "epoch": 1.3921661160924965, "grad_norm": 0.18054021894931793, "learning_rate": 9.983908955774397e-05, "loss": 0.0286, "step": 11800 }, { "epoch": 1.3980651250589902, "grad_norm": 0.14627861976623535, "learning_rate": 9.983759700631625e-05, "loss": 0.0306, "step": 11850 }, { "epoch": 1.4039641340254838, "grad_norm": 0.23287317156791687, "learning_rate": 9.983609757586358e-05, "loss": 0.0322, "step": 11900 }, { "epoch": 1.4098631429919775, "grad_norm": 0.20879879593849182, "learning_rate": 9.983459126659289e-05, "loss": 0.0306, "step": 11950 }, { "epoch": 1.415762151958471, "grad_norm": 0.2134602814912796, "learning_rate": 9.983307807871212e-05, "loss": 0.0311, "step": 12000 }, { "epoch": 1.415762151958471, "eval_cer": 0.09208261617900172, "eval_loss": 0.01577083393931389, "eval_runtime": 2.1849, "eval_samples_per_second": 45.768, "eval_steps_per_second": 1.831, "eval_wer": 0.31, "step": 12000 }, { "epoch": 1.4216611609249645, "grad_norm": 0.22136972844600677, "learning_rate": 9.983155801243013e-05, "loss": 0.0288, "step": 12050 }, { "epoch": 1.4275601698914582, "grad_norm": 0.2627451717853546, "learning_rate": 9.983003106795672e-05, "loss": 0.0302, "step": 12100 }, { "epoch": 1.4334591788579518, "grad_norm": 0.21031834185123444, "learning_rate": 9.982849724550265e-05, "loss": 0.0295, "step": 12150 }, { "epoch": 1.4393581878244455, "grad_norm": 0.19847562909126282, "learning_rate": 9.982695654527965e-05, "loss": 0.029, "step": 12200 }, { "epoch": 1.4452571967909391, "grad_norm": 0.25535327196121216, "learning_rate": 9.982540896750037e-05, "loss": 0.0303, "step": 12250 }, { "epoch": 1.4511562057574328, "grad_norm": 0.2538614273071289, "learning_rate": 9.982385451237843e-05, "loss": 0.0287, "step": 12300 }, { "epoch": 1.4570552147239264, "grad_norm": 0.2381352186203003, "learning_rate": 9.982229318012836e-05, "loss": 0.0306, "step": 12350 }, { "epoch": 1.46295422369042, "grad_norm": 0.21333597600460052, "learning_rate": 9.982072497096571e-05, "loss": 0.0292, "step": 12400 }, { "epoch": 1.4688532326569137, "grad_norm": 0.2663681209087372, "learning_rate": 9.981914988510692e-05, "loss": 0.0296, "step": 12450 }, { "epoch": 1.4747522416234073, "grad_norm": 0.20956964790821075, "learning_rate": 9.981756792276938e-05, "loss": 0.0323, "step": 12500 }, { "epoch": 1.4806512505899008, "grad_norm": 0.22547583281993866, "learning_rate": 9.981597908417148e-05, "loss": 0.0312, "step": 12550 }, { "epoch": 1.4865502595563944, "grad_norm": 0.2970660626888275, "learning_rate": 9.98143833695325e-05, "loss": 0.031, "step": 12600 }, { "epoch": 1.492449268522888, "grad_norm": 0.23790109157562256, "learning_rate": 9.98127807790727e-05, "loss": 0.0292, "step": 12650 }, { "epoch": 1.4983482774893817, "grad_norm": 0.27162548899650574, "learning_rate": 9.981117131301329e-05, "loss": 0.0303, "step": 12700 }, { "epoch": 1.5042472864558754, "grad_norm": 0.20036889612674713, "learning_rate": 9.980955497157642e-05, "loss": 0.0305, "step": 12750 }, { "epoch": 1.510146295422369, "grad_norm": 0.3152737617492676, "learning_rate": 9.980793175498517e-05, "loss": 0.0304, "step": 12800 }, { "epoch": 1.5160453043888626, "grad_norm": 0.3746805489063263, "learning_rate": 9.980630166346363e-05, "loss": 0.0283, "step": 12850 }, { "epoch": 1.5219443133553563, "grad_norm": 0.15511943399906158, "learning_rate": 9.98046646972368e-05, "loss": 0.0332, "step": 12900 }, { "epoch": 1.52784332232185, "grad_norm": 0.21905876696109772, "learning_rate": 9.980302085653057e-05, "loss": 0.029, "step": 12950 }, { "epoch": 1.5337423312883436, "grad_norm": 0.17221002280712128, "learning_rate": 9.98013701415719e-05, "loss": 0.0271, "step": 13000 }, { "epoch": 1.5337423312883436, "eval_cer": 0.09208261617900172, "eval_loss": 0.009780151769518852, "eval_runtime": 2.1734, "eval_samples_per_second": 46.01, "eval_steps_per_second": 1.84, "eval_wer": 0.29, "step": 13000 }, { "epoch": 1.5396413402548372, "grad_norm": 0.2333035171031952, "learning_rate": 9.979971255258861e-05, "loss": 0.0308, "step": 13050 }, { "epoch": 1.5455403492213309, "grad_norm": 0.47443726658821106, "learning_rate": 9.979804808980949e-05, "loss": 0.03, "step": 13100 }, { "epoch": 1.5514393581878245, "grad_norm": 0.24127380549907684, "learning_rate": 9.97963767534643e-05, "loss": 0.029, "step": 13150 }, { "epoch": 1.5573383671543182, "grad_norm": 0.42585575580596924, "learning_rate": 9.979469854378373e-05, "loss": 0.0296, "step": 13200 }, { "epoch": 1.5632373761208118, "grad_norm": 0.20392440259456635, "learning_rate": 9.97930134609994e-05, "loss": 0.0284, "step": 13250 }, { "epoch": 1.5691363850873055, "grad_norm": 0.20332570374011993, "learning_rate": 9.979132150534392e-05, "loss": 0.0273, "step": 13300 }, { "epoch": 1.575035394053799, "grad_norm": 0.16174079477787018, "learning_rate": 9.978962267705083e-05, "loss": 0.0275, "step": 13350 }, { "epoch": 1.5809344030202928, "grad_norm": 0.11664371937513351, "learning_rate": 9.978791697635461e-05, "loss": 0.0305, "step": 13400 }, { "epoch": 1.5868334119867862, "grad_norm": 0.2521206736564636, "learning_rate": 9.978620440349071e-05, "loss": 0.0285, "step": 13450 }, { "epoch": 1.5927324209532798, "grad_norm": 0.3063543736934662, "learning_rate": 9.978448495869548e-05, "loss": 0.028, "step": 13500 }, { "epoch": 1.5986314299197735, "grad_norm": 0.30261752009391785, "learning_rate": 9.97827586422063e-05, "loss": 0.027, "step": 13550 }, { "epoch": 1.6045304388862671, "grad_norm": 0.3109934628009796, "learning_rate": 9.97810254542614e-05, "loss": 0.0304, "step": 13600 }, { "epoch": 1.6104294478527608, "grad_norm": 0.19108925759792328, "learning_rate": 9.977928539510006e-05, "loss": 0.0306, "step": 13650 }, { "epoch": 1.6163284568192544, "grad_norm": 0.10402306169271469, "learning_rate": 9.97775384649624e-05, "loss": 0.0276, "step": 13700 }, { "epoch": 1.6222274657857478, "grad_norm": 0.18916277587413788, "learning_rate": 9.977578466408962e-05, "loss": 0.0258, "step": 13750 }, { "epoch": 1.6281264747522415, "grad_norm": 0.23477470874786377, "learning_rate": 9.977402399272373e-05, "loss": 0.0268, "step": 13800 }, { "epoch": 1.6340254837187351, "grad_norm": 0.16325120627880096, "learning_rate": 9.977225645110779e-05, "loss": 0.0299, "step": 13850 }, { "epoch": 1.6399244926852288, "grad_norm": 0.3316574692726135, "learning_rate": 9.977048203948576e-05, "loss": 0.0298, "step": 13900 }, { "epoch": 1.6458235016517224, "grad_norm": 0.1764078587293625, "learning_rate": 9.976870075810257e-05, "loss": 0.0274, "step": 13950 }, { "epoch": 1.651722510618216, "grad_norm": 0.34297141432762146, "learning_rate": 9.976691260720407e-05, "loss": 0.0275, "step": 14000 }, { "epoch": 1.651722510618216, "eval_cer": 0.0963855421686747, "eval_loss": 0.01401150319725275, "eval_runtime": 2.2128, "eval_samples_per_second": 45.191, "eval_steps_per_second": 1.808, "eval_wer": 0.32, "step": 14000 }, { "epoch": 1.6576215195847097, "grad_norm": 0.15491081774234772, "learning_rate": 9.976511758703709e-05, "loss": 0.0311, "step": 14050 }, { "epoch": 1.6635205285512034, "grad_norm": 0.2914509177207947, "learning_rate": 9.976331569784939e-05, "loss": 0.0287, "step": 14100 }, { "epoch": 1.669419537517697, "grad_norm": 0.14534637331962585, "learning_rate": 9.976150693988968e-05, "loss": 0.0257, "step": 14150 }, { "epoch": 1.6753185464841907, "grad_norm": 0.4253580570220947, "learning_rate": 9.975969131340763e-05, "loss": 0.0313, "step": 14200 }, { "epoch": 1.6812175554506843, "grad_norm": 0.2504117786884308, "learning_rate": 9.975786881865385e-05, "loss": 0.0276, "step": 14250 }, { "epoch": 1.687116564417178, "grad_norm": 0.2849898636341095, "learning_rate": 9.975603945587987e-05, "loss": 0.0274, "step": 14300 }, { "epoch": 1.6930155733836716, "grad_norm": 0.20301619172096252, "learning_rate": 9.975420322533822e-05, "loss": 0.0269, "step": 14350 }, { "epoch": 1.6989145823501652, "grad_norm": 0.284266859292984, "learning_rate": 9.975236012728235e-05, "loss": 0.027, "step": 14400 }, { "epoch": 1.7048135913166589, "grad_norm": 0.2545609474182129, "learning_rate": 9.975051016196667e-05, "loss": 0.0312, "step": 14450 }, { "epoch": 1.7107126002831525, "grad_norm": 0.29602906107902527, "learning_rate": 9.97486533296465e-05, "loss": 0.0256, "step": 14500 }, { "epoch": 1.7166116092496462, "grad_norm": 0.3529340624809265, "learning_rate": 9.974678963057816e-05, "loss": 0.0267, "step": 14550 }, { "epoch": 1.7225106182161398, "grad_norm": 0.392654687166214, "learning_rate": 9.974491906501887e-05, "loss": 0.0309, "step": 14600 }, { "epoch": 1.7284096271826335, "grad_norm": 0.2909203767776489, "learning_rate": 9.974304163322685e-05, "loss": 0.0292, "step": 14650 }, { "epoch": 1.734308636149127, "grad_norm": 0.23058158159255981, "learning_rate": 9.974115733546123e-05, "loss": 0.0317, "step": 14700 }, { "epoch": 1.7402076451156205, "grad_norm": 0.3606225252151489, "learning_rate": 9.973926617198211e-05, "loss": 0.0307, "step": 14750 }, { "epoch": 1.7461066540821142, "grad_norm": 0.26749250292778015, "learning_rate": 9.973736814305049e-05, "loss": 0.0271, "step": 14800 }, { "epoch": 1.7520056630486078, "grad_norm": 1.0425125360488892, "learning_rate": 9.973546324892837e-05, "loss": 0.0289, "step": 14850 }, { "epoch": 1.7579046720151015, "grad_norm": 0.16870065033435822, "learning_rate": 9.973355148987871e-05, "loss": 0.0267, "step": 14900 }, { "epoch": 1.7638036809815951, "grad_norm": 0.19599223136901855, "learning_rate": 9.973163286616534e-05, "loss": 0.0288, "step": 14950 }, { "epoch": 1.7697026899480888, "grad_norm": 0.1774931252002716, "learning_rate": 9.972970737805311e-05, "loss": 0.03, "step": 15000 }, { "epoch": 1.7697026899480888, "eval_cer": 0.09552495697074011, "eval_loss": 0.014414637349545956, "eval_runtime": 2.1841, "eval_samples_per_second": 45.786, "eval_steps_per_second": 1.831, "eval_wer": 0.32, "step": 15000 }, { "epoch": 1.7756016989145822, "grad_norm": 0.22810228168964386, "learning_rate": 9.97277750258078e-05, "loss": 0.0266, "step": 15050 }, { "epoch": 1.7815007078810758, "grad_norm": 0.10548947006464005, "learning_rate": 9.972583580969612e-05, "loss": 0.0267, "step": 15100 }, { "epoch": 1.7873997168475695, "grad_norm": 0.18184469640254974, "learning_rate": 9.972388972998573e-05, "loss": 0.0274, "step": 15150 }, { "epoch": 1.7932987258140631, "grad_norm": 0.1906701922416687, "learning_rate": 9.972193678694526e-05, "loss": 0.0306, "step": 15200 }, { "epoch": 1.7991977347805568, "grad_norm": 0.4813360571861267, "learning_rate": 9.971997698084426e-05, "loss": 0.0278, "step": 15250 }, { "epoch": 1.8050967437470504, "grad_norm": 0.17697583138942719, "learning_rate": 9.971801031195325e-05, "loss": 0.0265, "step": 15300 }, { "epoch": 1.810995752713544, "grad_norm": 0.2601419985294342, "learning_rate": 9.971603678054366e-05, "loss": 0.0274, "step": 15350 }, { "epoch": 1.8168947616800377, "grad_norm": 0.2494155466556549, "learning_rate": 9.971405638688794e-05, "loss": 0.0289, "step": 15400 }, { "epoch": 1.8227937706465314, "grad_norm": 0.21668747067451477, "learning_rate": 9.971206913125942e-05, "loss": 0.0267, "step": 15450 }, { "epoch": 1.828692779613025, "grad_norm": 0.17656803131103516, "learning_rate": 9.97100750139324e-05, "loss": 0.0276, "step": 15500 }, { "epoch": 1.8345917885795187, "grad_norm": 0.22041048109531403, "learning_rate": 9.97080740351821e-05, "loss": 0.0258, "step": 15550 }, { "epoch": 1.8404907975460123, "grad_norm": 0.222555473446846, "learning_rate": 9.970606619528475e-05, "loss": 0.0279, "step": 15600 }, { "epoch": 1.846389806512506, "grad_norm": 0.2823334336280823, "learning_rate": 9.970405149451746e-05, "loss": 0.0264, "step": 15650 }, { "epoch": 1.8522888154789996, "grad_norm": 0.2626365125179291, "learning_rate": 9.970202993315836e-05, "loss": 0.0284, "step": 15700 }, { "epoch": 1.8581878244454932, "grad_norm": 0.2865154445171356, "learning_rate": 9.970000151148644e-05, "loss": 0.0276, "step": 15750 }, { "epoch": 1.8640868334119869, "grad_norm": 0.2892071008682251, "learning_rate": 9.96979662297817e-05, "loss": 0.0292, "step": 15800 }, { "epoch": 1.8699858423784805, "grad_norm": 0.17935973405838013, "learning_rate": 9.969592408832504e-05, "loss": 0.0251, "step": 15850 }, { "epoch": 1.8758848513449742, "grad_norm": 0.2128317505121231, "learning_rate": 9.969387508739839e-05, "loss": 0.0264, "step": 15900 }, { "epoch": 1.8817838603114678, "grad_norm": 0.24335935711860657, "learning_rate": 9.969181922728453e-05, "loss": 0.0259, "step": 15950 }, { "epoch": 1.8876828692779613, "grad_norm": 0.32472994923591614, "learning_rate": 9.968975650826722e-05, "loss": 0.0258, "step": 16000 }, { "epoch": 1.8876828692779613, "eval_cer": 0.09208261617900172, "eval_loss": 0.013121074065566063, "eval_runtime": 2.2228, "eval_samples_per_second": 44.987, "eval_steps_per_second": 1.799, "eval_wer": 0.3, "step": 16000 }, { "epoch": 1.893581878244455, "grad_norm": 0.2535249590873718, "learning_rate": 9.96876869306312e-05, "loss": 0.0283, "step": 16050 }, { "epoch": 1.8994808872109485, "grad_norm": 0.30313000082969666, "learning_rate": 9.968561049466214e-05, "loss": 0.0285, "step": 16100 }, { "epoch": 1.9053798961774422, "grad_norm": 0.15397249162197113, "learning_rate": 9.968352720064662e-05, "loss": 0.0244, "step": 16150 }, { "epoch": 1.9112789051439358, "grad_norm": 0.3959738612174988, "learning_rate": 9.96814370488722e-05, "loss": 0.0276, "step": 16200 }, { "epoch": 1.9171779141104295, "grad_norm": 0.1830800324678421, "learning_rate": 9.967934003962739e-05, "loss": 0.0289, "step": 16250 }, { "epoch": 1.9230769230769231, "grad_norm": 0.16603486239910126, "learning_rate": 9.967723617320164e-05, "loss": 0.0251, "step": 16300 }, { "epoch": 1.9289759320434166, "grad_norm": 0.2711159586906433, "learning_rate": 9.967512544988533e-05, "loss": 0.0268, "step": 16350 }, { "epoch": 1.9348749410099102, "grad_norm": 0.3144698143005371, "learning_rate": 9.96730078699698e-05, "loss": 0.0235, "step": 16400 }, { "epoch": 1.9407739499764038, "grad_norm": 0.18914014101028442, "learning_rate": 9.967088343374738e-05, "loss": 0.0264, "step": 16450 }, { "epoch": 1.9466729589428975, "grad_norm": 0.2660939693450928, "learning_rate": 9.966875214151123e-05, "loss": 0.0263, "step": 16500 }, { "epoch": 1.9525719679093911, "grad_norm": 0.19323930144309998, "learning_rate": 9.96666139935556e-05, "loss": 0.025, "step": 16550 }, { "epoch": 1.9584709768758848, "grad_norm": 0.3013906478881836, "learning_rate": 9.966446899017558e-05, "loss": 0.0257, "step": 16600 }, { "epoch": 1.9643699858423784, "grad_norm": 0.29564106464385986, "learning_rate": 9.966231713166725e-05, "loss": 0.025, "step": 16650 }, { "epoch": 1.970268994808872, "grad_norm": 0.19747748970985413, "learning_rate": 9.966015841832761e-05, "loss": 0.027, "step": 16700 }, { "epoch": 1.9761680037753657, "grad_norm": 0.2750703692436218, "learning_rate": 9.965799285045465e-05, "loss": 0.0227, "step": 16750 }, { "epoch": 1.9820670127418594, "grad_norm": 0.24231897294521332, "learning_rate": 9.965582042834728e-05, "loss": 0.0265, "step": 16800 }, { "epoch": 1.987966021708353, "grad_norm": 0.27594277262687683, "learning_rate": 9.965364115230535e-05, "loss": 0.0271, "step": 16850 }, { "epoch": 1.9938650306748467, "grad_norm": 0.17951230704784393, "learning_rate": 9.965145502262965e-05, "loss": 0.0262, "step": 16900 }, { "epoch": 1.9997640396413403, "grad_norm": 0.1879325658082962, "learning_rate": 9.964926203962194e-05, "loss": 0.0247, "step": 16950 }, { "epoch": 2.005663048607834, "grad_norm": 0.347006231546402, "learning_rate": 9.964706220358492e-05, "loss": 0.0236, "step": 17000 }, { "epoch": 2.005663048607834, "eval_cer": 0.09552495697074011, "eval_loss": 0.009769821539521217, "eval_runtime": 2.2345, "eval_samples_per_second": 44.752, "eval_steps_per_second": 1.79, "eval_wer": 0.31, "step": 17000 }, { "epoch": 2.0115620575743276, "grad_norm": 0.29753196239471436, "learning_rate": 9.964485551482223e-05, "loss": 0.0232, "step": 17050 }, { "epoch": 2.0174610665408212, "grad_norm": 0.28065335750579834, "learning_rate": 9.964264197363845e-05, "loss": 0.0239, "step": 17100 }, { "epoch": 2.023360075507315, "grad_norm": 0.37776315212249756, "learning_rate": 9.964042158033912e-05, "loss": 0.0233, "step": 17150 }, { "epoch": 2.0292590844738085, "grad_norm": 0.08242235332727432, "learning_rate": 9.963819433523071e-05, "loss": 0.0231, "step": 17200 }, { "epoch": 2.035158093440302, "grad_norm": 0.12194682657718658, "learning_rate": 9.963596023862065e-05, "loss": 0.0225, "step": 17250 }, { "epoch": 2.041057102406796, "grad_norm": 0.09892211109399796, "learning_rate": 9.963371929081731e-05, "loss": 0.0225, "step": 17300 }, { "epoch": 2.0469561113732895, "grad_norm": 0.2097199261188507, "learning_rate": 9.963147149212999e-05, "loss": 0.0225, "step": 17350 }, { "epoch": 2.052855120339783, "grad_norm": 0.2907405197620392, "learning_rate": 9.962921684286897e-05, "loss": 0.0237, "step": 17400 }, { "epoch": 2.0587541293062763, "grad_norm": 0.17295144498348236, "learning_rate": 9.962695534334545e-05, "loss": 0.023, "step": 17450 }, { "epoch": 2.06465313827277, "grad_norm": 0.18815067410469055, "learning_rate": 9.96246869938716e-05, "loss": 0.0224, "step": 17500 }, { "epoch": 2.0705521472392636, "grad_norm": 0.36763620376586914, "learning_rate": 9.962241179476048e-05, "loss": 0.0221, "step": 17550 }, { "epoch": 2.0764511562057573, "grad_norm": 0.24360449612140656, "learning_rate": 9.962012974632615e-05, "loss": 0.0238, "step": 17600 }, { "epoch": 2.082350165172251, "grad_norm": 0.2514205574989319, "learning_rate": 9.961784084888361e-05, "loss": 0.0259, "step": 17650 }, { "epoch": 2.0882491741387446, "grad_norm": 0.2514944076538086, "learning_rate": 9.96155451027488e-05, "loss": 0.0225, "step": 17700 }, { "epoch": 2.094148183105238, "grad_norm": 0.20785681903362274, "learning_rate": 9.961324250823857e-05, "loss": 0.0228, "step": 17750 }, { "epoch": 2.100047192071732, "grad_norm": 0.15624016523361206, "learning_rate": 9.961093306567075e-05, "loss": 0.025, "step": 17800 }, { "epoch": 2.1059462010382255, "grad_norm": 0.17732572555541992, "learning_rate": 9.960861677536414e-05, "loss": 0.026, "step": 17850 }, { "epoch": 2.111845210004719, "grad_norm": 0.18303453922271729, "learning_rate": 9.960629363763842e-05, "loss": 0.0208, "step": 17900 }, { "epoch": 2.117744218971213, "grad_norm": 0.24401551485061646, "learning_rate": 9.960396365281427e-05, "loss": 0.0225, "step": 17950 }, { "epoch": 2.1236432279377064, "grad_norm": 0.18319082260131836, "learning_rate": 9.960162682121327e-05, "loss": 0.0242, "step": 18000 }, { "epoch": 2.1236432279377064, "eval_cer": 0.09208261617900172, "eval_loss": 0.011148546822369099, "eval_runtime": 2.2493, "eval_samples_per_second": 44.459, "eval_steps_per_second": 1.778, "eval_wer": 0.3, "step": 18000 }, { "epoch": 2.1295422369042, "grad_norm": 0.2401268631219864, "learning_rate": 9.959928314315803e-05, "loss": 0.0238, "step": 18050 }, { "epoch": 2.1354412458706937, "grad_norm": 0.14701007306575775, "learning_rate": 9.959693261897197e-05, "loss": 0.0231, "step": 18100 }, { "epoch": 2.1413402548371874, "grad_norm": 0.3174618184566498, "learning_rate": 9.959457524897958e-05, "loss": 0.0219, "step": 18150 }, { "epoch": 2.147239263803681, "grad_norm": 0.19903814792633057, "learning_rate": 9.959221103350623e-05, "loss": 0.0233, "step": 18200 }, { "epoch": 2.1531382727701747, "grad_norm": 0.1704159826040268, "learning_rate": 9.958983997287822e-05, "loss": 0.0239, "step": 18250 }, { "epoch": 2.1590372817366683, "grad_norm": 0.3617122173309326, "learning_rate": 9.958746206742289e-05, "loss": 0.0269, "step": 18300 }, { "epoch": 2.164936290703162, "grad_norm": 0.3484322428703308, "learning_rate": 9.95850773174684e-05, "loss": 0.0255, "step": 18350 }, { "epoch": 2.1708352996696556, "grad_norm": 0.2394183874130249, "learning_rate": 9.958268572334395e-05, "loss": 0.0221, "step": 18400 }, { "epoch": 2.1767343086361493, "grad_norm": 0.24473364651203156, "learning_rate": 9.958028728537963e-05, "loss": 0.0247, "step": 18450 }, { "epoch": 2.182633317602643, "grad_norm": 0.435761958360672, "learning_rate": 9.95778820039065e-05, "loss": 0.0221, "step": 18500 }, { "epoch": 2.1885323265691365, "grad_norm": 0.352704793214798, "learning_rate": 9.957546987925656e-05, "loss": 0.023, "step": 18550 }, { "epoch": 2.19443133553563, "grad_norm": 0.47879576683044434, "learning_rate": 9.957305091176274e-05, "loss": 0.025, "step": 18600 }, { "epoch": 2.200330344502124, "grad_norm": 0.14422573149204254, "learning_rate": 9.957062510175897e-05, "loss": 0.0248, "step": 18650 }, { "epoch": 2.2062293534686175, "grad_norm": 0.13963888585567474, "learning_rate": 9.956819244958003e-05, "loss": 0.0249, "step": 18700 }, { "epoch": 2.212128362435111, "grad_norm": 0.20249466598033905, "learning_rate": 9.95657529555617e-05, "loss": 0.0237, "step": 18750 }, { "epoch": 2.2180273714016043, "grad_norm": 0.3244359493255615, "learning_rate": 9.956330662004074e-05, "loss": 0.0225, "step": 18800 }, { "epoch": 2.223926380368098, "grad_norm": 0.20730607211589813, "learning_rate": 9.956085344335477e-05, "loss": 0.0224, "step": 18850 }, { "epoch": 2.2298253893345916, "grad_norm": 0.13428957760334015, "learning_rate": 9.955839342584244e-05, "loss": 0.0213, "step": 18900 }, { "epoch": 2.2357243983010853, "grad_norm": 0.2547999918460846, "learning_rate": 9.955592656784328e-05, "loss": 0.0249, "step": 18950 }, { "epoch": 2.241623407267579, "grad_norm": 0.1690271943807602, "learning_rate": 9.955345286969778e-05, "loss": 0.0245, "step": 19000 }, { "epoch": 2.241623407267579, "eval_cer": 0.08691910499139414, "eval_loss": 0.008073776960372925, "eval_runtime": 2.2968, "eval_samples_per_second": 43.539, "eval_steps_per_second": 1.742, "eval_wer": 0.28, "step": 19000 }, { "epoch": 2.2475224162340726, "grad_norm": 0.3503081500530243, "learning_rate": 9.95509723317474e-05, "loss": 0.0258, "step": 19050 }, { "epoch": 2.253421425200566, "grad_norm": 0.2886398434638977, "learning_rate": 9.954848495433452e-05, "loss": 0.0226, "step": 19100 }, { "epoch": 2.25932043416706, "grad_norm": 0.23994478583335876, "learning_rate": 9.954599073780246e-05, "loss": 0.0281, "step": 19150 }, { "epoch": 2.2652194431335535, "grad_norm": 0.08131328225135803, "learning_rate": 9.954348968249551e-05, "loss": 0.0228, "step": 19200 }, { "epoch": 2.271118452100047, "grad_norm": 0.545905351638794, "learning_rate": 9.954098178875888e-05, "loss": 0.0231, "step": 19250 }, { "epoch": 2.277017461066541, "grad_norm": 0.5131845474243164, "learning_rate": 9.953846705693875e-05, "loss": 0.0237, "step": 19300 }, { "epoch": 2.2829164700330344, "grad_norm": 0.30097338557243347, "learning_rate": 9.953594548738218e-05, "loss": 0.0234, "step": 19350 }, { "epoch": 2.288815478999528, "grad_norm": 0.2962517738342285, "learning_rate": 9.953341708043724e-05, "loss": 0.0227, "step": 19400 }, { "epoch": 2.2947144879660217, "grad_norm": 0.6431502103805542, "learning_rate": 9.953088183645294e-05, "loss": 0.0207, "step": 19450 }, { "epoch": 2.3006134969325154, "grad_norm": 0.24541136622428894, "learning_rate": 9.952833975577922e-05, "loss": 0.0216, "step": 19500 }, { "epoch": 2.306512505899009, "grad_norm": 0.2515818178653717, "learning_rate": 9.952579083876694e-05, "loss": 0.0212, "step": 19550 }, { "epoch": 2.3124115148655027, "grad_norm": 0.22928953170776367, "learning_rate": 9.952323508576791e-05, "loss": 0.0239, "step": 19600 }, { "epoch": 2.3183105238319963, "grad_norm": 0.33611559867858887, "learning_rate": 9.952067249713495e-05, "loss": 0.0256, "step": 19650 }, { "epoch": 2.32420953279849, "grad_norm": 0.1740807294845581, "learning_rate": 9.951810307322172e-05, "loss": 0.0261, "step": 19700 }, { "epoch": 2.3301085417649836, "grad_norm": 0.12017904967069626, "learning_rate": 9.95155268143829e-05, "loss": 0.0233, "step": 19750 }, { "epoch": 2.3360075507314773, "grad_norm": 0.17321817576885223, "learning_rate": 9.951294372097407e-05, "loss": 0.0236, "step": 19800 }, { "epoch": 2.341906559697971, "grad_norm": 0.3093399107456207, "learning_rate": 9.951035379335178e-05, "loss": 0.0217, "step": 19850 }, { "epoch": 2.3478055686644645, "grad_norm": 0.35511964559555054, "learning_rate": 9.950775703187354e-05, "loss": 0.0221, "step": 19900 }, { "epoch": 2.3537045776309578, "grad_norm": 0.22607597708702087, "learning_rate": 9.950515343689775e-05, "loss": 0.0239, "step": 19950 }, { "epoch": 2.3596035865974514, "grad_norm": 0.4332330822944641, "learning_rate": 9.950254300878378e-05, "loss": 0.0222, "step": 20000 }, { "epoch": 2.3596035865974514, "eval_cer": 0.09294320137693632, "eval_loss": 0.010067946277558804, "eval_runtime": 2.2233, "eval_samples_per_second": 44.977, "eval_steps_per_second": 1.799, "eval_wer": 0.29, "step": 20000 }, { "epoch": 2.365502595563945, "grad_norm": 0.146223247051239, "learning_rate": 9.949992574789195e-05, "loss": 0.0237, "step": 20050 }, { "epoch": 2.3714016045304387, "grad_norm": 0.1588299572467804, "learning_rate": 9.949730165458351e-05, "loss": 0.0212, "step": 20100 }, { "epoch": 2.3773006134969323, "grad_norm": 0.40931251645088196, "learning_rate": 9.949467072922069e-05, "loss": 0.022, "step": 20150 }, { "epoch": 2.383199622463426, "grad_norm": 0.19924218952655792, "learning_rate": 9.94920329721666e-05, "loss": 0.0221, "step": 20200 }, { "epoch": 2.3890986314299196, "grad_norm": 0.21348881721496582, "learning_rate": 9.948938838378535e-05, "loss": 0.0197, "step": 20250 }, { "epoch": 2.3949976403964133, "grad_norm": 0.23284979164600372, "learning_rate": 9.948673696444195e-05, "loss": 0.0198, "step": 20300 }, { "epoch": 2.400896649362907, "grad_norm": 0.1322377622127533, "learning_rate": 9.948407871450238e-05, "loss": 0.0229, "step": 20350 }, { "epoch": 2.4067956583294006, "grad_norm": 0.31828656792640686, "learning_rate": 9.948141363433355e-05, "loss": 0.0241, "step": 20400 }, { "epoch": 2.412694667295894, "grad_norm": 0.18166601657867432, "learning_rate": 9.947874172430334e-05, "loss": 0.0239, "step": 20450 }, { "epoch": 2.418593676262388, "grad_norm": 0.2601373493671417, "learning_rate": 9.947606298478051e-05, "loss": 0.0228, "step": 20500 }, { "epoch": 2.4244926852288815, "grad_norm": 0.3915237486362457, "learning_rate": 9.947337741613484e-05, "loss": 0.0237, "step": 20550 }, { "epoch": 2.430391694195375, "grad_norm": 0.21783220767974854, "learning_rate": 9.947068501873701e-05, "loss": 0.022, "step": 20600 }, { "epoch": 2.436290703161869, "grad_norm": 0.1357978880405426, "learning_rate": 9.946798579295864e-05, "loss": 0.0198, "step": 20650 }, { "epoch": 2.4421897121283624, "grad_norm": 0.27476632595062256, "learning_rate": 9.946527973917231e-05, "loss": 0.0218, "step": 20700 }, { "epoch": 2.448088721094856, "grad_norm": 1.0301671028137207, "learning_rate": 9.946256685775151e-05, "loss": 0.022, "step": 20750 }, { "epoch": 2.4539877300613497, "grad_norm": 0.2826835811138153, "learning_rate": 9.945984714907073e-05, "loss": 0.0216, "step": 20800 }, { "epoch": 2.4598867390278434, "grad_norm": 0.27107691764831543, "learning_rate": 9.945712061350534e-05, "loss": 0.0223, "step": 20850 }, { "epoch": 2.465785747994337, "grad_norm": 0.3123421370983124, "learning_rate": 9.945438725143169e-05, "loss": 0.0214, "step": 20900 }, { "epoch": 2.4716847569608307, "grad_norm": 0.6165755391120911, "learning_rate": 9.945164706322708e-05, "loss": 0.0198, "step": 20950 }, { "epoch": 2.4775837659273243, "grad_norm": 0.29761531949043274, "learning_rate": 9.94489000492697e-05, "loss": 0.0239, "step": 21000 }, { "epoch": 2.4775837659273243, "eval_cer": 0.09294320137693632, "eval_loss": 0.019773518666625023, "eval_runtime": 2.1817, "eval_samples_per_second": 45.836, "eval_steps_per_second": 1.833, "eval_wer": 0.31, "step": 21000 }, { "epoch": 2.483482774893818, "grad_norm": 0.24131856858730316, "learning_rate": 9.944614620993872e-05, "loss": 0.0195, "step": 21050 }, { "epoch": 2.4893817838603116, "grad_norm": 0.3136957585811615, "learning_rate": 9.94433855456143e-05, "loss": 0.0227, "step": 21100 }, { "epoch": 2.4952807928268053, "grad_norm": 0.43590936064720154, "learning_rate": 9.944061805667743e-05, "loss": 0.0248, "step": 21150 }, { "epoch": 2.501179801793299, "grad_norm": 0.20235379040241241, "learning_rate": 9.943784374351015e-05, "loss": 0.021, "step": 21200 }, { "epoch": 2.5070788107597926, "grad_norm": 0.2235686480998993, "learning_rate": 9.943506260649536e-05, "loss": 0.0227, "step": 21250 }, { "epoch": 2.512977819726286, "grad_norm": 0.26617613434791565, "learning_rate": 9.943227464601697e-05, "loss": 0.0225, "step": 21300 }, { "epoch": 2.51887682869278, "grad_norm": 0.17564472556114197, "learning_rate": 9.942947986245976e-05, "loss": 0.0207, "step": 21350 }, { "epoch": 2.5247758376592735, "grad_norm": 0.28593361377716064, "learning_rate": 9.942667825620952e-05, "loss": 0.0227, "step": 21400 }, { "epoch": 2.530674846625767, "grad_norm": 0.24651487171649933, "learning_rate": 9.942386982765294e-05, "loss": 0.0206, "step": 21450 }, { "epoch": 2.5365738555922603, "grad_norm": 0.30694714188575745, "learning_rate": 9.942105457717768e-05, "loss": 0.0218, "step": 21500 }, { "epoch": 2.542472864558754, "grad_norm": 0.2536924481391907, "learning_rate": 9.941823250517229e-05, "loss": 0.0207, "step": 21550 }, { "epoch": 2.5483718735252476, "grad_norm": 0.2498774379491806, "learning_rate": 9.941540361202635e-05, "loss": 0.0214, "step": 21600 }, { "epoch": 2.5542708824917413, "grad_norm": 0.3287333548069, "learning_rate": 9.941256789813027e-05, "loss": 0.0211, "step": 21650 }, { "epoch": 2.560169891458235, "grad_norm": 0.24165669083595276, "learning_rate": 9.940972536387551e-05, "loss": 0.0237, "step": 21700 }, { "epoch": 2.5660689004247286, "grad_norm": 0.45370692014694214, "learning_rate": 9.940687600965439e-05, "loss": 0.0237, "step": 21750 }, { "epoch": 2.571967909391222, "grad_norm": 0.2564535439014435, "learning_rate": 9.940401983586023e-05, "loss": 0.0234, "step": 21800 }, { "epoch": 2.577866918357716, "grad_norm": 0.138870507478714, "learning_rate": 9.940115684288725e-05, "loss": 0.0212, "step": 21850 }, { "epoch": 2.5837659273242095, "grad_norm": 0.09469731152057648, "learning_rate": 9.93982870311306e-05, "loss": 0.0219, "step": 21900 }, { "epoch": 2.589664936290703, "grad_norm": 0.19410641491413116, "learning_rate": 9.939541040098644e-05, "loss": 0.02, "step": 21950 }, { "epoch": 2.595563945257197, "grad_norm": 0.2105347365140915, "learning_rate": 9.939252695285181e-05, "loss": 0.023, "step": 22000 }, { "epoch": 2.595563945257197, "eval_cer": 0.08691910499139414, "eval_loss": 0.009623887948691845, "eval_runtime": 2.2416, "eval_samples_per_second": 44.61, "eval_steps_per_second": 1.784, "eval_wer": 0.28, "step": 22000 }, { "epoch": 2.6014629542236904, "grad_norm": 0.25737884640693665, "learning_rate": 9.93896366871247e-05, "loss": 0.0209, "step": 22050 }, { "epoch": 2.607361963190184, "grad_norm": 0.15564632415771484, "learning_rate": 9.938673960420407e-05, "loss": 0.0222, "step": 22100 }, { "epoch": 2.6132609721566777, "grad_norm": 0.31751951575279236, "learning_rate": 9.938383570448978e-05, "loss": 0.0198, "step": 22150 }, { "epoch": 2.6191599811231714, "grad_norm": 0.39207151532173157, "learning_rate": 9.938092498838266e-05, "loss": 0.0203, "step": 22200 }, { "epoch": 2.625058990089665, "grad_norm": 0.2689426839351654, "learning_rate": 9.937800745628446e-05, "loss": 0.0229, "step": 22250 }, { "epoch": 2.6309579990561587, "grad_norm": 0.20446930825710297, "learning_rate": 9.937508310859791e-05, "loss": 0.0221, "step": 22300 }, { "epoch": 2.6368570080226523, "grad_norm": 1.2481807470321655, "learning_rate": 9.937215194572664e-05, "loss": 0.0221, "step": 22350 }, { "epoch": 2.642756016989146, "grad_norm": 0.19901341199874878, "learning_rate": 9.936921396807525e-05, "loss": 0.0237, "step": 22400 }, { "epoch": 2.648655025955639, "grad_norm": 0.8121945261955261, "learning_rate": 9.936626917604923e-05, "loss": 0.0214, "step": 22450 }, { "epoch": 2.654554034922133, "grad_norm": 0.21878287196159363, "learning_rate": 9.936331757005508e-05, "loss": 0.0227, "step": 22500 }, { "epoch": 2.6604530438886265, "grad_norm": 0.5992307662963867, "learning_rate": 9.936035915050019e-05, "loss": 0.0192, "step": 22550 }, { "epoch": 2.66635205285512, "grad_norm": 0.17031100392341614, "learning_rate": 9.935739391779293e-05, "loss": 0.02, "step": 22600 }, { "epoch": 2.6722510618216138, "grad_norm": 0.27901551127433777, "learning_rate": 9.935442187234255e-05, "loss": 0.0226, "step": 22650 }, { "epoch": 2.6781500707881074, "grad_norm": 0.1835927516222, "learning_rate": 9.93514430145593e-05, "loss": 0.0222, "step": 22700 }, { "epoch": 2.684049079754601, "grad_norm": 0.05582818761467934, "learning_rate": 9.934845734485437e-05, "loss": 0.0217, "step": 22750 }, { "epoch": 2.6899480887210947, "grad_norm": 0.2651654779911041, "learning_rate": 9.93454648636398e-05, "loss": 0.0209, "step": 22800 }, { "epoch": 2.6958470976875883, "grad_norm": 0.24819524586200714, "learning_rate": 9.934246557132871e-05, "loss": 0.0208, "step": 22850 }, { "epoch": 2.701746106654082, "grad_norm": 0.2650342285633087, "learning_rate": 9.933945946833506e-05, "loss": 0.019, "step": 22900 }, { "epoch": 2.7076451156205756, "grad_norm": 0.055385831743478775, "learning_rate": 9.933644655507379e-05, "loss": 0.0203, "step": 22950 }, { "epoch": 2.7135441245870693, "grad_norm": 0.23982442915439606, "learning_rate": 9.933342683196075e-05, "loss": 0.0223, "step": 23000 }, { "epoch": 2.7135441245870693, "eval_cer": 0.08605851979345955, "eval_loss": 0.007503759115934372, "eval_runtime": 2.1969, "eval_samples_per_second": 45.52, "eval_steps_per_second": 1.821, "eval_wer": 0.27, "step": 23000 }, { "epoch": 2.719443133553563, "grad_norm": 0.13051073253154755, "learning_rate": 9.933040029941274e-05, "loss": 0.0207, "step": 23050 }, { "epoch": 2.7253421425200566, "grad_norm": 0.24101924896240234, "learning_rate": 9.932736695784755e-05, "loss": 0.0228, "step": 23100 }, { "epoch": 2.7312411514865502, "grad_norm": 0.23983432352542877, "learning_rate": 9.932432680768385e-05, "loss": 0.0189, "step": 23150 }, { "epoch": 2.737140160453044, "grad_norm": 0.2129448652267456, "learning_rate": 9.932127984934124e-05, "loss": 0.0211, "step": 23200 }, { "epoch": 2.7430391694195375, "grad_norm": 0.1288970708847046, "learning_rate": 9.931822608324033e-05, "loss": 0.0217, "step": 23250 }, { "epoch": 2.748938178386031, "grad_norm": 0.3226830065250397, "learning_rate": 9.93151655098026e-05, "loss": 0.0224, "step": 23300 }, { "epoch": 2.754837187352525, "grad_norm": 0.23506873846054077, "learning_rate": 9.931209812945052e-05, "loss": 0.0189, "step": 23350 }, { "epoch": 2.7607361963190185, "grad_norm": 0.17989467084407806, "learning_rate": 9.930902394260747e-05, "loss": 0.0216, "step": 23400 }, { "epoch": 2.766635205285512, "grad_norm": 0.3791234493255615, "learning_rate": 9.930594294969773e-05, "loss": 0.021, "step": 23450 }, { "epoch": 2.7725342142520057, "grad_norm": 0.4016015827655792, "learning_rate": 9.930285515114664e-05, "loss": 0.0213, "step": 23500 }, { "epoch": 2.7784332232184994, "grad_norm": 0.21645690500736237, "learning_rate": 9.929976054738038e-05, "loss": 0.0218, "step": 23550 }, { "epoch": 2.784332232184993, "grad_norm": 0.2584187090396881, "learning_rate": 9.929665913882606e-05, "loss": 0.0194, "step": 23600 }, { "epoch": 2.7902312411514867, "grad_norm": 0.30607786774635315, "learning_rate": 9.92935509259118e-05, "loss": 0.0214, "step": 23650 }, { "epoch": 2.7961302501179803, "grad_norm": 0.08108435571193695, "learning_rate": 9.929043590906662e-05, "loss": 0.0223, "step": 23700 }, { "epoch": 2.802029259084474, "grad_norm": 0.2786133289337158, "learning_rate": 9.928731408872046e-05, "loss": 0.0211, "step": 23750 }, { "epoch": 2.8079282680509676, "grad_norm": 0.3104282021522522, "learning_rate": 9.928418546530424e-05, "loss": 0.0215, "step": 23800 }, { "epoch": 2.8138272770174613, "grad_norm": 0.12867899239063263, "learning_rate": 9.928105003924982e-05, "loss": 0.0186, "step": 23850 }, { "epoch": 2.819726285983955, "grad_norm": 0.24856945872306824, "learning_rate": 9.927790781098992e-05, "loss": 0.0213, "step": 23900 }, { "epoch": 2.8256252949504486, "grad_norm": 0.16102224588394165, "learning_rate": 9.927475878095831e-05, "loss": 0.0229, "step": 23950 }, { "epoch": 2.831524303916942, "grad_norm": 0.2691654562950134, "learning_rate": 9.927160294958964e-05, "loss": 0.0207, "step": 24000 }, { "epoch": 2.831524303916942, "eval_cer": 0.09122203098106713, "eval_loss": 0.011951258406043053, "eval_runtime": 2.1473, "eval_samples_per_second": 46.57, "eval_steps_per_second": 1.863, "eval_wer": 0.3, "step": 24000 }, { "epoch": 2.837423312883436, "grad_norm": 0.31320613622665405, "learning_rate": 9.926844031731948e-05, "loss": 0.0224, "step": 24050 }, { "epoch": 2.843322321849929, "grad_norm": 0.21514493227005005, "learning_rate": 9.926527088458438e-05, "loss": 0.0212, "step": 24100 }, { "epoch": 2.8492213308164227, "grad_norm": 0.19298183917999268, "learning_rate": 9.926209465182182e-05, "loss": 0.0214, "step": 24150 }, { "epoch": 2.8551203397829163, "grad_norm": 0.11621813476085663, "learning_rate": 9.925891161947021e-05, "loss": 0.0207, "step": 24200 }, { "epoch": 2.86101934874941, "grad_norm": 0.31450212001800537, "learning_rate": 9.92557217879689e-05, "loss": 0.0201, "step": 24250 }, { "epoch": 2.8669183577159036, "grad_norm": 0.4044695198535919, "learning_rate": 9.925252515775816e-05, "loss": 0.0193, "step": 24300 }, { "epoch": 2.8728173666823973, "grad_norm": 0.23058685660362244, "learning_rate": 9.924932172927923e-05, "loss": 0.0204, "step": 24350 }, { "epoch": 2.878716375648891, "grad_norm": 0.247412770986557, "learning_rate": 9.92461115029743e-05, "loss": 0.0207, "step": 24400 }, { "epoch": 2.8846153846153846, "grad_norm": 0.22548078000545502, "learning_rate": 9.924289447928643e-05, "loss": 0.0202, "step": 24450 }, { "epoch": 2.8905143935818782, "grad_norm": 0.29411017894744873, "learning_rate": 9.923967065865969e-05, "loss": 0.019, "step": 24500 }, { "epoch": 2.896413402548372, "grad_norm": 0.1998167484998703, "learning_rate": 9.923644004153905e-05, "loss": 0.0231, "step": 24550 }, { "epoch": 2.9023124115148655, "grad_norm": 0.1764383465051651, "learning_rate": 9.923320262837041e-05, "loss": 0.0232, "step": 24600 }, { "epoch": 2.908211420481359, "grad_norm": 0.14665581285953522, "learning_rate": 9.922995841960068e-05, "loss": 0.0204, "step": 24650 }, { "epoch": 2.914110429447853, "grad_norm": 0.18278200924396515, "learning_rate": 9.92267074156776e-05, "loss": 0.0203, "step": 24700 }, { "epoch": 2.9200094384143465, "grad_norm": 0.16964095830917358, "learning_rate": 9.922344961704994e-05, "loss": 0.0192, "step": 24750 }, { "epoch": 2.92590844738084, "grad_norm": 0.33906465768814087, "learning_rate": 9.922018502416736e-05, "loss": 0.0205, "step": 24800 }, { "epoch": 2.9318074563473338, "grad_norm": 0.24378934502601624, "learning_rate": 9.921691363748044e-05, "loss": 0.0192, "step": 24850 }, { "epoch": 2.9377064653138274, "grad_norm": 0.19611741602420807, "learning_rate": 9.921363545744075e-05, "loss": 0.0215, "step": 24900 }, { "epoch": 2.943605474280321, "grad_norm": 0.7192788124084473, "learning_rate": 9.921035048450079e-05, "loss": 0.0215, "step": 24950 }, { "epoch": 2.9495044832468147, "grad_norm": 0.1382417529821396, "learning_rate": 9.920705871911395e-05, "loss": 0.0199, "step": 25000 }, { "epoch": 2.9495044832468147, "eval_cer": 0.09122203098106713, "eval_loss": 0.008967660367488861, "eval_runtime": 2.3849, "eval_samples_per_second": 41.931, "eval_steps_per_second": 1.677, "eval_wer": 0.29, "step": 25000 }, { "epoch": 2.955403492213308, "grad_norm": 0.33995887637138367, "learning_rate": 9.92037601617346e-05, "loss": 0.0198, "step": 25050 }, { "epoch": 2.9613025011798015, "grad_norm": 0.28309497237205505, "learning_rate": 9.920045481281803e-05, "loss": 0.0206, "step": 25100 }, { "epoch": 2.967201510146295, "grad_norm": 0.15581589937210083, "learning_rate": 9.91971426728205e-05, "loss": 0.0196, "step": 25150 }, { "epoch": 2.973100519112789, "grad_norm": 0.20929265022277832, "learning_rate": 9.919382374219914e-05, "loss": 0.0215, "step": 25200 }, { "epoch": 2.9789995280792825, "grad_norm": 0.30622991919517517, "learning_rate": 9.919049802141209e-05, "loss": 0.0204, "step": 25250 }, { "epoch": 2.984898537045776, "grad_norm": 0.5458083748817444, "learning_rate": 9.918716551091838e-05, "loss": 0.0193, "step": 25300 }, { "epoch": 2.9907975460122698, "grad_norm": 0.1345265507698059, "learning_rate": 9.918382621117801e-05, "loss": 0.0211, "step": 25350 }, { "epoch": 2.9966965549787634, "grad_norm": 0.8017371296882629, "learning_rate": 9.918048012265186e-05, "loss": 0.0234, "step": 25400 }, { "epoch": 3.002595563945257, "grad_norm": 0.1199745237827301, "learning_rate": 9.917712724580184e-05, "loss": 0.0197, "step": 25450 }, { "epoch": 3.0084945729117507, "grad_norm": 0.18644732236862183, "learning_rate": 9.91737675810907e-05, "loss": 0.0171, "step": 25500 }, { "epoch": 3.0143935818782444, "grad_norm": 0.14219047129154205, "learning_rate": 9.917040112898219e-05, "loss": 0.0186, "step": 25550 }, { "epoch": 3.020292590844738, "grad_norm": 0.14848843216896057, "learning_rate": 9.916702788994097e-05, "loss": 0.0168, "step": 25600 }, { "epoch": 3.0261915998112316, "grad_norm": 0.15549512207508087, "learning_rate": 9.916364786443266e-05, "loss": 0.0162, "step": 25650 }, { "epoch": 3.0320906087777253, "grad_norm": 0.1141747236251831, "learning_rate": 9.916026105292379e-05, "loss": 0.018, "step": 25700 }, { "epoch": 3.037989617744219, "grad_norm": 0.20695440471172333, "learning_rate": 9.915686745588184e-05, "loss": 0.0162, "step": 25750 }, { "epoch": 3.0438886267107126, "grad_norm": 0.17314468324184418, "learning_rate": 9.91534670737752e-05, "loss": 0.0153, "step": 25800 }, { "epoch": 3.0497876356772062, "grad_norm": 0.333112508058548, "learning_rate": 9.915005990707325e-05, "loss": 0.0187, "step": 25850 }, { "epoch": 3.0556866446437, "grad_norm": 0.1861029863357544, "learning_rate": 9.914664595624627e-05, "loss": 0.0207, "step": 25900 }, { "epoch": 3.0615856536101935, "grad_norm": 0.23570506274700165, "learning_rate": 9.91432252217655e-05, "loss": 0.0176, "step": 25950 }, { "epoch": 3.067484662576687, "grad_norm": 0.19323711097240448, "learning_rate": 9.913979770410306e-05, "loss": 0.0178, "step": 26000 }, { "epoch": 3.067484662576687, "eval_cer": 0.09208261617900172, "eval_loss": 0.008801817893981934, "eval_runtime": 2.2091, "eval_samples_per_second": 45.267, "eval_steps_per_second": 1.811, "eval_wer": 0.3, "step": 26000 }, { "epoch": 3.073383671543181, "grad_norm": 0.2380620241165161, "learning_rate": 9.91363634037321e-05, "loss": 0.0179, "step": 26050 }, { "epoch": 3.0792826805096745, "grad_norm": 0.12958146631717682, "learning_rate": 9.91329223211266e-05, "loss": 0.0216, "step": 26100 }, { "epoch": 3.085181689476168, "grad_norm": 0.3352428078651428, "learning_rate": 9.912947445676155e-05, "loss": 0.0174, "step": 26150 }, { "epoch": 3.0910806984426618, "grad_norm": 0.08368784934282303, "learning_rate": 9.912601981111286e-05, "loss": 0.0181, "step": 26200 }, { "epoch": 3.0969797074091554, "grad_norm": 0.25886860489845276, "learning_rate": 9.912255838465736e-05, "loss": 0.0169, "step": 26250 }, { "epoch": 3.102878716375649, "grad_norm": 0.24723957479000092, "learning_rate": 9.911909017787284e-05, "loss": 0.0192, "step": 26300 }, { "epoch": 3.1087777253421427, "grad_norm": 0.4060457646846771, "learning_rate": 9.9115615191238e-05, "loss": 0.0208, "step": 26350 }, { "epoch": 3.1146767343086363, "grad_norm": 0.17600201070308685, "learning_rate": 9.911213342523249e-05, "loss": 0.0198, "step": 26400 }, { "epoch": 3.12057574327513, "grad_norm": 0.5180116295814514, "learning_rate": 9.910864488033692e-05, "loss": 0.02, "step": 26450 }, { "epoch": 3.1264747522416236, "grad_norm": 0.08653631806373596, "learning_rate": 9.910514955703275e-05, "loss": 0.0172, "step": 26500 }, { "epoch": 3.132373761208117, "grad_norm": 0.3698040246963501, "learning_rate": 9.91016474558025e-05, "loss": 0.0203, "step": 26550 }, { "epoch": 3.1382727701746105, "grad_norm": 0.31282907724380493, "learning_rate": 9.909813857712951e-05, "loss": 0.0184, "step": 26600 }, { "epoch": 3.144171779141104, "grad_norm": 0.2593541741371155, "learning_rate": 9.909462292149815e-05, "loss": 0.0187, "step": 26650 }, { "epoch": 3.1500707881075978, "grad_norm": 0.2277897149324417, "learning_rate": 9.909110048939365e-05, "loss": 0.0165, "step": 26700 }, { "epoch": 3.1559697970740914, "grad_norm": 0.1804780513048172, "learning_rate": 9.908757128130223e-05, "loss": 0.0191, "step": 26750 }, { "epoch": 3.161868806040585, "grad_norm": 0.23499223589897156, "learning_rate": 9.9084035297711e-05, "loss": 0.0182, "step": 26800 }, { "epoch": 3.1677678150070787, "grad_norm": 0.296973317861557, "learning_rate": 9.908049253910804e-05, "loss": 0.0177, "step": 26850 }, { "epoch": 3.1736668239735724, "grad_norm": 0.2470119297504425, "learning_rate": 9.907694300598237e-05, "loss": 0.0181, "step": 26900 }, { "epoch": 3.179565832940066, "grad_norm": 0.2032470703125, "learning_rate": 9.907338669882389e-05, "loss": 0.0158, "step": 26950 }, { "epoch": 3.1854648419065597, "grad_norm": 0.2168714851140976, "learning_rate": 9.90698236181235e-05, "loss": 0.0197, "step": 27000 }, { "epoch": 3.1854648419065597, "eval_cer": 0.08950086058519793, "eval_loss": 0.006441373378038406, "eval_runtime": 2.3463, "eval_samples_per_second": 42.619, "eval_steps_per_second": 1.705, "eval_wer": 0.29, "step": 27000 }, { "epoch": 3.1913638508730533, "grad_norm": 0.13209465146064758, "learning_rate": 9.906625376437299e-05, "loss": 0.019, "step": 27050 }, { "epoch": 3.197262859839547, "grad_norm": 0.2844199240207672, "learning_rate": 9.906267713806514e-05, "loss": 0.0177, "step": 27100 }, { "epoch": 3.2031618688060406, "grad_norm": 0.2612094283103943, "learning_rate": 9.905909373969358e-05, "loss": 0.0199, "step": 27150 }, { "epoch": 3.2090608777725342, "grad_norm": 0.3857910633087158, "learning_rate": 9.905550356975293e-05, "loss": 0.0166, "step": 27200 }, { "epoch": 3.214959886739028, "grad_norm": 0.24177134037017822, "learning_rate": 9.905190662873878e-05, "loss": 0.0161, "step": 27250 }, { "epoch": 3.2208588957055215, "grad_norm": 0.32567471265792847, "learning_rate": 9.904830291714755e-05, "loss": 0.0196, "step": 27300 }, { "epoch": 3.226757904672015, "grad_norm": 0.25344160199165344, "learning_rate": 9.904469243547671e-05, "loss": 0.019, "step": 27350 }, { "epoch": 3.232656913638509, "grad_norm": 0.17714621126651764, "learning_rate": 9.904107518422458e-05, "loss": 0.019, "step": 27400 }, { "epoch": 3.2385559226050025, "grad_norm": 0.12746074795722961, "learning_rate": 9.903745116389045e-05, "loss": 0.0175, "step": 27450 }, { "epoch": 3.244454931571496, "grad_norm": 0.2168128490447998, "learning_rate": 9.903382037497455e-05, "loss": 0.0202, "step": 27500 }, { "epoch": 3.2503539405379898, "grad_norm": 0.153797909617424, "learning_rate": 9.903018281797802e-05, "loss": 0.0167, "step": 27550 }, { "epoch": 3.2562529495044834, "grad_norm": 0.19680702686309814, "learning_rate": 9.902653849340295e-05, "loss": 0.0199, "step": 27600 }, { "epoch": 3.262151958470977, "grad_norm": 0.1762671023607254, "learning_rate": 9.902288740175238e-05, "loss": 0.0196, "step": 27650 }, { "epoch": 3.2680509674374703, "grad_norm": 0.24244007468223572, "learning_rate": 9.901922954353024e-05, "loss": 0.0181, "step": 27700 }, { "epoch": 3.273949976403964, "grad_norm": 0.17854192852973938, "learning_rate": 9.901556491924143e-05, "loss": 0.0196, "step": 27750 }, { "epoch": 3.2798489853704575, "grad_norm": 0.9413911700248718, "learning_rate": 9.901189352939177e-05, "loss": 0.0185, "step": 27800 }, { "epoch": 3.285747994336951, "grad_norm": 0.17392738163471222, "learning_rate": 9.900821537448803e-05, "loss": 0.0153, "step": 27850 }, { "epoch": 3.291647003303445, "grad_norm": 0.16491706669330597, "learning_rate": 9.90045304550379e-05, "loss": 0.0206, "step": 27900 }, { "epoch": 3.2975460122699385, "grad_norm": 0.2642422020435333, "learning_rate": 9.900083877155e-05, "loss": 0.0177, "step": 27950 }, { "epoch": 3.303445021236432, "grad_norm": 0.2945953607559204, "learning_rate": 9.899714032453387e-05, "loss": 0.0174, "step": 28000 }, { "epoch": 3.303445021236432, "eval_cer": 0.08691910499139414, "eval_loss": 0.007374964188784361, "eval_runtime": 2.4102, "eval_samples_per_second": 41.491, "eval_steps_per_second": 1.66, "eval_wer": 0.27, "step": 28000 }, { "epoch": 3.309344030202926, "grad_norm": 0.25405895709991455, "learning_rate": 9.899343511450003e-05, "loss": 0.0197, "step": 28050 }, { "epoch": 3.3152430391694194, "grad_norm": 0.16957220435142517, "learning_rate": 9.898972314195988e-05, "loss": 0.0193, "step": 28100 }, { "epoch": 3.321142048135913, "grad_norm": 0.2851770222187042, "learning_rate": 9.898600440742583e-05, "loss": 0.0184, "step": 28150 }, { "epoch": 3.3270410571024067, "grad_norm": 0.12498116493225098, "learning_rate": 9.89822789114111e-05, "loss": 0.0171, "step": 28200 }, { "epoch": 3.3329400660689004, "grad_norm": 0.4427048861980438, "learning_rate": 9.897854665442999e-05, "loss": 0.0166, "step": 28250 }, { "epoch": 3.338839075035394, "grad_norm": 0.30732157826423645, "learning_rate": 9.89748076369976e-05, "loss": 0.0187, "step": 28300 }, { "epoch": 3.3447380840018877, "grad_norm": 0.20184077322483063, "learning_rate": 9.897106185963003e-05, "loss": 0.0187, "step": 28350 }, { "epoch": 3.3506370929683813, "grad_norm": 0.09524688869714737, "learning_rate": 9.896730932284435e-05, "loss": 0.0179, "step": 28400 }, { "epoch": 3.356536101934875, "grad_norm": 0.043076831847429276, "learning_rate": 9.896355002715847e-05, "loss": 0.0172, "step": 28450 }, { "epoch": 3.3624351109013686, "grad_norm": 0.1258549988269806, "learning_rate": 9.895978397309132e-05, "loss": 0.017, "step": 28500 }, { "epoch": 3.3683341198678622, "grad_norm": 0.22885805368423462, "learning_rate": 9.895601116116267e-05, "loss": 0.0168, "step": 28550 }, { "epoch": 3.374233128834356, "grad_norm": 0.1810322105884552, "learning_rate": 9.895223159189332e-05, "loss": 0.0178, "step": 28600 }, { "epoch": 3.3801321378008495, "grad_norm": 0.16579902172088623, "learning_rate": 9.894844526580497e-05, "loss": 0.0167, "step": 28650 }, { "epoch": 3.386031146767343, "grad_norm": 0.3273639380931854, "learning_rate": 9.89446521834202e-05, "loss": 0.0156, "step": 28700 }, { "epoch": 3.391930155733837, "grad_norm": 0.16774491965770721, "learning_rate": 9.89408523452626e-05, "loss": 0.0188, "step": 28750 }, { "epoch": 3.3978291647003305, "grad_norm": 0.24268968403339386, "learning_rate": 9.893704575185664e-05, "loss": 0.0197, "step": 28800 }, { "epoch": 3.403728173666824, "grad_norm": 0.30409112572669983, "learning_rate": 9.893323240372774e-05, "loss": 0.0188, "step": 28850 }, { "epoch": 3.4096271826333178, "grad_norm": 0.22620835900306702, "learning_rate": 9.892941230140226e-05, "loss": 0.017, "step": 28900 }, { "epoch": 3.4155261915998114, "grad_norm": 0.12474960088729858, "learning_rate": 9.892558544540748e-05, "loss": 0.0174, "step": 28950 }, { "epoch": 3.421425200566305, "grad_norm": 0.2350797951221466, "learning_rate": 9.892175183627161e-05, "loss": 0.019, "step": 29000 }, { "epoch": 3.421425200566305, "eval_cer": 0.08864027538726334, "eval_loss": 0.005455040372908115, "eval_runtime": 2.2288, "eval_samples_per_second": 44.867, "eval_steps_per_second": 1.795, "eval_wer": 0.28, "step": 29000 }, { "epoch": 3.4273242095327987, "grad_norm": 0.15550492703914642, "learning_rate": 9.89179114745238e-05, "loss": 0.0192, "step": 29050 }, { "epoch": 3.4332232184992924, "grad_norm": 0.2160584181547165, "learning_rate": 9.891406436069415e-05, "loss": 0.0177, "step": 29100 }, { "epoch": 3.439122227465786, "grad_norm": 0.29396361112594604, "learning_rate": 9.891021049531364e-05, "loss": 0.0174, "step": 29150 }, { "epoch": 3.445021236432279, "grad_norm": 0.22029080986976624, "learning_rate": 9.890634987891424e-05, "loss": 0.0161, "step": 29200 }, { "epoch": 3.450920245398773, "grad_norm": 0.11369181424379349, "learning_rate": 9.890248251202882e-05, "loss": 0.0175, "step": 29250 }, { "epoch": 3.4568192543652665, "grad_norm": 0.14966927468776703, "learning_rate": 9.889860839519119e-05, "loss": 0.0147, "step": 29300 }, { "epoch": 3.46271826333176, "grad_norm": 0.2935669720172882, "learning_rate": 9.889472752893608e-05, "loss": 0.0186, "step": 29350 }, { "epoch": 3.468617272298254, "grad_norm": 0.11305372416973114, "learning_rate": 9.889083991379917e-05, "loss": 0.0171, "step": 29400 }, { "epoch": 3.4745162812647474, "grad_norm": 0.31725776195526123, "learning_rate": 9.888694555031706e-05, "loss": 0.0182, "step": 29450 }, { "epoch": 3.480415290231241, "grad_norm": 0.3843410015106201, "learning_rate": 9.888304443902728e-05, "loss": 0.0163, "step": 29500 }, { "epoch": 3.4863142991977347, "grad_norm": 0.2685706317424774, "learning_rate": 9.887913658046832e-05, "loss": 0.0201, "step": 29550 }, { "epoch": 3.4922133081642284, "grad_norm": 0.2798655033111572, "learning_rate": 9.887522197517953e-05, "loss": 0.0155, "step": 29600 }, { "epoch": 3.498112317130722, "grad_norm": 0.07517682015895844, "learning_rate": 9.887130062370129e-05, "loss": 0.0177, "step": 29650 }, { "epoch": 3.5040113260972157, "grad_norm": 0.1561184674501419, "learning_rate": 9.886737252657483e-05, "loss": 0.0154, "step": 29700 }, { "epoch": 3.5099103350637093, "grad_norm": 0.22629056870937347, "learning_rate": 9.886343768434235e-05, "loss": 0.0185, "step": 29750 }, { "epoch": 3.515809344030203, "grad_norm": 0.21461816132068634, "learning_rate": 9.885949609754695e-05, "loss": 0.0188, "step": 29800 }, { "epoch": 3.5217083529966966, "grad_norm": 0.19941721856594086, "learning_rate": 9.885554776673272e-05, "loss": 0.019, "step": 29850 }, { "epoch": 3.5276073619631902, "grad_norm": 0.5380612015724182, "learning_rate": 9.885159269244462e-05, "loss": 0.0187, "step": 29900 }, { "epoch": 3.533506370929684, "grad_norm": 0.11814635246992111, "learning_rate": 9.884763087522856e-05, "loss": 0.018, "step": 29950 }, { "epoch": 3.5394053798961775, "grad_norm": 0.2645626366138458, "learning_rate": 9.88436623156314e-05, "loss": 0.0206, "step": 30000 }, { "epoch": 3.5394053798961775, "eval_cer": 0.08950086058519793, "eval_loss": 0.0044050998985767365, "eval_runtime": 2.2362, "eval_samples_per_second": 44.719, "eval_steps_per_second": 1.789, "eval_wer": 0.28, "step": 30000 }, { "epoch": 3.545304388862671, "grad_norm": 0.18661801517009735, "learning_rate": 9.883968701420089e-05, "loss": 0.0173, "step": 30050 }, { "epoch": 3.551203397829165, "grad_norm": 0.18136368691921234, "learning_rate": 9.883570497148578e-05, "loss": 0.0182, "step": 30100 }, { "epoch": 3.5571024067956585, "grad_norm": 0.04230756685137749, "learning_rate": 9.883171618803568e-05, "loss": 0.0165, "step": 30150 }, { "epoch": 3.563001415762152, "grad_norm": 0.17051878571510315, "learning_rate": 9.882772066440116e-05, "loss": 0.0173, "step": 30200 }, { "epoch": 3.5689004247286453, "grad_norm": 0.5929579138755798, "learning_rate": 9.882371840113372e-05, "loss": 0.0176, "step": 30250 }, { "epoch": 3.574799433695139, "grad_norm": 0.16981586813926697, "learning_rate": 9.881970939878577e-05, "loss": 0.0201, "step": 30300 }, { "epoch": 3.5806984426616326, "grad_norm": 0.20812398195266724, "learning_rate": 9.881569365791067e-05, "loss": 0.0165, "step": 30350 }, { "epoch": 3.5865974516281263, "grad_norm": 0.21741695702075958, "learning_rate": 9.881167117906275e-05, "loss": 0.0184, "step": 30400 }, { "epoch": 3.59249646059462, "grad_norm": 0.17957724630832672, "learning_rate": 9.880764196279719e-05, "loss": 0.02, "step": 30450 }, { "epoch": 3.5983954695611136, "grad_norm": 0.09137777239084244, "learning_rate": 9.880360600967013e-05, "loss": 0.0173, "step": 30500 }, { "epoch": 3.604294478527607, "grad_norm": 0.17920620739459991, "learning_rate": 9.87995633202387e-05, "loss": 0.017, "step": 30550 }, { "epoch": 3.610193487494101, "grad_norm": 0.1997753530740738, "learning_rate": 9.879551389506083e-05, "loss": 0.017, "step": 30600 }, { "epoch": 3.6160924964605945, "grad_norm": 0.13107553124427795, "learning_rate": 9.879145773469552e-05, "loss": 0.0176, "step": 30650 }, { "epoch": 3.621991505427088, "grad_norm": 0.19237421452999115, "learning_rate": 9.878739483970261e-05, "loss": 0.0179, "step": 30700 }, { "epoch": 3.627890514393582, "grad_norm": 0.15683497488498688, "learning_rate": 9.878332521064291e-05, "loss": 0.0186, "step": 30750 }, { "epoch": 3.6337895233600754, "grad_norm": 0.20556238293647766, "learning_rate": 9.877924884807814e-05, "loss": 0.0175, "step": 30800 }, { "epoch": 3.639688532326569, "grad_norm": 0.28255337476730347, "learning_rate": 9.877516575257096e-05, "loss": 0.0186, "step": 30850 }, { "epoch": 3.6455875412930627, "grad_norm": 0.21663601696491241, "learning_rate": 9.877107592468494e-05, "loss": 0.0185, "step": 30900 }, { "epoch": 3.6514865502595564, "grad_norm": 0.16190974414348602, "learning_rate": 9.876697936498459e-05, "loss": 0.0198, "step": 30950 }, { "epoch": 3.65738555922605, "grad_norm": 0.1872107833623886, "learning_rate": 9.876287607403538e-05, "loss": 0.0177, "step": 31000 }, { "epoch": 3.65738555922605, "eval_cer": 0.08777969018932874, "eval_loss": 0.0034986156970262527, "eval_runtime": 2.3786, "eval_samples_per_second": 42.042, "eval_steps_per_second": 1.682, "eval_wer": 0.28, "step": 31000 }, { "epoch": 3.6632845681925437, "grad_norm": 0.4134170114994049, "learning_rate": 9.875876605240366e-05, "loss": 0.016, "step": 31050 }, { "epoch": 3.6691835771590373, "grad_norm": 0.2925160229206085, "learning_rate": 9.875464930065675e-05, "loss": 0.018, "step": 31100 }, { "epoch": 3.675082586125531, "grad_norm": 0.1551489382982254, "learning_rate": 9.875052581936288e-05, "loss": 0.0171, "step": 31150 }, { "epoch": 3.6809815950920246, "grad_norm": 0.22957894206047058, "learning_rate": 9.874639560909117e-05, "loss": 0.0176, "step": 31200 }, { "epoch": 3.6868806040585183, "grad_norm": 0.3317546248435974, "learning_rate": 9.874225867041177e-05, "loss": 0.0186, "step": 31250 }, { "epoch": 3.692779613025012, "grad_norm": 0.2897341549396515, "learning_rate": 9.873811500389566e-05, "loss": 0.0195, "step": 31300 }, { "epoch": 3.6986786219915055, "grad_norm": 0.14529524743556976, "learning_rate": 9.873396461011478e-05, "loss": 0.0156, "step": 31350 }, { "epoch": 3.704577630957999, "grad_norm": 0.1525001972913742, "learning_rate": 9.872980748964203e-05, "loss": 0.0195, "step": 31400 }, { "epoch": 3.710476639924493, "grad_norm": 0.3791784644126892, "learning_rate": 9.872564364305117e-05, "loss": 0.0185, "step": 31450 }, { "epoch": 3.7163756488909865, "grad_norm": 0.1535978466272354, "learning_rate": 9.872147307091699e-05, "loss": 0.0193, "step": 31500 }, { "epoch": 3.72227465785748, "grad_norm": 0.1551169753074646, "learning_rate": 9.87172957738151e-05, "loss": 0.0213, "step": 31550 }, { "epoch": 3.7281736668239738, "grad_norm": 0.18557950854301453, "learning_rate": 9.871311175232209e-05, "loss": 0.0165, "step": 31600 }, { "epoch": 3.7340726757904674, "grad_norm": 0.2504551112651825, "learning_rate": 9.870892100701552e-05, "loss": 0.0177, "step": 31650 }, { "epoch": 3.739971684756961, "grad_norm": 0.11979654431343079, "learning_rate": 9.870472353847377e-05, "loss": 0.017, "step": 31700 }, { "epoch": 3.7458706937234547, "grad_norm": 0.07180637121200562, "learning_rate": 9.870051934727627e-05, "loss": 0.016, "step": 31750 }, { "epoch": 3.7517697026899484, "grad_norm": 0.3083426058292389, "learning_rate": 9.86963084340033e-05, "loss": 0.0178, "step": 31800 }, { "epoch": 3.7576687116564416, "grad_norm": 0.15932561457157135, "learning_rate": 9.869209079923607e-05, "loss": 0.0169, "step": 31850 }, { "epoch": 3.763567720622935, "grad_norm": 0.24353429675102234, "learning_rate": 9.868786644355674e-05, "loss": 0.0175, "step": 31900 }, { "epoch": 3.769466729589429, "grad_norm": 1.2222667932510376, "learning_rate": 9.868363536754841e-05, "loss": 0.0176, "step": 31950 }, { "epoch": 3.7753657385559225, "grad_norm": 0.22430205345153809, "learning_rate": 9.867939757179509e-05, "loss": 0.0173, "step": 32000 }, { "epoch": 3.7753657385559225, "eval_cer": 0.08777969018932874, "eval_loss": 0.0063841501250863075, "eval_runtime": 2.1988, "eval_samples_per_second": 45.48, "eval_steps_per_second": 1.819, "eval_wer": 0.28, "step": 32000 }, { "epoch": 3.781264747522416, "grad_norm": 0.11365979164838791, "learning_rate": 9.86751530568817e-05, "loss": 0.0171, "step": 32050 }, { "epoch": 3.78716375648891, "grad_norm": 0.4598778784275055, "learning_rate": 9.867090182339411e-05, "loss": 0.0172, "step": 32100 }, { "epoch": 3.7930627654554034, "grad_norm": 0.19957250356674194, "learning_rate": 9.866664387191913e-05, "loss": 0.0173, "step": 32150 }, { "epoch": 3.798961774421897, "grad_norm": 0.07266674935817719, "learning_rate": 9.866237920304444e-05, "loss": 0.0199, "step": 32200 }, { "epoch": 3.8048607833883907, "grad_norm": 0.13181932270526886, "learning_rate": 9.865810781735874e-05, "loss": 0.0166, "step": 32250 }, { "epoch": 3.8107597923548844, "grad_norm": 0.16826869547367096, "learning_rate": 9.865382971545156e-05, "loss": 0.0177, "step": 32300 }, { "epoch": 3.816658801321378, "grad_norm": 0.22895334661006927, "learning_rate": 9.864954489791345e-05, "loss": 0.0174, "step": 32350 }, { "epoch": 3.8225578102878717, "grad_norm": 0.2619366943836212, "learning_rate": 9.864525336533578e-05, "loss": 0.0164, "step": 32400 }, { "epoch": 3.8284568192543653, "grad_norm": 0.22836406528949738, "learning_rate": 9.864095511831094e-05, "loss": 0.0202, "step": 32450 }, { "epoch": 3.834355828220859, "grad_norm": 0.24797937273979187, "learning_rate": 9.863665015743221e-05, "loss": 0.0177, "step": 32500 }, { "epoch": 3.8402548371873526, "grad_norm": 0.1578008085489273, "learning_rate": 9.863233848329379e-05, "loss": 0.0176, "step": 32550 }, { "epoch": 3.8461538461538463, "grad_norm": 0.21870563924312592, "learning_rate": 9.862802009649081e-05, "loss": 0.0182, "step": 32600 }, { "epoch": 3.85205285512034, "grad_norm": 0.22064337134361267, "learning_rate": 9.862369499761936e-05, "loss": 0.0203, "step": 32650 }, { "epoch": 3.8579518640868335, "grad_norm": 0.18647265434265137, "learning_rate": 9.861936318727641e-05, "loss": 0.0201, "step": 32700 }, { "epoch": 3.863850873053327, "grad_norm": 0.18728132545948029, "learning_rate": 9.861502466605986e-05, "loss": 0.0174, "step": 32750 }, { "epoch": 3.8697498820198204, "grad_norm": 0.3099417984485626, "learning_rate": 9.861067943456858e-05, "loss": 0.0187, "step": 32800 }, { "epoch": 3.875648890986314, "grad_norm": 0.09291265904903412, "learning_rate": 9.860632749340231e-05, "loss": 0.0163, "step": 32850 }, { "epoch": 3.8815478999528077, "grad_norm": 0.15452028810977936, "learning_rate": 9.860196884316176e-05, "loss": 0.0167, "step": 32900 }, { "epoch": 3.8874469089193013, "grad_norm": 0.29796820878982544, "learning_rate": 9.859760348444855e-05, "loss": 0.0177, "step": 32950 }, { "epoch": 3.893345917885795, "grad_norm": 0.17479290068149567, "learning_rate": 9.85932314178652e-05, "loss": 0.0168, "step": 33000 }, { "epoch": 3.893345917885795, "eval_cer": 0.09036144578313253, "eval_loss": 0.005217348225414753, "eval_runtime": 2.2608, "eval_samples_per_second": 44.232, "eval_steps_per_second": 1.769, "eval_wer": 0.29, "step": 33000 }, { "epoch": 3.8992449268522886, "grad_norm": 0.19998224079608917, "learning_rate": 9.858885264401523e-05, "loss": 0.015, "step": 33050 }, { "epoch": 3.9051439358187823, "grad_norm": 0.1814209371805191, "learning_rate": 9.858446716350301e-05, "loss": 0.0192, "step": 33100 }, { "epoch": 3.911042944785276, "grad_norm": 0.23082958161830902, "learning_rate": 9.858007497693384e-05, "loss": 0.0171, "step": 33150 }, { "epoch": 3.9169419537517696, "grad_norm": 0.2309986799955368, "learning_rate": 9.857567608491399e-05, "loss": 0.0174, "step": 33200 }, { "epoch": 3.922840962718263, "grad_norm": 0.26335087418556213, "learning_rate": 9.857127048805063e-05, "loss": 0.0172, "step": 33250 }, { "epoch": 3.928739971684757, "grad_norm": 0.16843938827514648, "learning_rate": 9.856685818695189e-05, "loss": 0.0176, "step": 33300 }, { "epoch": 3.9346389806512505, "grad_norm": 0.2808878421783447, "learning_rate": 9.856243918222676e-05, "loss": 0.0184, "step": 33350 }, { "epoch": 3.940537989617744, "grad_norm": 0.28112414479255676, "learning_rate": 9.855801347448519e-05, "loss": 0.0183, "step": 33400 }, { "epoch": 3.946436998584238, "grad_norm": 0.22935375571250916, "learning_rate": 9.855358106433805e-05, "loss": 0.0164, "step": 33450 }, { "epoch": 3.9523360075507314, "grad_norm": 0.22309431433677673, "learning_rate": 9.854914195239718e-05, "loss": 0.016, "step": 33500 }, { "epoch": 3.958235016517225, "grad_norm": 0.6025923490524292, "learning_rate": 9.854469613927526e-05, "loss": 0.0133, "step": 33550 }, { "epoch": 3.9641340254837187, "grad_norm": 0.21932409703731537, "learning_rate": 9.854024362558596e-05, "loss": 0.0168, "step": 33600 }, { "epoch": 3.9700330344502124, "grad_norm": 0.23418931663036346, "learning_rate": 9.853578441194386e-05, "loss": 0.0165, "step": 33650 }, { "epoch": 3.975932043416706, "grad_norm": 0.36904671788215637, "learning_rate": 9.853131849896446e-05, "loss": 0.0197, "step": 33700 }, { "epoch": 3.9818310523831997, "grad_norm": 0.1786024123430252, "learning_rate": 9.852684588726418e-05, "loss": 0.0166, "step": 33750 }, { "epoch": 3.9877300613496933, "grad_norm": 0.19226740300655365, "learning_rate": 9.852236657746035e-05, "loss": 0.0182, "step": 33800 }, { "epoch": 3.993629070316187, "grad_norm": 0.4042689800262451, "learning_rate": 9.851788057017128e-05, "loss": 0.019, "step": 33850 }, { "epoch": 3.9995280792826806, "grad_norm": 0.23501402139663696, "learning_rate": 9.851338786601614e-05, "loss": 0.0166, "step": 33900 }, { "epoch": 4.005427088249174, "grad_norm": 0.26366809010505676, "learning_rate": 9.850888846561508e-05, "loss": 0.014, "step": 33950 }, { "epoch": 4.011326097215668, "grad_norm": 0.44632819294929504, "learning_rate": 9.850438236958912e-05, "loss": 0.0127, "step": 34000 }, { "epoch": 4.011326097215668, "eval_cer": 0.08519793459552495, "eval_loss": 0.0046807629987597466, "eval_runtime": 2.1194, "eval_samples_per_second": 47.182, "eval_steps_per_second": 1.887, "eval_wer": 0.27, "step": 34000 }, { "epoch": 4.0172251061821616, "grad_norm": 0.1738041788339615, "learning_rate": 9.849986957856023e-05, "loss": 0.0157, "step": 34050 }, { "epoch": 4.023124115148655, "grad_norm": 0.295872300863266, "learning_rate": 9.849535009315134e-05, "loss": 0.0184, "step": 34100 }, { "epoch": 4.029023124115149, "grad_norm": 0.10085457563400269, "learning_rate": 9.849082391398623e-05, "loss": 0.0143, "step": 34150 }, { "epoch": 4.0349221330816425, "grad_norm": 0.11515238881111145, "learning_rate": 9.848629104168967e-05, "loss": 0.0147, "step": 34200 }, { "epoch": 4.040821142048136, "grad_norm": 0.1571127325296402, "learning_rate": 9.84817514768873e-05, "loss": 0.0142, "step": 34250 }, { "epoch": 4.04672015101463, "grad_norm": 0.1323835700750351, "learning_rate": 9.847720522020574e-05, "loss": 0.0148, "step": 34300 }, { "epoch": 4.052619159981123, "grad_norm": 0.2711625099182129, "learning_rate": 9.847265227227249e-05, "loss": 0.0156, "step": 34350 }, { "epoch": 4.058518168947617, "grad_norm": 0.19935426115989685, "learning_rate": 9.846809263371598e-05, "loss": 0.0174, "step": 34400 }, { "epoch": 4.064417177914111, "grad_norm": 0.31848737597465515, "learning_rate": 9.84635263051656e-05, "loss": 0.0147, "step": 34450 }, { "epoch": 4.070316186880604, "grad_norm": 0.15678399801254272, "learning_rate": 9.845895328725161e-05, "loss": 0.0147, "step": 34500 }, { "epoch": 4.076215195847098, "grad_norm": 0.08404966443777084, "learning_rate": 9.84543735806052e-05, "loss": 0.016, "step": 34550 }, { "epoch": 4.082114204813592, "grad_norm": 0.24269531667232513, "learning_rate": 9.844978718585855e-05, "loss": 0.0149, "step": 34600 }, { "epoch": 4.088013213780085, "grad_norm": 0.44582560658454895, "learning_rate": 9.84451941036447e-05, "loss": 0.015, "step": 34650 }, { "epoch": 4.093912222746579, "grad_norm": 0.22990725934505463, "learning_rate": 9.844059433459761e-05, "loss": 0.0139, "step": 34700 }, { "epoch": 4.099811231713073, "grad_norm": 0.33783164620399475, "learning_rate": 9.843598787935218e-05, "loss": 0.0176, "step": 34750 }, { "epoch": 4.105710240679566, "grad_norm": 0.2434518039226532, "learning_rate": 9.843137473854424e-05, "loss": 0.016, "step": 34800 }, { "epoch": 4.11160924964606, "grad_norm": 0.1890212595462799, "learning_rate": 9.842675491281055e-05, "loss": 0.0153, "step": 34850 }, { "epoch": 4.117508258612553, "grad_norm": 0.2989141643047333, "learning_rate": 9.842212840278875e-05, "loss": 0.016, "step": 34900 }, { "epoch": 4.123407267579046, "grad_norm": 0.28836339712142944, "learning_rate": 9.841749520911748e-05, "loss": 0.016, "step": 34950 }, { "epoch": 4.12930627654554, "grad_norm": 0.21767236292362213, "learning_rate": 9.84128553324362e-05, "loss": 0.0191, "step": 35000 }, { "epoch": 4.12930627654554, "eval_cer": 0.08691910499139414, "eval_loss": 0.005998192820698023, "eval_runtime": 2.0845, "eval_samples_per_second": 47.973, "eval_steps_per_second": 1.919, "eval_wer": 0.28, "step": 35000 }, { "epoch": 4.135205285512034, "grad_norm": 0.42546316981315613, "learning_rate": 9.840820877338538e-05, "loss": 0.0169, "step": 35050 }, { "epoch": 4.141104294478527, "grad_norm": 0.1312832236289978, "learning_rate": 9.840355553260637e-05, "loss": 0.0141, "step": 35100 }, { "epoch": 4.147003303445021, "grad_norm": 0.21957463026046753, "learning_rate": 9.839889561074144e-05, "loss": 0.0162, "step": 35150 }, { "epoch": 4.1529023124115145, "grad_norm": 0.14719510078430176, "learning_rate": 9.839422900843382e-05, "loss": 0.0153, "step": 35200 }, { "epoch": 4.158801321378008, "grad_norm": 0.28372904658317566, "learning_rate": 9.838955572632762e-05, "loss": 0.0142, "step": 35250 }, { "epoch": 4.164700330344502, "grad_norm": 0.14924341440200806, "learning_rate": 9.838487576506787e-05, "loss": 0.0144, "step": 35300 }, { "epoch": 4.1705993393109955, "grad_norm": 0.2996293604373932, "learning_rate": 9.838018912530059e-05, "loss": 0.0139, "step": 35350 }, { "epoch": 4.176498348277489, "grad_norm": 0.23299606144428253, "learning_rate": 9.837549580767261e-05, "loss": 0.0159, "step": 35400 }, { "epoch": 4.182397357243983, "grad_norm": 0.21855464577674866, "learning_rate": 9.837079581283179e-05, "loss": 0.0144, "step": 35450 }, { "epoch": 4.188296366210476, "grad_norm": 0.3801169693470001, "learning_rate": 9.836608914142684e-05, "loss": 0.0171, "step": 35500 }, { "epoch": 4.19419537517697, "grad_norm": 0.1600542515516281, "learning_rate": 9.836137579410742e-05, "loss": 0.0148, "step": 35550 }, { "epoch": 4.200094384143464, "grad_norm": 0.39650917053222656, "learning_rate": 9.835665577152411e-05, "loss": 0.0163, "step": 35600 }, { "epoch": 4.205993393109957, "grad_norm": 0.2654813826084137, "learning_rate": 9.835192907432842e-05, "loss": 0.0159, "step": 35650 }, { "epoch": 4.211892402076451, "grad_norm": 0.19161096215248108, "learning_rate": 9.834719570317276e-05, "loss": 0.0169, "step": 35700 }, { "epoch": 4.217791411042945, "grad_norm": 0.08721723407506943, "learning_rate": 9.834245565871046e-05, "loss": 0.0154, "step": 35750 }, { "epoch": 4.223690420009438, "grad_norm": 0.42034703493118286, "learning_rate": 9.833770894159581e-05, "loss": 0.016, "step": 35800 }, { "epoch": 4.229589428975932, "grad_norm": 0.22349746525287628, "learning_rate": 9.833295555248397e-05, "loss": 0.016, "step": 35850 }, { "epoch": 4.235488437942426, "grad_norm": 0.23017320036888123, "learning_rate": 9.832819549203106e-05, "loss": 0.0164, "step": 35900 }, { "epoch": 4.241387446908919, "grad_norm": 0.2232939451932907, "learning_rate": 9.832342876089409e-05, "loss": 0.0174, "step": 35950 }, { "epoch": 4.247286455875413, "grad_norm": 0.21891802549362183, "learning_rate": 9.831865535973103e-05, "loss": 0.0145, "step": 36000 }, { "epoch": 4.247286455875413, "eval_cer": 0.08864027538726334, "eval_loss": 0.003760554827749729, "eval_runtime": 2.0338, "eval_samples_per_second": 49.169, "eval_steps_per_second": 1.967, "eval_wer": 0.28, "step": 36000 }, { "epoch": 4.2531854648419065, "grad_norm": 0.05707191675901413, "learning_rate": 9.831387528920072e-05, "loss": 0.0141, "step": 36050 }, { "epoch": 4.2590844738084, "grad_norm": 0.23562248051166534, "learning_rate": 9.830908854996297e-05, "loss": 0.0139, "step": 36100 }, { "epoch": 4.264983482774894, "grad_norm": 0.09480427205562592, "learning_rate": 9.830429514267847e-05, "loss": 0.015, "step": 36150 }, { "epoch": 4.2708824917413875, "grad_norm": 0.26202645897865295, "learning_rate": 9.829949506800886e-05, "loss": 0.0136, "step": 36200 }, { "epoch": 4.276781500707881, "grad_norm": 0.12408323585987091, "learning_rate": 9.829468832661668e-05, "loss": 0.0147, "step": 36250 }, { "epoch": 4.282680509674375, "grad_norm": 0.13622206449508667, "learning_rate": 9.82898749191654e-05, "loss": 0.0141, "step": 36300 }, { "epoch": 4.288579518640868, "grad_norm": 0.2581905126571655, "learning_rate": 9.828505484631942e-05, "loss": 0.0154, "step": 36350 }, { "epoch": 4.294478527607362, "grad_norm": 0.24106259644031525, "learning_rate": 9.828022810874404e-05, "loss": 0.0146, "step": 36400 }, { "epoch": 4.300377536573856, "grad_norm": 0.13967661559581757, "learning_rate": 9.827539470710549e-05, "loss": 0.0158, "step": 36450 }, { "epoch": 4.306276545540349, "grad_norm": 0.14819133281707764, "learning_rate": 9.827055464207094e-05, "loss": 0.0151, "step": 36500 }, { "epoch": 4.312175554506843, "grad_norm": 0.2540696859359741, "learning_rate": 9.826570791430841e-05, "loss": 0.0163, "step": 36550 }, { "epoch": 4.318074563473337, "grad_norm": 0.9480804800987244, "learning_rate": 9.826085452448694e-05, "loss": 0.0153, "step": 36600 }, { "epoch": 4.32397357243983, "grad_norm": 0.34766605496406555, "learning_rate": 9.825599447327639e-05, "loss": 0.0151, "step": 36650 }, { "epoch": 4.329872581406324, "grad_norm": 0.18468552827835083, "learning_rate": 9.825112776134762e-05, "loss": 0.0165, "step": 36700 }, { "epoch": 4.335771590372818, "grad_norm": 0.2031431645154953, "learning_rate": 9.824625438937238e-05, "loss": 0.0136, "step": 36750 }, { "epoch": 4.341670599339311, "grad_norm": 0.19597594439983368, "learning_rate": 9.824137435802331e-05, "loss": 0.0154, "step": 36800 }, { "epoch": 4.347569608305805, "grad_norm": 0.1434662938117981, "learning_rate": 9.823648766797401e-05, "loss": 0.0159, "step": 36850 }, { "epoch": 4.3534686172722985, "grad_norm": 0.2551201581954956, "learning_rate": 9.823159431989899e-05, "loss": 0.015, "step": 36900 }, { "epoch": 4.359367626238792, "grad_norm": 0.3714548945426941, "learning_rate": 9.822669431447365e-05, "loss": 0.0153, "step": 36950 }, { "epoch": 4.365266635205286, "grad_norm": 0.17439939081668854, "learning_rate": 9.822178765237436e-05, "loss": 0.0145, "step": 37000 }, { "epoch": 4.365266635205286, "eval_cer": 0.08864027538726334, "eval_loss": 0.005244044587016106, "eval_runtime": 2.0553, "eval_samples_per_second": 48.656, "eval_steps_per_second": 1.946, "eval_wer": 0.28, "step": 37000 }, { "epoch": 4.371165644171779, "grad_norm": 0.17451536655426025, "learning_rate": 9.821687433427837e-05, "loss": 0.0136, "step": 37050 }, { "epoch": 4.377064653138273, "grad_norm": 0.28569716215133667, "learning_rate": 9.821195436086385e-05, "loss": 0.0159, "step": 37100 }, { "epoch": 4.382963662104767, "grad_norm": 0.22659027576446533, "learning_rate": 9.820702773280991e-05, "loss": 0.016, "step": 37150 }, { "epoch": 4.38886267107126, "grad_norm": 0.26056918501853943, "learning_rate": 9.820209445079656e-05, "loss": 0.0165, "step": 37200 }, { "epoch": 4.394761680037754, "grad_norm": 0.1550283133983612, "learning_rate": 9.819715451550474e-05, "loss": 0.0168, "step": 37250 }, { "epoch": 4.400660689004248, "grad_norm": 0.22134554386138916, "learning_rate": 9.81922079276163e-05, "loss": 0.0137, "step": 37300 }, { "epoch": 4.406559697970741, "grad_norm": 0.22292019426822662, "learning_rate": 9.818725468781402e-05, "loss": 0.0148, "step": 37350 }, { "epoch": 4.412458706937235, "grad_norm": 0.15625500679016113, "learning_rate": 9.818229479678158e-05, "loss": 0.014, "step": 37400 }, { "epoch": 4.418357715903728, "grad_norm": 0.29477083683013916, "learning_rate": 9.817732825520358e-05, "loss": 0.0143, "step": 37450 }, { "epoch": 4.424256724870222, "grad_norm": 0.23380330204963684, "learning_rate": 9.817235506376556e-05, "loss": 0.0162, "step": 37500 }, { "epoch": 4.430155733836715, "grad_norm": 0.165362149477005, "learning_rate": 9.816737522315398e-05, "loss": 0.0159, "step": 37550 }, { "epoch": 4.436054742803209, "grad_norm": 0.8591015934944153, "learning_rate": 9.816238873405616e-05, "loss": 0.0155, "step": 37600 }, { "epoch": 4.441953751769702, "grad_norm": 0.2406962662935257, "learning_rate": 9.815739559716042e-05, "loss": 0.0153, "step": 37650 }, { "epoch": 4.447852760736196, "grad_norm": 0.10157783329486847, "learning_rate": 9.815239581315592e-05, "loss": 0.0141, "step": 37700 }, { "epoch": 4.45375176970269, "grad_norm": 0.2566896677017212, "learning_rate": 9.81473893827328e-05, "loss": 0.0141, "step": 37750 }, { "epoch": 4.459650778669183, "grad_norm": 0.11647289246320724, "learning_rate": 9.814237630658208e-05, "loss": 0.0133, "step": 37800 }, { "epoch": 4.465549787635677, "grad_norm": 0.29457539319992065, "learning_rate": 9.813735658539572e-05, "loss": 0.016, "step": 37850 }, { "epoch": 4.4714487966021705, "grad_norm": 0.24431535601615906, "learning_rate": 9.813233021986659e-05, "loss": 0.0142, "step": 37900 }, { "epoch": 4.477347805568664, "grad_norm": 0.16300024092197418, "learning_rate": 9.812729721068844e-05, "loss": 0.0146, "step": 37950 }, { "epoch": 4.483246814535158, "grad_norm": 0.2961362600326538, "learning_rate": 9.812225755855599e-05, "loss": 0.0152, "step": 38000 }, { "epoch": 4.483246814535158, "eval_cer": 0.08777969018932874, "eval_loss": 0.006630950141698122, "eval_runtime": 2.0757, "eval_samples_per_second": 48.176, "eval_steps_per_second": 1.927, "eval_wer": 0.27, "step": 38000 }, { "epoch": 4.4891458235016515, "grad_norm": 0.24002204835414886, "learning_rate": 9.811721126416489e-05, "loss": 0.0183, "step": 38050 }, { "epoch": 4.495044832468145, "grad_norm": 0.24717293679714203, "learning_rate": 9.811215832821161e-05, "loss": 0.0142, "step": 38100 }, { "epoch": 4.500943841434639, "grad_norm": 0.2193300575017929, "learning_rate": 9.810709875139366e-05, "loss": 0.0142, "step": 38150 }, { "epoch": 4.506842850401132, "grad_norm": 0.17247149348258972, "learning_rate": 9.810203253440937e-05, "loss": 0.0145, "step": 38200 }, { "epoch": 4.512741859367626, "grad_norm": 0.21384267508983612, "learning_rate": 9.809695967795805e-05, "loss": 0.0136, "step": 38250 }, { "epoch": 4.51864086833412, "grad_norm": 0.22786371409893036, "learning_rate": 9.809188018273986e-05, "loss": 0.0178, "step": 38300 }, { "epoch": 4.524539877300613, "grad_norm": 0.229579895734787, "learning_rate": 9.808679404945597e-05, "loss": 0.0153, "step": 38350 }, { "epoch": 4.530438886267107, "grad_norm": 0.15844567120075226, "learning_rate": 9.808170127880838e-05, "loss": 0.0156, "step": 38400 }, { "epoch": 4.536337895233601, "grad_norm": 0.05045616254210472, "learning_rate": 9.807660187150005e-05, "loss": 0.0135, "step": 38450 }, { "epoch": 4.542236904200094, "grad_norm": 0.19792410731315613, "learning_rate": 9.807149582823484e-05, "loss": 0.0144, "step": 38500 }, { "epoch": 4.548135913166588, "grad_norm": 0.1598484218120575, "learning_rate": 9.806638314971753e-05, "loss": 0.0176, "step": 38550 }, { "epoch": 4.554034922133082, "grad_norm": 0.05504409223794937, "learning_rate": 9.806126383665381e-05, "loss": 0.0149, "step": 38600 }, { "epoch": 4.559933931099575, "grad_norm": 0.2320374846458435, "learning_rate": 9.805613788975032e-05, "loss": 0.0142, "step": 38650 }, { "epoch": 4.565832940066069, "grad_norm": 0.13000454008579254, "learning_rate": 9.805100530971456e-05, "loss": 0.0149, "step": 38700 }, { "epoch": 4.5717319490325625, "grad_norm": 0.16056041419506073, "learning_rate": 9.804586609725499e-05, "loss": 0.015, "step": 38750 }, { "epoch": 4.577630957999056, "grad_norm": 0.1679510921239853, "learning_rate": 9.804072025308095e-05, "loss": 0.0136, "step": 38800 }, { "epoch": 4.58352996696555, "grad_norm": 0.08037034422159195, "learning_rate": 9.803556777790274e-05, "loss": 0.0164, "step": 38850 }, { "epoch": 4.5894289759320435, "grad_norm": 0.16590721905231476, "learning_rate": 9.803040867243154e-05, "loss": 0.0148, "step": 38900 }, { "epoch": 4.595327984898537, "grad_norm": 0.25079041719436646, "learning_rate": 9.802524293737945e-05, "loss": 0.0168, "step": 38950 }, { "epoch": 4.601226993865031, "grad_norm": 0.33689138293266296, "learning_rate": 9.80200705734595e-05, "loss": 0.0135, "step": 39000 }, { "epoch": 4.601226993865031, "eval_cer": 0.09208261617900172, "eval_loss": 0.007052063010632992, "eval_runtime": 2.0585, "eval_samples_per_second": 48.58, "eval_steps_per_second": 1.943, "eval_wer": 0.28, "step": 39000 }, { "epoch": 4.607126002831524, "grad_norm": 0.33073708415031433, "learning_rate": 9.801489158138561e-05, "loss": 0.0141, "step": 39050 }, { "epoch": 4.613025011798018, "grad_norm": 0.2405056357383728, "learning_rate": 9.800970596187265e-05, "loss": 0.0159, "step": 39100 }, { "epoch": 4.618924020764512, "grad_norm": 0.2696436643600464, "learning_rate": 9.800451371563637e-05, "loss": 0.0131, "step": 39150 }, { "epoch": 4.624823029731005, "grad_norm": 0.10760989040136337, "learning_rate": 9.799931484339344e-05, "loss": 0.0155, "step": 39200 }, { "epoch": 4.630722038697499, "grad_norm": 0.13790263235569, "learning_rate": 9.799410934586149e-05, "loss": 0.0187, "step": 39250 }, { "epoch": 4.636621047663993, "grad_norm": 0.22097963094711304, "learning_rate": 9.7988897223759e-05, "loss": 0.0174, "step": 39300 }, { "epoch": 4.642520056630486, "grad_norm": 0.0845661610364914, "learning_rate": 9.79836784778054e-05, "loss": 0.0176, "step": 39350 }, { "epoch": 4.64841906559698, "grad_norm": 0.40129128098487854, "learning_rate": 9.797845310872104e-05, "loss": 0.0156, "step": 39400 }, { "epoch": 4.654318074563474, "grad_norm": 0.2341821938753128, "learning_rate": 9.797322111722715e-05, "loss": 0.0155, "step": 39450 }, { "epoch": 4.660217083529967, "grad_norm": 0.31461548805236816, "learning_rate": 9.796798250404591e-05, "loss": 0.0169, "step": 39500 }, { "epoch": 4.666116092496461, "grad_norm": 0.18101881444454193, "learning_rate": 9.796273726990037e-05, "loss": 0.015, "step": 39550 }, { "epoch": 4.6720151014629545, "grad_norm": 0.4284253418445587, "learning_rate": 9.795748541551457e-05, "loss": 0.0181, "step": 39600 }, { "epoch": 4.677914110429448, "grad_norm": 0.15515239536762238, "learning_rate": 9.79522269416134e-05, "loss": 0.0159, "step": 39650 }, { "epoch": 4.683813119395942, "grad_norm": 0.19637803733348846, "learning_rate": 9.794696184892267e-05, "loss": 0.0175, "step": 39700 }, { "epoch": 4.6897121283624355, "grad_norm": 0.11516980081796646, "learning_rate": 9.794169013816912e-05, "loss": 0.0142, "step": 39750 }, { "epoch": 4.695611137328929, "grad_norm": 0.2768246829509735, "learning_rate": 9.793641181008041e-05, "loss": 0.0161, "step": 39800 }, { "epoch": 4.701510146295423, "grad_norm": 0.39490699768066406, "learning_rate": 9.793112686538508e-05, "loss": 0.0127, "step": 39850 }, { "epoch": 4.7074091552619155, "grad_norm": 0.17264436185359955, "learning_rate": 9.79258353048126e-05, "loss": 0.0132, "step": 39900 }, { "epoch": 4.71330816422841, "grad_norm": 0.2060287743806839, "learning_rate": 9.79205371290934e-05, "loss": 0.0136, "step": 39950 }, { "epoch": 4.719207173194903, "grad_norm": 0.25649890303611755, "learning_rate": 9.791523233895875e-05, "loss": 0.0183, "step": 40000 }, { "epoch": 4.719207173194903, "eval_cer": 0.08777969018932874, "eval_loss": 0.006279061548411846, "eval_runtime": 2.0348, "eval_samples_per_second": 49.146, "eval_steps_per_second": 1.966, "eval_wer": 0.28, "step": 40000 }, { "epoch": 4.725106182161397, "grad_norm": 0.28118619322776794, "learning_rate": 9.790992093514086e-05, "loss": 0.0155, "step": 40050 }, { "epoch": 4.73100519112789, "grad_norm": 0.0959843099117279, "learning_rate": 9.790460291837286e-05, "loss": 0.0161, "step": 40100 }, { "epoch": 4.736904200094385, "grad_norm": 0.0908626914024353, "learning_rate": 9.789927828938881e-05, "loss": 0.0137, "step": 40150 }, { "epoch": 4.742803209060877, "grad_norm": 0.19591175019741058, "learning_rate": 9.789394704892364e-05, "loss": 0.0158, "step": 40200 }, { "epoch": 4.748702218027371, "grad_norm": 0.26490557193756104, "learning_rate": 9.788860919771323e-05, "loss": 0.0158, "step": 40250 }, { "epoch": 4.754601226993865, "grad_norm": 0.2765480875968933, "learning_rate": 9.788326473649433e-05, "loss": 0.0171, "step": 40300 }, { "epoch": 4.760500235960358, "grad_norm": 0.3143293857574463, "learning_rate": 9.787791366600465e-05, "loss": 0.0131, "step": 40350 }, { "epoch": 4.766399244926852, "grad_norm": 0.35607194900512695, "learning_rate": 9.787255598698281e-05, "loss": 0.0161, "step": 40400 }, { "epoch": 4.772298253893346, "grad_norm": 0.13870596885681152, "learning_rate": 9.786719170016829e-05, "loss": 0.0147, "step": 40450 }, { "epoch": 4.778197262859839, "grad_norm": 0.21849285066127777, "learning_rate": 9.786182080630154e-05, "loss": 0.0147, "step": 40500 }, { "epoch": 4.784096271826333, "grad_norm": 0.12867091596126556, "learning_rate": 9.785644330612389e-05, "loss": 0.0141, "step": 40550 }, { "epoch": 4.7899952807928265, "grad_norm": 0.3970583379268646, "learning_rate": 9.785105920037759e-05, "loss": 0.0151, "step": 40600 }, { "epoch": 4.79589428975932, "grad_norm": 0.2577088177204132, "learning_rate": 9.784566848980578e-05, "loss": 0.0129, "step": 40650 }, { "epoch": 4.801793298725814, "grad_norm": 0.1679120808839798, "learning_rate": 9.784027117515258e-05, "loss": 0.0149, "step": 40700 }, { "epoch": 4.8076923076923075, "grad_norm": 0.16444753110408783, "learning_rate": 9.783486725716293e-05, "loss": 0.0139, "step": 40750 }, { "epoch": 4.813591316658801, "grad_norm": 0.11007341742515564, "learning_rate": 9.782945673658276e-05, "loss": 0.0157, "step": 40800 }, { "epoch": 4.819490325625295, "grad_norm": 0.2131779044866562, "learning_rate": 9.782403961415885e-05, "loss": 0.0159, "step": 40850 }, { "epoch": 4.825389334591788, "grad_norm": 0.16144008934497833, "learning_rate": 9.781861589063895e-05, "loss": 0.0149, "step": 40900 }, { "epoch": 4.831288343558282, "grad_norm": 0.11072848737239838, "learning_rate": 9.781318556677166e-05, "loss": 0.014, "step": 40950 }, { "epoch": 4.837187352524776, "grad_norm": 0.5138325691223145, "learning_rate": 9.780774864330655e-05, "loss": 0.0152, "step": 41000 }, { "epoch": 4.837187352524776, "eval_cer": 0.09208261617900172, "eval_loss": 0.004712502472102642, "eval_runtime": 2.0166, "eval_samples_per_second": 49.589, "eval_steps_per_second": 1.984, "eval_wer": 0.29, "step": 41000 }, { "epoch": 4.843086361491269, "grad_norm": 0.3165017366409302, "learning_rate": 9.780230512099405e-05, "loss": 0.0161, "step": 41050 }, { "epoch": 4.848985370457763, "grad_norm": 0.32610559463500977, "learning_rate": 9.779685500058552e-05, "loss": 0.015, "step": 41100 }, { "epoch": 4.854884379424257, "grad_norm": 0.23756933212280273, "learning_rate": 9.779139828283324e-05, "loss": 0.0157, "step": 41150 }, { "epoch": 4.86078338839075, "grad_norm": 0.18311873078346252, "learning_rate": 9.77859349684904e-05, "loss": 0.0156, "step": 41200 }, { "epoch": 4.866682397357244, "grad_norm": 0.12603141367435455, "learning_rate": 9.77804650583111e-05, "loss": 0.0136, "step": 41250 }, { "epoch": 4.872581406323738, "grad_norm": 0.0803796648979187, "learning_rate": 9.777498855305033e-05, "loss": 0.0162, "step": 41300 }, { "epoch": 4.878480415290231, "grad_norm": 0.21031217277050018, "learning_rate": 9.776950545346404e-05, "loss": 0.0147, "step": 41350 }, { "epoch": 4.884379424256725, "grad_norm": 0.25660574436187744, "learning_rate": 9.7764015760309e-05, "loss": 0.0164, "step": 41400 }, { "epoch": 4.8902784332232185, "grad_norm": 0.17118404805660248, "learning_rate": 9.775851947434299e-05, "loss": 0.0165, "step": 41450 }, { "epoch": 4.896177442189712, "grad_norm": 0.21807195246219635, "learning_rate": 9.775301659632463e-05, "loss": 0.0129, "step": 41500 }, { "epoch": 4.902076451156206, "grad_norm": 0.23998522758483887, "learning_rate": 9.774750712701349e-05, "loss": 0.0145, "step": 41550 }, { "epoch": 4.9079754601226995, "grad_norm": 0.28835710883140564, "learning_rate": 9.774199106717003e-05, "loss": 0.0169, "step": 41600 }, { "epoch": 4.913874469089193, "grad_norm": 0.20525211095809937, "learning_rate": 9.773646841755564e-05, "loss": 0.0148, "step": 41650 }, { "epoch": 4.919773478055687, "grad_norm": 0.22650498151779175, "learning_rate": 9.773093917893257e-05, "loss": 0.0152, "step": 41700 }, { "epoch": 4.92567248702218, "grad_norm": 0.25918447971343994, "learning_rate": 9.772540335206406e-05, "loss": 0.0151, "step": 41750 }, { "epoch": 4.931571495988674, "grad_norm": 0.06975080817937851, "learning_rate": 9.771986093771416e-05, "loss": 0.0158, "step": 41800 }, { "epoch": 4.937470504955168, "grad_norm": 0.10984576493501663, "learning_rate": 9.771431193664795e-05, "loss": 0.0156, "step": 41850 }, { "epoch": 4.943369513921661, "grad_norm": 0.17649120092391968, "learning_rate": 9.770875634963129e-05, "loss": 0.0138, "step": 41900 }, { "epoch": 4.949268522888155, "grad_norm": 0.3092782199382782, "learning_rate": 9.770319417743105e-05, "loss": 0.0131, "step": 41950 }, { "epoch": 4.955167531854649, "grad_norm": 0.30232787132263184, "learning_rate": 9.769762542081496e-05, "loss": 0.0141, "step": 42000 }, { "epoch": 4.955167531854649, "eval_cer": 0.08433734939759036, "eval_loss": 0.0020190360955893993, "eval_runtime": 2.1207, "eval_samples_per_second": 47.155, "eval_steps_per_second": 1.886, "eval_wer": 0.26, "step": 42000 }, { "epoch": 4.961066540821142, "grad_norm": 0.298849493265152, "learning_rate": 9.769205008055164e-05, "loss": 0.0148, "step": 42050 }, { "epoch": 4.966965549787636, "grad_norm": 0.22196568548679352, "learning_rate": 9.768646815741069e-05, "loss": 0.0158, "step": 42100 }, { "epoch": 4.97286455875413, "grad_norm": 0.2014562338590622, "learning_rate": 9.768087965216257e-05, "loss": 0.0151, "step": 42150 }, { "epoch": 4.978763567720623, "grad_norm": 0.16048863530158997, "learning_rate": 9.767528456557862e-05, "loss": 0.014, "step": 42200 }, { "epoch": 4.984662576687117, "grad_norm": 0.15153883397579193, "learning_rate": 9.766968289843117e-05, "loss": 0.0124, "step": 42250 }, { "epoch": 4.9905615856536105, "grad_norm": 0.08848384022712708, "learning_rate": 9.766407465149337e-05, "loss": 0.0136, "step": 42300 }, { "epoch": 4.996460594620104, "grad_norm": 0.15477828681468964, "learning_rate": 9.765845982553935e-05, "loss": 0.0171, "step": 42350 }, { "epoch": 5.002359603586598, "grad_norm": 0.11676661670207977, "learning_rate": 9.76528384213441e-05, "loss": 0.0134, "step": 42400 }, { "epoch": 5.0082586125530915, "grad_norm": 0.15548166632652283, "learning_rate": 9.764721043968355e-05, "loss": 0.0137, "step": 42450 }, { "epoch": 5.014157621519585, "grad_norm": 0.20585298538208008, "learning_rate": 9.764157588133452e-05, "loss": 0.0137, "step": 42500 }, { "epoch": 5.020056630486079, "grad_norm": 0.12621235847473145, "learning_rate": 9.763593474707473e-05, "loss": 0.0117, "step": 42550 }, { "epoch": 5.025955639452572, "grad_norm": 0.2667747139930725, "learning_rate": 9.763028703768282e-05, "loss": 0.0123, "step": 42600 }, { "epoch": 5.031854648419065, "grad_norm": 0.1147405132651329, "learning_rate": 9.762463275393836e-05, "loss": 0.016, "step": 42650 }, { "epoch": 5.037753657385559, "grad_norm": 0.9805943965911865, "learning_rate": 9.761897189662178e-05, "loss": 0.0147, "step": 42700 }, { "epoch": 5.0436526663520524, "grad_norm": 0.07192052900791168, "learning_rate": 9.761330446651445e-05, "loss": 0.0126, "step": 42750 }, { "epoch": 5.049551675318546, "grad_norm": 0.2472480684518814, "learning_rate": 9.760763046439863e-05, "loss": 0.0129, "step": 42800 }, { "epoch": 5.05545068428504, "grad_norm": 0.18394730985164642, "learning_rate": 9.76019498910575e-05, "loss": 0.0151, "step": 42850 }, { "epoch": 5.061349693251533, "grad_norm": 0.17825838923454285, "learning_rate": 9.759626274727516e-05, "loss": 0.0128, "step": 42900 }, { "epoch": 5.067248702218027, "grad_norm": 0.19029362499713898, "learning_rate": 9.759056903383658e-05, "loss": 0.0127, "step": 42950 }, { "epoch": 5.073147711184521, "grad_norm": 0.21771086752414703, "learning_rate": 9.758486875152766e-05, "loss": 0.0137, "step": 43000 }, { "epoch": 5.073147711184521, "eval_cer": 0.08691910499139414, "eval_loss": 0.0022734475787729025, "eval_runtime": 2.032, "eval_samples_per_second": 49.213, "eval_steps_per_second": 1.969, "eval_wer": 0.27, "step": 43000 }, { "epoch": 5.079046720151014, "grad_norm": 0.16701284050941467, "learning_rate": 9.757916190113521e-05, "loss": 0.012, "step": 43050 }, { "epoch": 5.084945729117508, "grad_norm": 0.1381247639656067, "learning_rate": 9.757344848344694e-05, "loss": 0.0131, "step": 43100 }, { "epoch": 5.090844738084002, "grad_norm": 0.26033705472946167, "learning_rate": 9.756772849925145e-05, "loss": 0.015, "step": 43150 }, { "epoch": 5.096743747050495, "grad_norm": 0.2047414630651474, "learning_rate": 9.756200194933829e-05, "loss": 0.0133, "step": 43200 }, { "epoch": 5.102642756016989, "grad_norm": 0.18171359598636627, "learning_rate": 9.755626883449786e-05, "loss": 0.0117, "step": 43250 }, { "epoch": 5.108541764983483, "grad_norm": 0.1653711199760437, "learning_rate": 9.755052915552152e-05, "loss": 0.0147, "step": 43300 }, { "epoch": 5.114440773949976, "grad_norm": 0.24473267793655396, "learning_rate": 9.75447829132015e-05, "loss": 0.0138, "step": 43350 }, { "epoch": 5.12033978291647, "grad_norm": 0.2676519453525543, "learning_rate": 9.753903010833094e-05, "loss": 0.0147, "step": 43400 }, { "epoch": 5.1262387918829635, "grad_norm": 0.17994138598442078, "learning_rate": 9.753327074170391e-05, "loss": 0.015, "step": 43450 }, { "epoch": 5.132137800849457, "grad_norm": 0.3173046410083771, "learning_rate": 9.752750481411534e-05, "loss": 0.0139, "step": 43500 }, { "epoch": 5.138036809815951, "grad_norm": 0.19729779660701752, "learning_rate": 9.752173232636112e-05, "loss": 0.0121, "step": 43550 }, { "epoch": 5.143935818782444, "grad_norm": 0.1806355118751526, "learning_rate": 9.751595327923802e-05, "loss": 0.0125, "step": 43600 }, { "epoch": 5.149834827748938, "grad_norm": 0.09857175499200821, "learning_rate": 9.75101676735437e-05, "loss": 0.0119, "step": 43650 }, { "epoch": 5.155733836715432, "grad_norm": 0.22470448911190033, "learning_rate": 9.750437551007676e-05, "loss": 0.0136, "step": 43700 }, { "epoch": 5.161632845681925, "grad_norm": 0.3431908190250397, "learning_rate": 9.749857678963668e-05, "loss": 0.0138, "step": 43750 }, { "epoch": 5.167531854648419, "grad_norm": 0.12854793667793274, "learning_rate": 9.749277151302383e-05, "loss": 0.0129, "step": 43800 }, { "epoch": 5.173430863614913, "grad_norm": 0.23574602603912354, "learning_rate": 9.748695968103952e-05, "loss": 0.0119, "step": 43850 }, { "epoch": 5.179329872581406, "grad_norm": 0.2726353108882904, "learning_rate": 9.748114129448594e-05, "loss": 0.0153, "step": 43900 }, { "epoch": 5.1852288815479, "grad_norm": 0.19473299384117126, "learning_rate": 9.747531635416622e-05, "loss": 0.014, "step": 43950 }, { "epoch": 5.191127890514394, "grad_norm": 0.16663016378879547, "learning_rate": 9.746948486088435e-05, "loss": 0.0116, "step": 44000 }, { "epoch": 5.191127890514394, "eval_cer": 0.08519793459552495, "eval_loss": 0.0037201859522610903, "eval_runtime": 2.01, "eval_samples_per_second": 49.752, "eval_steps_per_second": 1.99, "eval_wer": 0.26, "step": 44000 }, { "epoch": 5.197026899480887, "grad_norm": 0.20310349762439728, "learning_rate": 9.746364681544525e-05, "loss": 0.0121, "step": 44050 }, { "epoch": 5.202925908447381, "grad_norm": 0.21634447574615479, "learning_rate": 9.745780221865474e-05, "loss": 0.014, "step": 44100 }, { "epoch": 5.2088249174138745, "grad_norm": 0.1820598691701889, "learning_rate": 9.745195107131953e-05, "loss": 0.0127, "step": 44150 }, { "epoch": 5.214723926380368, "grad_norm": 0.22680197656154633, "learning_rate": 9.744609337424727e-05, "loss": 0.014, "step": 44200 }, { "epoch": 5.220622935346862, "grad_norm": 0.17297236621379852, "learning_rate": 9.744022912824649e-05, "loss": 0.0136, "step": 44250 }, { "epoch": 5.2265219443133555, "grad_norm": 0.21963928639888763, "learning_rate": 9.743435833412659e-05, "loss": 0.0114, "step": 44300 }, { "epoch": 5.232420953279849, "grad_norm": 0.13144555687904358, "learning_rate": 9.742848099269793e-05, "loss": 0.013, "step": 44350 }, { "epoch": 5.238319962246343, "grad_norm": 0.234639972448349, "learning_rate": 9.742259710477177e-05, "loss": 0.0129, "step": 44400 }, { "epoch": 5.244218971212836, "grad_norm": 0.21274390816688538, "learning_rate": 9.741670667116023e-05, "loss": 0.0132, "step": 44450 }, { "epoch": 5.25011798017933, "grad_norm": 0.16458329558372498, "learning_rate": 9.741080969267637e-05, "loss": 0.012, "step": 44500 }, { "epoch": 5.256016989145824, "grad_norm": 0.18307918310165405, "learning_rate": 9.740490617013416e-05, "loss": 0.0131, "step": 44550 }, { "epoch": 5.261915998112317, "grad_norm": 0.30608904361724854, "learning_rate": 9.739899610434841e-05, "loss": 0.0122, "step": 44600 }, { "epoch": 5.267815007078811, "grad_norm": 0.19642053544521332, "learning_rate": 9.739307949613494e-05, "loss": 0.0124, "step": 44650 }, { "epoch": 5.273714016045305, "grad_norm": 0.2871667444705963, "learning_rate": 9.738715634631038e-05, "loss": 0.014, "step": 44700 }, { "epoch": 5.279613025011798, "grad_norm": 0.35782504081726074, "learning_rate": 9.738122665569227e-05, "loss": 0.0128, "step": 44750 }, { "epoch": 5.285512033978292, "grad_norm": 0.32668036222457886, "learning_rate": 9.737529042509913e-05, "loss": 0.0139, "step": 44800 }, { "epoch": 5.291411042944786, "grad_norm": 0.15733739733695984, "learning_rate": 9.736934765535029e-05, "loss": 0.0138, "step": 44850 }, { "epoch": 5.297310051911279, "grad_norm": 0.20906807482242584, "learning_rate": 9.736339834726604e-05, "loss": 0.0136, "step": 44900 }, { "epoch": 5.303209060877773, "grad_norm": 0.14966309070587158, "learning_rate": 9.735744250166759e-05, "loss": 0.0129, "step": 44950 }, { "epoch": 5.3091080698442665, "grad_norm": 0.22560735046863556, "learning_rate": 9.735148011937694e-05, "loss": 0.0147, "step": 45000 }, { "epoch": 5.3091080698442665, "eval_cer": 0.08777969018932874, "eval_loss": 0.005522606428712606, "eval_runtime": 2.0311, "eval_samples_per_second": 49.235, "eval_steps_per_second": 1.969, "eval_wer": 0.27, "step": 45000 }, { "epoch": 5.31500707881076, "grad_norm": 0.06632822006940842, "learning_rate": 9.734551120121714e-05, "loss": 0.0129, "step": 45050 }, { "epoch": 5.320906087777254, "grad_norm": 0.2870294451713562, "learning_rate": 9.733953574801203e-05, "loss": 0.0131, "step": 45100 }, { "epoch": 5.3268050967437475, "grad_norm": 0.24803125858306885, "learning_rate": 9.733355376058643e-05, "loss": 0.0143, "step": 45150 }, { "epoch": 5.33270410571024, "grad_norm": 0.2511065900325775, "learning_rate": 9.7327565239766e-05, "loss": 0.0117, "step": 45200 }, { "epoch": 5.338603114676735, "grad_norm": 0.18628984689712524, "learning_rate": 9.732157018637733e-05, "loss": 0.0144, "step": 45250 }, { "epoch": 5.3445021236432275, "grad_norm": 0.5666053891181946, "learning_rate": 9.731556860124793e-05, "loss": 0.0133, "step": 45300 }, { "epoch": 5.350401132609721, "grad_norm": 0.19458810985088348, "learning_rate": 9.730956048520618e-05, "loss": 0.0141, "step": 45350 }, { "epoch": 5.356300141576215, "grad_norm": 0.28090450167655945, "learning_rate": 9.730354583908137e-05, "loss": 0.0124, "step": 45400 }, { "epoch": 5.3621991505427085, "grad_norm": 0.18761464953422546, "learning_rate": 9.72975246637037e-05, "loss": 0.0134, "step": 45450 }, { "epoch": 5.368098159509202, "grad_norm": 0.09367839992046356, "learning_rate": 9.729149695990425e-05, "loss": 0.0128, "step": 45500 }, { "epoch": 5.373997168475696, "grad_norm": 0.14329609274864197, "learning_rate": 9.728546272851504e-05, "loss": 0.0141, "step": 45550 }, { "epoch": 5.379896177442189, "grad_norm": 0.0946892648935318, "learning_rate": 9.727942197036895e-05, "loss": 0.0133, "step": 45600 }, { "epoch": 5.385795186408683, "grad_norm": 0.2744847238063812, "learning_rate": 9.72733746862998e-05, "loss": 0.0119, "step": 45650 }, { "epoch": 5.391694195375177, "grad_norm": 0.15489462018013, "learning_rate": 9.726732087714228e-05, "loss": 0.0137, "step": 45700 }, { "epoch": 5.39759320434167, "grad_norm": 0.1209046021103859, "learning_rate": 9.726126054373196e-05, "loss": 0.0137, "step": 45750 }, { "epoch": 5.403492213308164, "grad_norm": 0.6185845732688904, "learning_rate": 9.725519368690538e-05, "loss": 0.0145, "step": 45800 }, { "epoch": 5.409391222274658, "grad_norm": 0.15894515812397003, "learning_rate": 9.724912030749993e-05, "loss": 0.0136, "step": 45850 }, { "epoch": 5.415290231241151, "grad_norm": 0.21691733598709106, "learning_rate": 9.724304040635391e-05, "loss": 0.0133, "step": 45900 }, { "epoch": 5.421189240207645, "grad_norm": 0.11285976320505142, "learning_rate": 9.723695398430652e-05, "loss": 0.0127, "step": 45950 }, { "epoch": 5.427088249174139, "grad_norm": 0.39303159713745117, "learning_rate": 9.723086104219787e-05, "loss": 0.0138, "step": 46000 }, { "epoch": 5.427088249174139, "eval_cer": 0.08519793459552495, "eval_loss": 0.0026279655285179615, "eval_runtime": 2.0908, "eval_samples_per_second": 47.829, "eval_steps_per_second": 1.913, "eval_wer": 0.26, "step": 46000 }, { "epoch": 5.432987258140632, "grad_norm": 0.14232246577739716, "learning_rate": 9.722476158086896e-05, "loss": 0.0124, "step": 46050 }, { "epoch": 5.438886267107126, "grad_norm": 0.11032086610794067, "learning_rate": 9.721865560116166e-05, "loss": 0.012, "step": 46100 }, { "epoch": 5.4447852760736195, "grad_norm": 0.1801489144563675, "learning_rate": 9.72125431039188e-05, "loss": 0.012, "step": 46150 }, { "epoch": 5.450684285040113, "grad_norm": 0.19257621467113495, "learning_rate": 9.72064240899841e-05, "loss": 0.0112, "step": 46200 }, { "epoch": 5.456583294006607, "grad_norm": 0.12765991687774658, "learning_rate": 9.720029856020212e-05, "loss": 0.0147, "step": 46250 }, { "epoch": 5.4624823029731004, "grad_norm": 0.19845658540725708, "learning_rate": 9.719416651541839e-05, "loss": 0.0122, "step": 46300 }, { "epoch": 5.468381311939594, "grad_norm": 0.09958872199058533, "learning_rate": 9.718802795647927e-05, "loss": 0.0155, "step": 46350 }, { "epoch": 5.474280320906088, "grad_norm": 0.9268890619277954, "learning_rate": 9.718188288423212e-05, "loss": 0.0171, "step": 46400 }, { "epoch": 5.480179329872581, "grad_norm": 0.36984503269195557, "learning_rate": 9.717573129952509e-05, "loss": 0.0138, "step": 46450 }, { "epoch": 5.486078338839075, "grad_norm": 0.13849887251853943, "learning_rate": 9.716957320320727e-05, "loss": 0.0128, "step": 46500 }, { "epoch": 5.491977347805569, "grad_norm": 0.20703719556331635, "learning_rate": 9.71634085961287e-05, "loss": 0.0135, "step": 46550 }, { "epoch": 5.497876356772062, "grad_norm": 0.5271524786949158, "learning_rate": 9.715723747914023e-05, "loss": 0.0129, "step": 46600 }, { "epoch": 5.503775365738556, "grad_norm": 0.17821401357650757, "learning_rate": 9.715105985309368e-05, "loss": 0.0145, "step": 46650 }, { "epoch": 5.50967437470505, "grad_norm": 0.15053100883960724, "learning_rate": 9.714487571884172e-05, "loss": 0.0139, "step": 46700 }, { "epoch": 5.515573383671543, "grad_norm": 0.15797396004199982, "learning_rate": 9.713868507723793e-05, "loss": 0.0122, "step": 46750 }, { "epoch": 5.521472392638037, "grad_norm": 0.14999975264072418, "learning_rate": 9.713248792913685e-05, "loss": 0.0136, "step": 46800 }, { "epoch": 5.5273714016045306, "grad_norm": 0.0659950003027916, "learning_rate": 9.712628427539381e-05, "loss": 0.0124, "step": 46850 }, { "epoch": 5.533270410571024, "grad_norm": 0.3931555449962616, "learning_rate": 9.712007411686512e-05, "loss": 0.0123, "step": 46900 }, { "epoch": 5.539169419537518, "grad_norm": 0.29410138726234436, "learning_rate": 9.711385745440794e-05, "loss": 0.0104, "step": 46950 }, { "epoch": 5.5450684285040115, "grad_norm": 0.4489244818687439, "learning_rate": 9.710763428888036e-05, "loss": 0.013, "step": 47000 }, { "epoch": 5.5450684285040115, "eval_cer": 0.08691910499139414, "eval_loss": 0.004845923278480768, "eval_runtime": 2.011, "eval_samples_per_second": 49.726, "eval_steps_per_second": 1.989, "eval_wer": 0.27, "step": 47000 }, { "epoch": 5.550967437470505, "grad_norm": 0.14007315039634705, "learning_rate": 9.710140462114138e-05, "loss": 0.0134, "step": 47050 }, { "epoch": 5.556866446436999, "grad_norm": 0.14388678967952728, "learning_rate": 9.709516845205084e-05, "loss": 0.0126, "step": 47100 }, { "epoch": 5.562765455403492, "grad_norm": 0.25200363993644714, "learning_rate": 9.708892578246953e-05, "loss": 0.0138, "step": 47150 }, { "epoch": 5.568664464369986, "grad_norm": 0.20539125800132751, "learning_rate": 9.708267661325909e-05, "loss": 0.0133, "step": 47200 }, { "epoch": 5.57456347333648, "grad_norm": 0.1160576269030571, "learning_rate": 9.707642094528213e-05, "loss": 0.0128, "step": 47250 }, { "epoch": 5.580462482302973, "grad_norm": 0.3053942918777466, "learning_rate": 9.707015877940209e-05, "loss": 0.0133, "step": 47300 }, { "epoch": 5.586361491269467, "grad_norm": 0.20198199152946472, "learning_rate": 9.706389011648332e-05, "loss": 0.0142, "step": 47350 }, { "epoch": 5.592260500235961, "grad_norm": 0.1342591941356659, "learning_rate": 9.705761495739107e-05, "loss": 0.0142, "step": 47400 }, { "epoch": 5.598159509202454, "grad_norm": 0.2812662124633789, "learning_rate": 9.705133330299153e-05, "loss": 0.0136, "step": 47450 }, { "epoch": 5.604058518168948, "grad_norm": 0.11323291808366776, "learning_rate": 9.704504515415173e-05, "loss": 0.0137, "step": 47500 }, { "epoch": 5.609957527135442, "grad_norm": 0.15867789089679718, "learning_rate": 9.703875051173959e-05, "loss": 0.011, "step": 47550 }, { "epoch": 5.615856536101935, "grad_norm": 0.22813838720321655, "learning_rate": 9.7032449376624e-05, "loss": 0.0123, "step": 47600 }, { "epoch": 5.621755545068428, "grad_norm": 0.21685156226158142, "learning_rate": 9.702614174967466e-05, "loss": 0.013, "step": 47650 }, { "epoch": 5.6276545540349225, "grad_norm": 0.16061468422412872, "learning_rate": 9.701982763176222e-05, "loss": 0.0137, "step": 47700 }, { "epoch": 5.633553563001415, "grad_norm": 0.3179032504558563, "learning_rate": 9.701350702375821e-05, "loss": 0.0126, "step": 47750 }, { "epoch": 5.63945257196791, "grad_norm": 0.17640653252601624, "learning_rate": 9.700717992653506e-05, "loss": 0.0124, "step": 47800 }, { "epoch": 5.645351580934403, "grad_norm": 0.2296363264322281, "learning_rate": 9.700084634096607e-05, "loss": 0.0142, "step": 47850 }, { "epoch": 5.651250589900897, "grad_norm": 0.11965318769216537, "learning_rate": 9.699450626792548e-05, "loss": 0.0139, "step": 47900 }, { "epoch": 5.65714959886739, "grad_norm": 0.31868860125541687, "learning_rate": 9.698815970828841e-05, "loss": 0.0122, "step": 47950 }, { "epoch": 5.6630486078338835, "grad_norm": 0.1602361500263214, "learning_rate": 9.698180666293083e-05, "loss": 0.0146, "step": 48000 }, { "epoch": 5.6630486078338835, "eval_cer": 0.08777969018932874, "eval_loss": 0.006117711775004864, "eval_runtime": 2.0887, "eval_samples_per_second": 47.877, "eval_steps_per_second": 1.915, "eval_wer": 0.27, "step": 48000 }, { "epoch": 5.668947616800377, "grad_norm": 0.10578951239585876, "learning_rate": 9.697544713272969e-05, "loss": 0.0135, "step": 48050 }, { "epoch": 5.674846625766871, "grad_norm": 0.20654989778995514, "learning_rate": 9.696908111856275e-05, "loss": 0.0158, "step": 48100 }, { "epoch": 5.6807456347333645, "grad_norm": 0.23349711298942566, "learning_rate": 9.696270862130872e-05, "loss": 0.0154, "step": 48150 }, { "epoch": 5.686644643699858, "grad_norm": 0.1499098390340805, "learning_rate": 9.69563296418472e-05, "loss": 0.0125, "step": 48200 }, { "epoch": 5.692543652666352, "grad_norm": 0.08564648032188416, "learning_rate": 9.694994418105865e-05, "loss": 0.0129, "step": 48250 }, { "epoch": 5.698442661632845, "grad_norm": 0.22087514400482178, "learning_rate": 9.694355223982446e-05, "loss": 0.0129, "step": 48300 }, { "epoch": 5.704341670599339, "grad_norm": 0.0601191371679306, "learning_rate": 9.69371538190269e-05, "loss": 0.0123, "step": 48350 }, { "epoch": 5.710240679565833, "grad_norm": 0.23807287216186523, "learning_rate": 9.693074891954915e-05, "loss": 0.0112, "step": 48400 }, { "epoch": 5.716139688532326, "grad_norm": 0.18858177959918976, "learning_rate": 9.692433754227525e-05, "loss": 0.0156, "step": 48450 }, { "epoch": 5.72203869749882, "grad_norm": 0.1419638842344284, "learning_rate": 9.691791968809016e-05, "loss": 0.0135, "step": 48500 }, { "epoch": 5.727937706465314, "grad_norm": 0.15836025774478912, "learning_rate": 9.691149535787974e-05, "loss": 0.0155, "step": 48550 }, { "epoch": 5.733836715431807, "grad_norm": 0.15265315771102905, "learning_rate": 9.690506455253073e-05, "loss": 0.0135, "step": 48600 }, { "epoch": 5.739735724398301, "grad_norm": 0.2320513278245926, "learning_rate": 9.689862727293073e-05, "loss": 0.013, "step": 48650 }, { "epoch": 5.745634733364795, "grad_norm": 0.2584886848926544, "learning_rate": 9.689218351996833e-05, "loss": 0.0142, "step": 48700 }, { "epoch": 5.751533742331288, "grad_norm": 0.5189709067344666, "learning_rate": 9.688573329453292e-05, "loss": 0.0132, "step": 48750 }, { "epoch": 5.757432751297782, "grad_norm": 0.15239682793617249, "learning_rate": 9.687927659751481e-05, "loss": 0.0116, "step": 48800 }, { "epoch": 5.7633317602642755, "grad_norm": 0.23002533614635468, "learning_rate": 9.687281342980523e-05, "loss": 0.0123, "step": 48850 }, { "epoch": 5.769230769230769, "grad_norm": 0.32338786125183105, "learning_rate": 9.686634379229627e-05, "loss": 0.0136, "step": 48900 }, { "epoch": 5.775129778197263, "grad_norm": 0.15144482254981995, "learning_rate": 9.685986768588094e-05, "loss": 0.0137, "step": 48950 }, { "epoch": 5.7810287871637565, "grad_norm": 0.09470724314451218, "learning_rate": 9.685338511145312e-05, "loss": 0.0123, "step": 49000 }, { "epoch": 5.7810287871637565, "eval_cer": 0.08691910499139414, "eval_loss": 0.003697662614285946, "eval_runtime": 2.0194, "eval_samples_per_second": 49.521, "eval_steps_per_second": 1.981, "eval_wer": 0.27, "step": 49000 }, { "epoch": 5.78692779613025, "grad_norm": 0.6690936088562012, "learning_rate": 9.68468960699076e-05, "loss": 0.0145, "step": 49050 }, { "epoch": 5.792826805096744, "grad_norm": 0.44640296697616577, "learning_rate": 9.684040056214003e-05, "loss": 0.0114, "step": 49100 }, { "epoch": 5.798725814063237, "grad_norm": 0.19277842342853546, "learning_rate": 9.683389858904702e-05, "loss": 0.0159, "step": 49150 }, { "epoch": 5.804624823029731, "grad_norm": 0.25648176670074463, "learning_rate": 9.682739015152598e-05, "loss": 0.0135, "step": 49200 }, { "epoch": 5.810523831996225, "grad_norm": 0.2654130756855011, "learning_rate": 9.68208752504753e-05, "loss": 0.0131, "step": 49250 }, { "epoch": 5.816422840962718, "grad_norm": 0.3069216310977936, "learning_rate": 9.68143538867942e-05, "loss": 0.015, "step": 49300 }, { "epoch": 5.822321849929212, "grad_norm": 0.49635350704193115, "learning_rate": 9.680782606138285e-05, "loss": 0.0147, "step": 49350 }, { "epoch": 5.828220858895706, "grad_norm": 0.1419650912284851, "learning_rate": 9.680129177514225e-05, "loss": 0.0126, "step": 49400 }, { "epoch": 5.834119867862199, "grad_norm": 0.08900855481624603, "learning_rate": 9.679475102897432e-05, "loss": 0.012, "step": 49450 }, { "epoch": 5.840018876828693, "grad_norm": 0.1338479071855545, "learning_rate": 9.678820382378188e-05, "loss": 0.0103, "step": 49500 }, { "epoch": 5.845917885795187, "grad_norm": 0.22418507933616638, "learning_rate": 9.678165016046863e-05, "loss": 0.013, "step": 49550 }, { "epoch": 5.85181689476168, "grad_norm": 0.29663076996803284, "learning_rate": 9.677509003993915e-05, "loss": 0.014, "step": 49600 }, { "epoch": 5.857715903728174, "grad_norm": 0.06203584000468254, "learning_rate": 9.676852346309896e-05, "loss": 0.0135, "step": 49650 }, { "epoch": 5.8636149126946675, "grad_norm": 0.08449065685272217, "learning_rate": 9.676195043085441e-05, "loss": 0.013, "step": 49700 }, { "epoch": 5.869513921661161, "grad_norm": 0.20869465172290802, "learning_rate": 9.675537094411278e-05, "loss": 0.0126, "step": 49750 }, { "epoch": 5.875412930627655, "grad_norm": 0.13869403302669525, "learning_rate": 9.674878500378222e-05, "loss": 0.0132, "step": 49800 }, { "epoch": 5.881311939594148, "grad_norm": 0.2849960923194885, "learning_rate": 9.674219261077178e-05, "loss": 0.0126, "step": 49850 }, { "epoch": 5.887210948560642, "grad_norm": 0.1197873055934906, "learning_rate": 9.67355937659914e-05, "loss": 0.0139, "step": 49900 }, { "epoch": 5.893109957527136, "grad_norm": 0.1069718450307846, "learning_rate": 9.672898847035192e-05, "loss": 0.0159, "step": 49950 }, { "epoch": 5.899008966493629, "grad_norm": 0.23524530231952667, "learning_rate": 9.672237672476506e-05, "loss": 0.0132, "step": 50000 }, { "epoch": 5.899008966493629, "eval_cer": 0.08950086058519793, "eval_loss": 0.003184175118803978, "eval_runtime": 2.032, "eval_samples_per_second": 49.213, "eval_steps_per_second": 1.969, "eval_wer": 0.28, "step": 50000 }, { "epoch": 5.904907975460123, "grad_norm": 0.11993358284235, "learning_rate": 9.671575853014343e-05, "loss": 0.0144, "step": 50050 }, { "epoch": 5.910806984426617, "grad_norm": 0.28592345118522644, "learning_rate": 9.670913388740051e-05, "loss": 0.0124, "step": 50100 }, { "epoch": 5.91670599339311, "grad_norm": 0.10284174978733063, "learning_rate": 9.670250279745073e-05, "loss": 0.0128, "step": 50150 }, { "epoch": 5.922605002359604, "grad_norm": 0.20037025213241577, "learning_rate": 9.669586526120936e-05, "loss": 0.0143, "step": 50200 }, { "epoch": 5.928504011326098, "grad_norm": 0.5127488970756531, "learning_rate": 9.668922127959253e-05, "loss": 0.0123, "step": 50250 }, { "epoch": 5.93440302029259, "grad_norm": 0.20731499791145325, "learning_rate": 9.668257085351735e-05, "loss": 0.0135, "step": 50300 }, { "epoch": 5.940302029259085, "grad_norm": 0.1911650002002716, "learning_rate": 9.667591398390176e-05, "loss": 0.0127, "step": 50350 }, { "epoch": 5.946201038225578, "grad_norm": 0.2594684958457947, "learning_rate": 9.66692506716646e-05, "loss": 0.0124, "step": 50400 }, { "epoch": 5.952100047192072, "grad_norm": 0.16972830891609192, "learning_rate": 9.666258091772559e-05, "loss": 0.0113, "step": 50450 }, { "epoch": 5.957999056158565, "grad_norm": 0.1387680619955063, "learning_rate": 9.665590472300534e-05, "loss": 0.0151, "step": 50500 }, { "epoch": 5.9638980651250595, "grad_norm": 0.12183617800474167, "learning_rate": 9.664922208842538e-05, "loss": 0.0132, "step": 50550 }, { "epoch": 5.969797074091552, "grad_norm": 0.3173953592777252, "learning_rate": 9.66425330149081e-05, "loss": 0.0119, "step": 50600 }, { "epoch": 5.975696083058046, "grad_norm": 0.30532410740852356, "learning_rate": 9.663583750337677e-05, "loss": 0.0133, "step": 50650 }, { "epoch": 5.9815950920245395, "grad_norm": 0.48345914483070374, "learning_rate": 9.662913555475558e-05, "loss": 0.0118, "step": 50700 }, { "epoch": 5.987494100991033, "grad_norm": 0.4079224169254303, "learning_rate": 9.662242716996959e-05, "loss": 0.0145, "step": 50750 }, { "epoch": 5.993393109957527, "grad_norm": 0.14965760707855225, "learning_rate": 9.661571234994475e-05, "loss": 0.0122, "step": 50800 }, { "epoch": 5.9992921189240205, "grad_norm": 0.3367091417312622, "learning_rate": 9.660899109560788e-05, "loss": 0.0134, "step": 50850 }, { "epoch": 6.005191127890514, "grad_norm": 0.13113732635974884, "learning_rate": 9.660226340788674e-05, "loss": 0.0125, "step": 50900 }, { "epoch": 6.011090136857008, "grad_norm": 0.22865794599056244, "learning_rate": 9.659552928770991e-05, "loss": 0.0106, "step": 50950 }, { "epoch": 6.016989145823501, "grad_norm": 0.2045484185218811, "learning_rate": 9.658878873600692e-05, "loss": 0.0106, "step": 51000 }, { "epoch": 6.016989145823501, "eval_cer": 0.09294320137693632, "eval_loss": 0.006017682142555714, "eval_runtime": 2.0433, "eval_samples_per_second": 48.941, "eval_steps_per_second": 1.958, "eval_wer": 0.3, "step": 51000 }, { "epoch": 6.022888154789995, "grad_norm": 0.06134037300944328, "learning_rate": 9.658204175370815e-05, "loss": 0.0105, "step": 51050 }, { "epoch": 6.028787163756489, "grad_norm": 0.19204458594322205, "learning_rate": 9.657528834174488e-05, "loss": 0.0118, "step": 51100 }, { "epoch": 6.034686172722982, "grad_norm": 0.14361870288848877, "learning_rate": 9.656852850104927e-05, "loss": 0.0108, "step": 51150 }, { "epoch": 6.040585181689476, "grad_norm": 0.1626593917608261, "learning_rate": 9.656176223255439e-05, "loss": 0.0109, "step": 51200 }, { "epoch": 6.04648419065597, "grad_norm": 0.15699218213558197, "learning_rate": 9.655498953719415e-05, "loss": 0.0119, "step": 51250 }, { "epoch": 6.052383199622463, "grad_norm": 0.1550770252943039, "learning_rate": 9.654821041590341e-05, "loss": 0.0129, "step": 51300 }, { "epoch": 6.058282208588957, "grad_norm": 0.11197759956121445, "learning_rate": 9.654142486961786e-05, "loss": 0.0111, "step": 51350 }, { "epoch": 6.064181217555451, "grad_norm": 0.23946014046669006, "learning_rate": 9.653463289927411e-05, "loss": 0.0107, "step": 51400 }, { "epoch": 6.070080226521944, "grad_norm": 0.04698847234249115, "learning_rate": 9.652783450580964e-05, "loss": 0.0086, "step": 51450 }, { "epoch": 6.075979235488438, "grad_norm": 0.21385595202445984, "learning_rate": 9.652102969016284e-05, "loss": 0.0113, "step": 51500 }, { "epoch": 6.0818782444549315, "grad_norm": 0.10789880901575089, "learning_rate": 9.651421845327296e-05, "loss": 0.0119, "step": 51550 }, { "epoch": 6.087777253421425, "grad_norm": 0.27113524079322815, "learning_rate": 9.650740079608014e-05, "loss": 0.0104, "step": 51600 }, { "epoch": 6.093676262387919, "grad_norm": 0.12847699224948883, "learning_rate": 9.650057671952542e-05, "loss": 0.0109, "step": 51650 }, { "epoch": 6.0995752713544125, "grad_norm": 0.1926417052745819, "learning_rate": 9.649374622455074e-05, "loss": 0.0125, "step": 51700 }, { "epoch": 6.105474280320906, "grad_norm": 0.20949354767799377, "learning_rate": 9.648690931209887e-05, "loss": 0.0147, "step": 51750 }, { "epoch": 6.1113732892874, "grad_norm": 0.2437167912721634, "learning_rate": 9.648006598311353e-05, "loss": 0.014, "step": 51800 }, { "epoch": 6.117272298253893, "grad_norm": 0.32109299302101135, "learning_rate": 9.647321623853929e-05, "loss": 0.0095, "step": 51850 }, { "epoch": 6.123171307220387, "grad_norm": 0.21691176295280457, "learning_rate": 9.646636007932159e-05, "loss": 0.0125, "step": 51900 }, { "epoch": 6.129070316186881, "grad_norm": 0.11057230830192566, "learning_rate": 9.645949750640679e-05, "loss": 0.0105, "step": 51950 }, { "epoch": 6.134969325153374, "grad_norm": 0.22997784614562988, "learning_rate": 9.645262852074216e-05, "loss": 0.0107, "step": 52000 }, { "epoch": 6.134969325153374, "eval_cer": 0.08864027538726334, "eval_loss": 0.004884571302682161, "eval_runtime": 2.0757, "eval_samples_per_second": 48.176, "eval_steps_per_second": 1.927, "eval_wer": 0.28, "step": 52000 }, { "epoch": 6.140868334119868, "grad_norm": 0.19583098590373993, "learning_rate": 9.644575312327575e-05, "loss": 0.0109, "step": 52050 }, { "epoch": 6.146767343086362, "grad_norm": 0.2955881953239441, "learning_rate": 9.643887131495661e-05, "loss": 0.011, "step": 52100 }, { "epoch": 6.152666352052855, "grad_norm": 0.13174374401569366, "learning_rate": 9.64319830967346e-05, "loss": 0.0117, "step": 52150 }, { "epoch": 6.158565361019349, "grad_norm": 0.21649867296218872, "learning_rate": 9.642508846956053e-05, "loss": 0.0123, "step": 52200 }, { "epoch": 6.164464369985843, "grad_norm": 0.24348969757556915, "learning_rate": 9.641818743438604e-05, "loss": 0.0114, "step": 52250 }, { "epoch": 6.170363378952336, "grad_norm": 0.1887369155883789, "learning_rate": 9.641127999216366e-05, "loss": 0.0125, "step": 52300 }, { "epoch": 6.17626238791883, "grad_norm": 0.2097659707069397, "learning_rate": 9.640436614384683e-05, "loss": 0.0108, "step": 52350 }, { "epoch": 6.1821613968853235, "grad_norm": 0.1560640037059784, "learning_rate": 9.639744589038984e-05, "loss": 0.0121, "step": 52400 }, { "epoch": 6.188060405851817, "grad_norm": 0.19053038954734802, "learning_rate": 9.63905192327479e-05, "loss": 0.0132, "step": 52450 }, { "epoch": 6.193959414818311, "grad_norm": 0.3104229271411896, "learning_rate": 9.63835861718771e-05, "loss": 0.0149, "step": 52500 }, { "epoch": 6.1998584237848045, "grad_norm": 0.349936842918396, "learning_rate": 9.637664670873439e-05, "loss": 0.0121, "step": 52550 }, { "epoch": 6.205757432751298, "grad_norm": 0.388614684343338, "learning_rate": 9.636970084427759e-05, "loss": 0.0136, "step": 52600 }, { "epoch": 6.211656441717792, "grad_norm": 0.20306573808193207, "learning_rate": 9.636274857946547e-05, "loss": 0.0135, "step": 52650 }, { "epoch": 6.217555450684285, "grad_norm": 0.1503097265958786, "learning_rate": 9.635578991525764e-05, "loss": 0.0097, "step": 52700 }, { "epoch": 6.223454459650779, "grad_norm": 0.2394477277994156, "learning_rate": 9.634882485261458e-05, "loss": 0.0132, "step": 52750 }, { "epoch": 6.229353468617273, "grad_norm": 0.3438850939273834, "learning_rate": 9.634185339249766e-05, "loss": 0.0103, "step": 52800 }, { "epoch": 6.235252477583766, "grad_norm": 0.13622817397117615, "learning_rate": 9.633487553586916e-05, "loss": 0.0128, "step": 52850 }, { "epoch": 6.24115148655026, "grad_norm": 0.16474422812461853, "learning_rate": 9.632789128369222e-05, "loss": 0.0123, "step": 52900 }, { "epoch": 6.247050495516753, "grad_norm": 0.10336677730083466, "learning_rate": 9.632090063693088e-05, "loss": 0.0129, "step": 52950 }, { "epoch": 6.252949504483247, "grad_norm": 0.0994843915104866, "learning_rate": 9.631390359655004e-05, "loss": 0.0125, "step": 53000 }, { "epoch": 6.252949504483247, "eval_cer": 0.08950086058519793, "eval_loss": 0.00645048450678587, "eval_runtime": 2.0591, "eval_samples_per_second": 48.565, "eval_steps_per_second": 1.943, "eval_wer": 0.29, "step": 53000 }, { "epoch": 6.25884851344974, "grad_norm": 0.22904548048973083, "learning_rate": 9.630690016351547e-05, "loss": 0.0109, "step": 53050 }, { "epoch": 6.264747522416234, "grad_norm": 0.2753964960575104, "learning_rate": 9.62998903387939e-05, "loss": 0.0134, "step": 53100 }, { "epoch": 6.270646531382727, "grad_norm": 0.27785664796829224, "learning_rate": 9.629287412335284e-05, "loss": 0.0105, "step": 53150 }, { "epoch": 6.276545540349221, "grad_norm": 0.14507389068603516, "learning_rate": 9.628585151816074e-05, "loss": 0.0114, "step": 53200 }, { "epoch": 6.282444549315715, "grad_norm": 0.1471472680568695, "learning_rate": 9.627882252418695e-05, "loss": 0.0133, "step": 53250 }, { "epoch": 6.288343558282208, "grad_norm": 0.18696172535419464, "learning_rate": 9.627178714240163e-05, "loss": 0.0128, "step": 53300 }, { "epoch": 6.294242567248702, "grad_norm": 0.11634916812181473, "learning_rate": 9.62647453737759e-05, "loss": 0.0128, "step": 53350 }, { "epoch": 6.3001415762151955, "grad_norm": 0.27637767791748047, "learning_rate": 9.625769721928172e-05, "loss": 0.0112, "step": 53400 }, { "epoch": 6.306040585181689, "grad_norm": 0.31218603253364563, "learning_rate": 9.625064267989192e-05, "loss": 0.0125, "step": 53450 }, { "epoch": 6.311939594148183, "grad_norm": 0.1500752866268158, "learning_rate": 9.624358175658025e-05, "loss": 0.0122, "step": 53500 }, { "epoch": 6.3178386031146765, "grad_norm": 0.18436899781227112, "learning_rate": 9.623651445032132e-05, "loss": 0.0122, "step": 53550 }, { "epoch": 6.32373761208117, "grad_norm": 0.18690377473831177, "learning_rate": 9.622944076209062e-05, "loss": 0.0121, "step": 53600 }, { "epoch": 6.329636621047664, "grad_norm": 0.1139013022184372, "learning_rate": 9.62223606928645e-05, "loss": 0.0109, "step": 53650 }, { "epoch": 6.335535630014157, "grad_norm": 0.07645145058631897, "learning_rate": 9.621527424362024e-05, "loss": 0.0096, "step": 53700 }, { "epoch": 6.341434638980651, "grad_norm": 0.2050042301416397, "learning_rate": 9.620818141533598e-05, "loss": 0.0114, "step": 53750 }, { "epoch": 6.347333647947145, "grad_norm": 0.21267913281917572, "learning_rate": 9.620108220899071e-05, "loss": 0.0116, "step": 53800 }, { "epoch": 6.353232656913638, "grad_norm": 0.20599062740802765, "learning_rate": 9.619397662556435e-05, "loss": 0.0138, "step": 53850 }, { "epoch": 6.359131665880132, "grad_norm": 0.16318672895431519, "learning_rate": 9.618686466603764e-05, "loss": 0.0111, "step": 53900 }, { "epoch": 6.365030674846626, "grad_norm": 0.15455856919288635, "learning_rate": 9.617974633139228e-05, "loss": 0.0114, "step": 53950 }, { "epoch": 6.370929683813119, "grad_norm": 0.34723225235939026, "learning_rate": 9.617262162261075e-05, "loss": 0.0118, "step": 54000 }, { "epoch": 6.370929683813119, "eval_cer": 0.08777969018932874, "eval_loss": 0.005922945681959391, "eval_runtime": 2.0991, "eval_samples_per_second": 47.64, "eval_steps_per_second": 1.906, "eval_wer": 0.27, "step": 54000 }, { "epoch": 6.376828692779613, "grad_norm": 0.13091154396533966, "learning_rate": 9.616549054067654e-05, "loss": 0.0118, "step": 54050 }, { "epoch": 6.382727701746107, "grad_norm": 0.12332022190093994, "learning_rate": 9.615835308657388e-05, "loss": 0.0119, "step": 54100 }, { "epoch": 6.3886267107126, "grad_norm": 0.3059333562850952, "learning_rate": 9.615120926128798e-05, "loss": 0.0125, "step": 54150 }, { "epoch": 6.394525719679094, "grad_norm": 0.155336394906044, "learning_rate": 9.614405906580485e-05, "loss": 0.0124, "step": 54200 }, { "epoch": 6.4004247286455875, "grad_norm": 0.1606709361076355, "learning_rate": 9.61369025011115e-05, "loss": 0.011, "step": 54250 }, { "epoch": 6.406323737612081, "grad_norm": 0.2319399118423462, "learning_rate": 9.612973956819567e-05, "loss": 0.011, "step": 54300 }, { "epoch": 6.412222746578575, "grad_norm": 0.13492260873317719, "learning_rate": 9.612257026804608e-05, "loss": 0.0123, "step": 54350 }, { "epoch": 6.4181217555450685, "grad_norm": 0.12450329214334488, "learning_rate": 9.61153946016523e-05, "loss": 0.0099, "step": 54400 }, { "epoch": 6.424020764511562, "grad_norm": 0.1268911212682724, "learning_rate": 9.610821257000478e-05, "loss": 0.0117, "step": 54450 }, { "epoch": 6.429919773478056, "grad_norm": 0.2190091609954834, "learning_rate": 9.610102417409483e-05, "loss": 0.0107, "step": 54500 }, { "epoch": 6.435818782444549, "grad_norm": 0.19543308019638062, "learning_rate": 9.609382941491468e-05, "loss": 0.012, "step": 54550 }, { "epoch": 6.441717791411043, "grad_norm": 0.11771304160356522, "learning_rate": 9.608662829345741e-05, "loss": 0.0142, "step": 54600 }, { "epoch": 6.447616800377537, "grad_norm": 0.22588591277599335, "learning_rate": 9.607942081071697e-05, "loss": 0.011, "step": 54650 }, { "epoch": 6.45351580934403, "grad_norm": 0.13472077250480652, "learning_rate": 9.607220696768821e-05, "loss": 0.0121, "step": 54700 }, { "epoch": 6.459414818310524, "grad_norm": 0.1615174412727356, "learning_rate": 9.606498676536684e-05, "loss": 0.0113, "step": 54750 }, { "epoch": 6.465313827277018, "grad_norm": 0.13247506320476532, "learning_rate": 9.605776020474946e-05, "loss": 0.0132, "step": 54800 }, { "epoch": 6.471212836243511, "grad_norm": 0.20800936222076416, "learning_rate": 9.605052728683354e-05, "loss": 0.012, "step": 54850 }, { "epoch": 6.477111845210005, "grad_norm": 0.10365154594182968, "learning_rate": 9.604328801261746e-05, "loss": 0.0123, "step": 54900 }, { "epoch": 6.483010854176499, "grad_norm": 0.15250594913959503, "learning_rate": 9.603604238310039e-05, "loss": 0.0124, "step": 54950 }, { "epoch": 6.488909863142992, "grad_norm": 0.14300771057605743, "learning_rate": 9.602879039928249e-05, "loss": 0.0115, "step": 55000 }, { "epoch": 6.488909863142992, "eval_cer": 0.08605851979345955, "eval_loss": 0.0020612599328160286, "eval_runtime": 2.0353, "eval_samples_per_second": 49.133, "eval_steps_per_second": 1.965, "eval_wer": 0.27, "step": 55000 }, { "epoch": 6.494808872109486, "grad_norm": 0.023269513621926308, "learning_rate": 9.602153206216473e-05, "loss": 0.01, "step": 55050 }, { "epoch": 6.5007078810759795, "grad_norm": 0.14296986162662506, "learning_rate": 9.601426737274896e-05, "loss": 0.0132, "step": 55100 }, { "epoch": 6.506606890042473, "grad_norm": 0.31311511993408203, "learning_rate": 9.600699633203791e-05, "loss": 0.0121, "step": 55150 }, { "epoch": 6.512505899008967, "grad_norm": 0.2632223069667816, "learning_rate": 9.59997189410352e-05, "loss": 0.012, "step": 55200 }, { "epoch": 6.5184049079754605, "grad_norm": 0.18228037655353546, "learning_rate": 9.599243520074534e-05, "loss": 0.012, "step": 55250 }, { "epoch": 6.524303916941954, "grad_norm": 0.1516638696193695, "learning_rate": 9.598514511217367e-05, "loss": 0.0117, "step": 55300 }, { "epoch": 6.530202925908448, "grad_norm": 0.18229617178440094, "learning_rate": 9.597784867632645e-05, "loss": 0.0116, "step": 55350 }, { "epoch": 6.5361019348749405, "grad_norm": 0.19062268733978271, "learning_rate": 9.597054589421077e-05, "loss": 0.0107, "step": 55400 }, { "epoch": 6.542000943841435, "grad_norm": 0.17254923284053802, "learning_rate": 9.596323676683466e-05, "loss": 0.014, "step": 55450 }, { "epoch": 6.547899952807928, "grad_norm": 0.13062907755374908, "learning_rate": 9.595592129520695e-05, "loss": 0.0118, "step": 55500 }, { "epoch": 6.553798961774422, "grad_norm": 0.262468546628952, "learning_rate": 9.594859948033742e-05, "loss": 0.0107, "step": 55550 }, { "epoch": 6.559697970740915, "grad_norm": 0.1303364783525467, "learning_rate": 9.594127132323669e-05, "loss": 0.011, "step": 55600 }, { "epoch": 6.56559697970741, "grad_norm": 0.21086041629314423, "learning_rate": 9.593393682491624e-05, "loss": 0.0134, "step": 55650 }, { "epoch": 6.571495988673902, "grad_norm": 0.29268988966941833, "learning_rate": 9.592659598638847e-05, "loss": 0.0122, "step": 55700 }, { "epoch": 6.577394997640396, "grad_norm": 0.1578705757856369, "learning_rate": 9.591924880866658e-05, "loss": 0.0113, "step": 55750 }, { "epoch": 6.58329400660689, "grad_norm": 0.15779335796833038, "learning_rate": 9.591189529276474e-05, "loss": 0.0127, "step": 55800 }, { "epoch": 6.589193015573383, "grad_norm": 0.07028817385435104, "learning_rate": 9.590453543969793e-05, "loss": 0.0098, "step": 55850 }, { "epoch": 6.595092024539877, "grad_norm": 0.0682520717382431, "learning_rate": 9.5897169250482e-05, "loss": 0.0093, "step": 55900 }, { "epoch": 6.600991033506371, "grad_norm": 0.2551731765270233, "learning_rate": 9.588979672613373e-05, "loss": 0.0129, "step": 55950 }, { "epoch": 6.606890042472864, "grad_norm": 0.16099557280540466, "learning_rate": 9.588241786767071e-05, "loss": 0.0107, "step": 56000 }, { "epoch": 6.606890042472864, "eval_cer": 0.08519793459552495, "eval_loss": 0.0024020567070692778, "eval_runtime": 2.0396, "eval_samples_per_second": 49.029, "eval_steps_per_second": 1.961, "eval_wer": 0.26, "step": 56000 }, { "epoch": 6.612789051439358, "grad_norm": 0.19888658821582794, "learning_rate": 9.587503267611146e-05, "loss": 0.0115, "step": 56050 }, { "epoch": 6.618688060405852, "grad_norm": 0.3182220160961151, "learning_rate": 9.586764115247535e-05, "loss": 0.0109, "step": 56100 }, { "epoch": 6.624587069372345, "grad_norm": 0.13699300587177277, "learning_rate": 9.586024329778262e-05, "loss": 0.0148, "step": 56150 }, { "epoch": 6.630486078338839, "grad_norm": 0.23459847271442413, "learning_rate": 9.585283911305437e-05, "loss": 0.0122, "step": 56200 }, { "epoch": 6.6363850873053325, "grad_norm": 0.25463902950286865, "learning_rate": 9.58454285993126e-05, "loss": 0.0115, "step": 56250 }, { "epoch": 6.642284096271826, "grad_norm": 0.1541976034641266, "learning_rate": 9.583801175758018e-05, "loss": 0.0127, "step": 56300 }, { "epoch": 6.64818310523832, "grad_norm": 0.2736074924468994, "learning_rate": 9.583058858888084e-05, "loss": 0.0132, "step": 56350 }, { "epoch": 6.654082114204813, "grad_norm": 0.14838916063308716, "learning_rate": 9.58231590942392e-05, "loss": 0.0117, "step": 56400 }, { "epoch": 6.659981123171307, "grad_norm": 0.059853971004486084, "learning_rate": 9.581572327468074e-05, "loss": 0.0108, "step": 56450 }, { "epoch": 6.665880132137801, "grad_norm": 0.22838100790977478, "learning_rate": 9.580828113123181e-05, "loss": 0.0112, "step": 56500 }, { "epoch": 6.671779141104294, "grad_norm": 0.3116530776023865, "learning_rate": 9.580083266491966e-05, "loss": 0.0151, "step": 56550 }, { "epoch": 6.677678150070788, "grad_norm": 0.15688751637935638, "learning_rate": 9.579337787677239e-05, "loss": 0.0105, "step": 56600 }, { "epoch": 6.683577159037282, "grad_norm": 0.10559248924255371, "learning_rate": 9.578591676781895e-05, "loss": 0.0131, "step": 56650 }, { "epoch": 6.689476168003775, "grad_norm": 0.2346154749393463, "learning_rate": 9.577844933908922e-05, "loss": 0.014, "step": 56700 }, { "epoch": 6.695375176970269, "grad_norm": 0.11492932587862015, "learning_rate": 9.577097559161388e-05, "loss": 0.0124, "step": 56750 }, { "epoch": 6.701274185936763, "grad_norm": 0.15556907653808594, "learning_rate": 9.576349552642456e-05, "loss": 0.0108, "step": 56800 }, { "epoch": 6.707173194903256, "grad_norm": 0.14393387734889984, "learning_rate": 9.575600914455372e-05, "loss": 0.0116, "step": 56850 }, { "epoch": 6.71307220386975, "grad_norm": 0.13762167096138, "learning_rate": 9.574851644703469e-05, "loss": 0.0118, "step": 56900 }, { "epoch": 6.7189712128362435, "grad_norm": 0.23158355057239532, "learning_rate": 9.574101743490167e-05, "loss": 0.0133, "step": 56950 }, { "epoch": 6.724870221802737, "grad_norm": 0.10597040504217148, "learning_rate": 9.573351210918974e-05, "loss": 0.0118, "step": 57000 }, { "epoch": 6.724870221802737, "eval_cer": 0.08605851979345955, "eval_loss": 0.0036210366524755955, "eval_runtime": 2.0631, "eval_samples_per_second": 48.47, "eval_steps_per_second": 1.939, "eval_wer": 0.27, "step": 57000 }, { "epoch": 6.730769230769231, "grad_norm": 0.10410529375076294, "learning_rate": 9.572600047093487e-05, "loss": 0.013, "step": 57050 }, { "epoch": 6.7366682397357245, "grad_norm": 0.2371734082698822, "learning_rate": 9.571848252117388e-05, "loss": 0.0122, "step": 57100 }, { "epoch": 6.742567248702218, "grad_norm": 0.311148077249527, "learning_rate": 9.571095826094445e-05, "loss": 0.0106, "step": 57150 }, { "epoch": 6.748466257668712, "grad_norm": 0.1886921226978302, "learning_rate": 9.570342769128514e-05, "loss": 0.0105, "step": 57200 }, { "epoch": 6.754365266635205, "grad_norm": 0.2555009424686432, "learning_rate": 9.56958908132354e-05, "loss": 0.0111, "step": 57250 }, { "epoch": 6.760264275601699, "grad_norm": 0.25010839104652405, "learning_rate": 9.568834762783554e-05, "loss": 0.0128, "step": 57300 }, { "epoch": 6.766163284568193, "grad_norm": 0.12123282253742218, "learning_rate": 9.568079813612672e-05, "loss": 0.0117, "step": 57350 }, { "epoch": 6.772062293534686, "grad_norm": 0.34284645318984985, "learning_rate": 9.567324233915099e-05, "loss": 0.013, "step": 57400 }, { "epoch": 6.77796130250118, "grad_norm": 0.2559986412525177, "learning_rate": 9.566568023795128e-05, "loss": 0.013, "step": 57450 }, { "epoch": 6.783860311467674, "grad_norm": 0.2410593181848526, "learning_rate": 9.565811183357137e-05, "loss": 0.0115, "step": 57500 }, { "epoch": 6.789759320434167, "grad_norm": 0.16477932035923004, "learning_rate": 9.565053712705593e-05, "loss": 0.0117, "step": 57550 }, { "epoch": 6.795658329400661, "grad_norm": 0.3095419704914093, "learning_rate": 9.564295611945046e-05, "loss": 0.0109, "step": 57600 }, { "epoch": 6.801557338367155, "grad_norm": 0.16235609352588654, "learning_rate": 9.56353688118014e-05, "loss": 0.0124, "step": 57650 }, { "epoch": 6.807456347333648, "grad_norm": 0.2599146366119385, "learning_rate": 9.562777520515598e-05, "loss": 0.013, "step": 57700 }, { "epoch": 6.813355356300142, "grad_norm": 0.16992750763893127, "learning_rate": 9.562017530056234e-05, "loss": 0.0116, "step": 57750 }, { "epoch": 6.8192543652666355, "grad_norm": 0.25207918882369995, "learning_rate": 9.561256909906951e-05, "loss": 0.0119, "step": 57800 }, { "epoch": 6.825153374233129, "grad_norm": 0.6379481554031372, "learning_rate": 9.560495660172734e-05, "loss": 0.0111, "step": 57850 }, { "epoch": 6.831052383199623, "grad_norm": 0.3004337251186371, "learning_rate": 9.559733780958659e-05, "loss": 0.0114, "step": 57900 }, { "epoch": 6.8369513921661165, "grad_norm": 0.19293105602264404, "learning_rate": 9.558971272369885e-05, "loss": 0.0119, "step": 57950 }, { "epoch": 6.84285040113261, "grad_norm": 0.14347144961357117, "learning_rate": 9.558208134511664e-05, "loss": 0.0139, "step": 58000 }, { "epoch": 6.84285040113261, "eval_cer": 0.08605851979345955, "eval_loss": 0.0023124245926737785, "eval_runtime": 2.0966, "eval_samples_per_second": 47.695, "eval_steps_per_second": 1.908, "eval_wer": 0.27, "step": 58000 }, { "epoch": 6.848749410099103, "grad_norm": 0.07267823070287704, "learning_rate": 9.557444367489328e-05, "loss": 0.0126, "step": 58050 }, { "epoch": 6.854648419065597, "grad_norm": 0.13160739839076996, "learning_rate": 9.556679971408301e-05, "loss": 0.012, "step": 58100 }, { "epoch": 6.86054742803209, "grad_norm": 0.20710210502147675, "learning_rate": 9.555914946374089e-05, "loss": 0.0128, "step": 58150 }, { "epoch": 6.866446436998585, "grad_norm": 0.13213810324668884, "learning_rate": 9.555149292492289e-05, "loss": 0.0101, "step": 58200 }, { "epoch": 6.8723454459650775, "grad_norm": 0.22271493077278137, "learning_rate": 9.554383009868584e-05, "loss": 0.0115, "step": 58250 }, { "epoch": 6.878244454931572, "grad_norm": 0.12168186902999878, "learning_rate": 9.553616098608742e-05, "loss": 0.0107, "step": 58300 }, { "epoch": 6.884143463898065, "grad_norm": 0.11576646566390991, "learning_rate": 9.55284855881862e-05, "loss": 0.0118, "step": 58350 }, { "epoch": 6.890042472864558, "grad_norm": 0.24406087398529053, "learning_rate": 9.55208039060416e-05, "loss": 0.0109, "step": 58400 }, { "epoch": 6.895941481831052, "grad_norm": 0.09502393007278442, "learning_rate": 9.551311594071389e-05, "loss": 0.0104, "step": 58450 }, { "epoch": 6.901840490797546, "grad_norm": 0.2852206230163574, "learning_rate": 9.550542169326427e-05, "loss": 0.0129, "step": 58500 }, { "epoch": 6.907739499764039, "grad_norm": 0.1565900444984436, "learning_rate": 9.549772116475476e-05, "loss": 0.0124, "step": 58550 }, { "epoch": 6.913638508730533, "grad_norm": 0.24071058630943298, "learning_rate": 9.549001435624823e-05, "loss": 0.0116, "step": 58600 }, { "epoch": 6.919537517697027, "grad_norm": 0.43122953176498413, "learning_rate": 9.548230126880845e-05, "loss": 0.0118, "step": 58650 }, { "epoch": 6.92543652666352, "grad_norm": 0.22748690843582153, "learning_rate": 9.547458190350008e-05, "loss": 0.0126, "step": 58700 }, { "epoch": 6.931335535630014, "grad_norm": 0.30233532190322876, "learning_rate": 9.546685626138856e-05, "loss": 0.012, "step": 58750 }, { "epoch": 6.937234544596508, "grad_norm": 0.1429918110370636, "learning_rate": 9.54591243435403e-05, "loss": 0.0111, "step": 58800 }, { "epoch": 6.943133553563001, "grad_norm": 0.4111623466014862, "learning_rate": 9.545138615102251e-05, "loss": 0.0101, "step": 58850 }, { "epoch": 6.949032562529495, "grad_norm": 0.0493866503238678, "learning_rate": 9.544364168490328e-05, "loss": 0.012, "step": 58900 }, { "epoch": 6.9549315714959885, "grad_norm": 0.17557421326637268, "learning_rate": 9.543589094625157e-05, "loss": 0.01, "step": 58950 }, { "epoch": 6.960830580462482, "grad_norm": 0.1347290426492691, "learning_rate": 9.542813393613721e-05, "loss": 0.0163, "step": 59000 }, { "epoch": 6.960830580462482, "eval_cer": 0.08950086058519793, "eval_loss": 0.0041783396154642105, "eval_runtime": 2.0366, "eval_samples_per_second": 49.1, "eval_steps_per_second": 1.964, "eval_wer": 0.28, "step": 59000 }, { "epoch": 6.966729589428976, "grad_norm": 0.12272752821445465, "learning_rate": 9.54203706556309e-05, "loss": 0.0106, "step": 59050 }, { "epoch": 6.9726285983954694, "grad_norm": 0.2244037538766861, "learning_rate": 9.541260110580417e-05, "loss": 0.0109, "step": 59100 }, { "epoch": 6.978527607361963, "grad_norm": 0.15972813963890076, "learning_rate": 9.540482528772947e-05, "loss": 0.0122, "step": 59150 }, { "epoch": 6.984426616328457, "grad_norm": 0.5393397808074951, "learning_rate": 9.539704320248007e-05, "loss": 0.0128, "step": 59200 }, { "epoch": 6.99032562529495, "grad_norm": 0.2541995346546173, "learning_rate": 9.538925485113011e-05, "loss": 0.0118, "step": 59250 }, { "epoch": 6.996224634261444, "grad_norm": 0.1189579963684082, "learning_rate": 9.538146023475464e-05, "loss": 0.01, "step": 59300 }, { "epoch": 7.002123643227938, "grad_norm": 0.032989390194416046, "learning_rate": 9.537365935442952e-05, "loss": 0.0114, "step": 59350 }, { "epoch": 7.008022652194431, "grad_norm": 0.7950332760810852, "learning_rate": 9.536585221123151e-05, "loss": 0.0116, "step": 59400 }, { "epoch": 7.013921661160925, "grad_norm": 0.21944725513458252, "learning_rate": 9.535803880623822e-05, "loss": 0.0097, "step": 59450 }, { "epoch": 7.019820670127419, "grad_norm": 0.15878574550151825, "learning_rate": 9.535021914052808e-05, "loss": 0.0096, "step": 59500 }, { "epoch": 7.025719679093912, "grad_norm": 0.1529179960489273, "learning_rate": 9.53423932151805e-05, "loss": 0.0093, "step": 59550 }, { "epoch": 7.031618688060406, "grad_norm": 0.12882712483406067, "learning_rate": 9.533456103127566e-05, "loss": 0.0095, "step": 59600 }, { "epoch": 7.0375176970268996, "grad_norm": 0.3620966672897339, "learning_rate": 9.532672258989459e-05, "loss": 0.0117, "step": 59650 }, { "epoch": 7.043416705993393, "grad_norm": 0.13038146495819092, "learning_rate": 9.531887789211925e-05, "loss": 0.0095, "step": 59700 }, { "epoch": 7.049315714959887, "grad_norm": 0.04669421538710594, "learning_rate": 9.531102693903245e-05, "loss": 0.0095, "step": 59750 }, { "epoch": 7.0552147239263805, "grad_norm": 0.19285506010055542, "learning_rate": 9.53031697317178e-05, "loss": 0.0091, "step": 59800 }, { "epoch": 7.061113732892874, "grad_norm": 0.09836020320653915, "learning_rate": 9.529530627125987e-05, "loss": 0.0105, "step": 59850 }, { "epoch": 7.067012741859368, "grad_norm": 0.35016411542892456, "learning_rate": 9.528743655874404e-05, "loss": 0.012, "step": 59900 }, { "epoch": 7.072911750825861, "grad_norm": 0.346596896648407, "learning_rate": 9.527956059525653e-05, "loss": 0.011, "step": 59950 }, { "epoch": 7.078810759792355, "grad_norm": 0.1133182942867279, "learning_rate": 9.527167838188446e-05, "loss": 0.0101, "step": 60000 }, { "epoch": 7.078810759792355, "eval_cer": 0.09036144578313253, "eval_loss": 0.0053115906193852425, "eval_runtime": 2.12, "eval_samples_per_second": 47.171, "eval_steps_per_second": 1.887, "eval_wer": 0.28, "step": 60000 }, { "epoch": 7.084709768758849, "grad_norm": 0.2650994658470154, "learning_rate": 9.52637899197158e-05, "loss": 0.0089, "step": 60050 }, { "epoch": 7.090608777725342, "grad_norm": 0.12952235341072083, "learning_rate": 9.52558952098394e-05, "loss": 0.0117, "step": 60100 }, { "epoch": 7.096507786691836, "grad_norm": 0.14604976773262024, "learning_rate": 9.524799425334494e-05, "loss": 0.011, "step": 60150 }, { "epoch": 7.10240679565833, "grad_norm": 0.32305118441581726, "learning_rate": 9.5240087051323e-05, "loss": 0.0099, "step": 60200 }, { "epoch": 7.108305804624823, "grad_norm": 0.09555312246084213, "learning_rate": 9.523217360486498e-05, "loss": 0.0102, "step": 60250 }, { "epoch": 7.114204813591317, "grad_norm": 0.06371057778596878, "learning_rate": 9.522425391506318e-05, "loss": 0.0099, "step": 60300 }, { "epoch": 7.120103822557811, "grad_norm": 0.32803982496261597, "learning_rate": 9.521632798301072e-05, "loss": 0.0114, "step": 60350 }, { "epoch": 7.126002831524304, "grad_norm": 0.14593878388404846, "learning_rate": 9.520839580980166e-05, "loss": 0.0108, "step": 60400 }, { "epoch": 7.131901840490798, "grad_norm": 0.08637553453445435, "learning_rate": 9.52004573965308e-05, "loss": 0.0092, "step": 60450 }, { "epoch": 7.1378008494572915, "grad_norm": 0.15183517336845398, "learning_rate": 9.519251274429393e-05, "loss": 0.009, "step": 60500 }, { "epoch": 7.143699858423785, "grad_norm": 0.17923247814178467, "learning_rate": 9.518456185418761e-05, "loss": 0.009, "step": 60550 }, { "epoch": 7.149598867390279, "grad_norm": 0.3793523907661438, "learning_rate": 9.51766047273093e-05, "loss": 0.0113, "step": 60600 }, { "epoch": 7.1554978763567725, "grad_norm": 0.23812296986579895, "learning_rate": 9.516864136475729e-05, "loss": 0.0105, "step": 60650 }, { "epoch": 7.161396885323265, "grad_norm": 0.08261583745479584, "learning_rate": 9.516067176763078e-05, "loss": 0.0111, "step": 60700 }, { "epoch": 7.16729589428976, "grad_norm": 0.1464952826499939, "learning_rate": 9.51526959370298e-05, "loss": 0.0104, "step": 60750 }, { "epoch": 7.1731949032562525, "grad_norm": 0.1779615879058838, "learning_rate": 9.514471387405526e-05, "loss": 0.0099, "step": 60800 }, { "epoch": 7.179093912222746, "grad_norm": 0.09767632931470871, "learning_rate": 9.513672557980889e-05, "loss": 0.0102, "step": 60850 }, { "epoch": 7.18499292118924, "grad_norm": 0.16102062165737152, "learning_rate": 9.512873105539329e-05, "loss": 0.0101, "step": 60900 }, { "epoch": 7.1908919301557335, "grad_norm": 0.044969592243433, "learning_rate": 9.512073030191197e-05, "loss": 0.0106, "step": 60950 }, { "epoch": 7.196790939122227, "grad_norm": 0.1763601005077362, "learning_rate": 9.511272332046927e-05, "loss": 0.012, "step": 61000 }, { "epoch": 7.196790939122227, "eval_cer": 0.08777969018932874, "eval_loss": 0.005654764827340841, "eval_runtime": 2.0429, "eval_samples_per_second": 48.949, "eval_steps_per_second": 1.958, "eval_wer": 0.27, "step": 61000 }, { "epoch": 7.202689948088721, "grad_norm": 0.12993644177913666, "learning_rate": 9.510471011217035e-05, "loss": 0.011, "step": 61050 }, { "epoch": 7.208588957055214, "grad_norm": 0.31124529242515564, "learning_rate": 9.509669067812128e-05, "loss": 0.0121, "step": 61100 }, { "epoch": 7.214487966021708, "grad_norm": 0.13357725739479065, "learning_rate": 9.508866501942896e-05, "loss": 0.009, "step": 61150 }, { "epoch": 7.220386974988202, "grad_norm": 0.11526846885681152, "learning_rate": 9.508063313720119e-05, "loss": 0.01, "step": 61200 }, { "epoch": 7.226285983954695, "grad_norm": 0.17496001720428467, "learning_rate": 9.507259503254659e-05, "loss": 0.0105, "step": 61250 }, { "epoch": 7.232184992921189, "grad_norm": 0.6334359645843506, "learning_rate": 9.506455070657464e-05, "loss": 0.0111, "step": 61300 }, { "epoch": 7.238084001887683, "grad_norm": 0.06810826063156128, "learning_rate": 9.505650016039569e-05, "loss": 0.0094, "step": 61350 }, { "epoch": 7.243983010854176, "grad_norm": 0.23465979099273682, "learning_rate": 9.504844339512095e-05, "loss": 0.0119, "step": 61400 }, { "epoch": 7.24988201982067, "grad_norm": 0.08159612119197845, "learning_rate": 9.50403804118625e-05, "loss": 0.0089, "step": 61450 }, { "epoch": 7.255781028787164, "grad_norm": 0.2689908444881439, "learning_rate": 9.503231121173326e-05, "loss": 0.0116, "step": 61500 }, { "epoch": 7.261680037753657, "grad_norm": 0.4975597560405731, "learning_rate": 9.502423579584698e-05, "loss": 0.0105, "step": 61550 }, { "epoch": 7.267579046720151, "grad_norm": 0.1445157527923584, "learning_rate": 9.501615416531836e-05, "loss": 0.0119, "step": 61600 }, { "epoch": 7.2734780556866445, "grad_norm": 0.02881554327905178, "learning_rate": 9.500806632126282e-05, "loss": 0.0098, "step": 61650 }, { "epoch": 7.279377064653138, "grad_norm": 0.1951785385608673, "learning_rate": 9.499997226479679e-05, "loss": 0.0128, "step": 61700 }, { "epoch": 7.285276073619632, "grad_norm": 0.1802564412355423, "learning_rate": 9.499187199703743e-05, "loss": 0.011, "step": 61750 }, { "epoch": 7.2911750825861255, "grad_norm": 0.10238390415906906, "learning_rate": 9.498376551910286e-05, "loss": 0.0111, "step": 61800 }, { "epoch": 7.297074091552619, "grad_norm": 0.08270090073347092, "learning_rate": 9.497565283211196e-05, "loss": 0.01, "step": 61850 }, { "epoch": 7.302973100519113, "grad_norm": 0.04806792736053467, "learning_rate": 9.496753393718453e-05, "loss": 0.0109, "step": 61900 }, { "epoch": 7.308872109485606, "grad_norm": 0.06503172218799591, "learning_rate": 9.495940883544122e-05, "loss": 0.0117, "step": 61950 }, { "epoch": 7.3147711184521, "grad_norm": 0.18274810910224915, "learning_rate": 9.495127752800353e-05, "loss": 0.0133, "step": 62000 }, { "epoch": 7.3147711184521, "eval_cer": 0.09036144578313253, "eval_loss": 0.005692591890692711, "eval_runtime": 2.0347, "eval_samples_per_second": 49.148, "eval_steps_per_second": 1.966, "eval_wer": 0.27, "step": 62000 }, { "epoch": 7.320670127418594, "grad_norm": 0.32411742210388184, "learning_rate": 9.494314001599381e-05, "loss": 0.009, "step": 62050 }, { "epoch": 7.326569136385087, "grad_norm": 0.7680760025978088, "learning_rate": 9.493499630053526e-05, "loss": 0.0105, "step": 62100 }, { "epoch": 7.332468145351581, "grad_norm": 0.15355661511421204, "learning_rate": 9.492684638275195e-05, "loss": 0.0098, "step": 62150 }, { "epoch": 7.338367154318075, "grad_norm": 0.22501908242702484, "learning_rate": 9.491869026376883e-05, "loss": 0.0101, "step": 62200 }, { "epoch": 7.344266163284568, "grad_norm": 0.2665337324142456, "learning_rate": 9.491052794471165e-05, "loss": 0.0099, "step": 62250 }, { "epoch": 7.350165172251062, "grad_norm": 0.11837026476860046, "learning_rate": 9.490235942670706e-05, "loss": 0.0091, "step": 62300 }, { "epoch": 7.356064181217556, "grad_norm": 0.17038197815418243, "learning_rate": 9.489418471088257e-05, "loss": 0.0088, "step": 62350 }, { "epoch": 7.361963190184049, "grad_norm": 0.2263016402721405, "learning_rate": 9.488600379836647e-05, "loss": 0.0093, "step": 62400 }, { "epoch": 7.367862199150543, "grad_norm": 0.22859308123588562, "learning_rate": 9.487781669028801e-05, "loss": 0.0107, "step": 62450 }, { "epoch": 7.3737612081170365, "grad_norm": 0.25634825229644775, "learning_rate": 9.486962338777723e-05, "loss": 0.0106, "step": 62500 }, { "epoch": 7.37966021708353, "grad_norm": 0.39090844988822937, "learning_rate": 9.486142389196504e-05, "loss": 0.0121, "step": 62550 }, { "epoch": 7.385559226050024, "grad_norm": 0.18008311092853546, "learning_rate": 9.485321820398321e-05, "loss": 0.0107, "step": 62600 }, { "epoch": 7.391458235016517, "grad_norm": 0.4435904920101166, "learning_rate": 9.484500632496436e-05, "loss": 0.0113, "step": 62650 }, { "epoch": 7.397357243983011, "grad_norm": 0.1818181723356247, "learning_rate": 9.483678825604198e-05, "loss": 0.0113, "step": 62700 }, { "epoch": 7.403256252949505, "grad_norm": 0.19083915650844574, "learning_rate": 9.482856399835038e-05, "loss": 0.0122, "step": 62750 }, { "epoch": 7.409155261915998, "grad_norm": 0.08124452829360962, "learning_rate": 9.482033355302475e-05, "loss": 0.009, "step": 62800 }, { "epoch": 7.415054270882492, "grad_norm": 0.22745956480503082, "learning_rate": 9.481209692120114e-05, "loss": 0.0113, "step": 62850 }, { "epoch": 7.420953279848986, "grad_norm": 0.14135988056659698, "learning_rate": 9.480385410401641e-05, "loss": 0.0098, "step": 62900 }, { "epoch": 7.426852288815479, "grad_norm": 0.037323880940675735, "learning_rate": 9.479560510260836e-05, "loss": 0.0113, "step": 62950 }, { "epoch": 7.432751297781973, "grad_norm": 0.248859241604805, "learning_rate": 9.478734991811556e-05, "loss": 0.0121, "step": 63000 }, { "epoch": 7.432751297781973, "eval_cer": 0.08691910499139414, "eval_loss": 0.00340130552649498, "eval_runtime": 2.0698, "eval_samples_per_second": 48.314, "eval_steps_per_second": 1.933, "eval_wer": 0.27, "step": 63000 }, { "epoch": 7.438650306748467, "grad_norm": 0.20284274220466614, "learning_rate": 9.477908855167745e-05, "loss": 0.0099, "step": 63050 }, { "epoch": 7.44454931571496, "grad_norm": 0.0733318105340004, "learning_rate": 9.477082100443437e-05, "loss": 0.0102, "step": 63100 }, { "epoch": 7.450448324681454, "grad_norm": 0.1780892312526703, "learning_rate": 9.476254727752745e-05, "loss": 0.0129, "step": 63150 }, { "epoch": 7.4563473336479476, "grad_norm": 0.0902891531586647, "learning_rate": 9.475426737209871e-05, "loss": 0.0112, "step": 63200 }, { "epoch": 7.46224634261444, "grad_norm": 0.14366862177848816, "learning_rate": 9.474598128929102e-05, "loss": 0.0105, "step": 63250 }, { "epoch": 7.468145351580935, "grad_norm": 0.19107605516910553, "learning_rate": 9.47376890302481e-05, "loss": 0.0092, "step": 63300 }, { "epoch": 7.474044360547428, "grad_norm": 0.13060033321380615, "learning_rate": 9.472939059611451e-05, "loss": 0.0116, "step": 63350 }, { "epoch": 7.479943369513921, "grad_norm": 0.07377956062555313, "learning_rate": 9.47210859880357e-05, "loss": 0.0093, "step": 63400 }, { "epoch": 7.485842378480415, "grad_norm": 0.1676730513572693, "learning_rate": 9.471277520715792e-05, "loss": 0.0095, "step": 63450 }, { "epoch": 7.4917413874469085, "grad_norm": 0.18925787508487701, "learning_rate": 9.47044582546283e-05, "loss": 0.0104, "step": 63500 }, { "epoch": 7.497640396413402, "grad_norm": 0.29281365871429443, "learning_rate": 9.469613513159483e-05, "loss": 0.0102, "step": 63550 }, { "epoch": 7.503539405379896, "grad_norm": 0.15236841142177582, "learning_rate": 9.468780583920631e-05, "loss": 0.0099, "step": 63600 }, { "epoch": 7.5094384143463895, "grad_norm": 0.339510053396225, "learning_rate": 9.467947037861246e-05, "loss": 0.0101, "step": 63650 }, { "epoch": 7.515337423312883, "grad_norm": 0.20987090468406677, "learning_rate": 9.467112875096382e-05, "loss": 0.0097, "step": 63700 }, { "epoch": 7.521236432279377, "grad_norm": 0.01523631066083908, "learning_rate": 9.466278095741173e-05, "loss": 0.0101, "step": 63750 }, { "epoch": 7.52713544124587, "grad_norm": 0.28231319785118103, "learning_rate": 9.465442699910847e-05, "loss": 0.0113, "step": 63800 }, { "epoch": 7.533034450212364, "grad_norm": 0.19407948851585388, "learning_rate": 9.46460668772071e-05, "loss": 0.0119, "step": 63850 }, { "epoch": 7.538933459178858, "grad_norm": 0.2350284606218338, "learning_rate": 9.463770059286157e-05, "loss": 0.0102, "step": 63900 }, { "epoch": 7.544832468145351, "grad_norm": 0.4715636968612671, "learning_rate": 9.462932814722665e-05, "loss": 0.0129, "step": 63950 }, { "epoch": 7.550731477111845, "grad_norm": 0.1310206949710846, "learning_rate": 9.4620949541458e-05, "loss": 0.0098, "step": 64000 }, { "epoch": 7.550731477111845, "eval_cer": 0.08950086058519793, "eval_loss": 0.003336368128657341, "eval_runtime": 2.0275, "eval_samples_per_second": 49.322, "eval_steps_per_second": 1.973, "eval_wer": 0.28, "step": 64000 }, { "epoch": 7.556630486078339, "grad_norm": 0.1277180165052414, "learning_rate": 9.461256477671211e-05, "loss": 0.0115, "step": 64050 }, { "epoch": 7.562529495044832, "grad_norm": 0.15450452268123627, "learning_rate": 9.460417385414631e-05, "loss": 0.0102, "step": 64100 }, { "epoch": 7.568428504011326, "grad_norm": 0.06297313421964645, "learning_rate": 9.45957767749188e-05, "loss": 0.0114, "step": 64150 }, { "epoch": 7.57432751297782, "grad_norm": 0.08830330520868301, "learning_rate": 9.458737354018859e-05, "loss": 0.011, "step": 64200 }, { "epoch": 7.580226521944313, "grad_norm": 0.18017292022705078, "learning_rate": 9.45789641511156e-05, "loss": 0.0082, "step": 64250 }, { "epoch": 7.586125530910807, "grad_norm": 0.10679321736097336, "learning_rate": 9.457054860886057e-05, "loss": 0.0106, "step": 64300 }, { "epoch": 7.5920245398773005, "grad_norm": 0.1457526981830597, "learning_rate": 9.456212691458506e-05, "loss": 0.0106, "step": 64350 }, { "epoch": 7.597923548843794, "grad_norm": 0.1677292138338089, "learning_rate": 9.455369906945151e-05, "loss": 0.0107, "step": 64400 }, { "epoch": 7.603822557810288, "grad_norm": 0.20909403264522552, "learning_rate": 9.454526507462322e-05, "loss": 0.013, "step": 64450 }, { "epoch": 7.6097215667767815, "grad_norm": 0.03501804545521736, "learning_rate": 9.453682493126435e-05, "loss": 0.0102, "step": 64500 }, { "epoch": 7.615620575743275, "grad_norm": 0.17491179704666138, "learning_rate": 9.452837864053982e-05, "loss": 0.0106, "step": 64550 }, { "epoch": 7.621519584709769, "grad_norm": 0.029843850061297417, "learning_rate": 9.451992620361552e-05, "loss": 0.0089, "step": 64600 }, { "epoch": 7.627418593676262, "grad_norm": 0.2656805217266083, "learning_rate": 9.451146762165809e-05, "loss": 0.0107, "step": 64650 }, { "epoch": 7.633317602642756, "grad_norm": 0.15233132243156433, "learning_rate": 9.450300289583507e-05, "loss": 0.0089, "step": 64700 }, { "epoch": 7.63921661160925, "grad_norm": 0.3604872524738312, "learning_rate": 9.449453202731486e-05, "loss": 0.0105, "step": 64750 }, { "epoch": 7.645115620575743, "grad_norm": 0.17027856409549713, "learning_rate": 9.448605501726664e-05, "loss": 0.0122, "step": 64800 }, { "epoch": 7.651014629542237, "grad_norm": 0.3508340120315552, "learning_rate": 9.447757186686053e-05, "loss": 0.0108, "step": 64850 }, { "epoch": 7.656913638508731, "grad_norm": 0.17849643528461456, "learning_rate": 9.44690825772674e-05, "loss": 0.0133, "step": 64900 }, { "epoch": 7.662812647475224, "grad_norm": 0.25093376636505127, "learning_rate": 9.446058714965905e-05, "loss": 0.0109, "step": 64950 }, { "epoch": 7.668711656441718, "grad_norm": 0.12833966314792633, "learning_rate": 9.445208558520809e-05, "loss": 0.0137, "step": 65000 }, { "epoch": 7.668711656441718, "eval_cer": 0.08691910499139414, "eval_loss": 0.003353823907673359, "eval_runtime": 2.1013, "eval_samples_per_second": 47.59, "eval_steps_per_second": 1.904, "eval_wer": 0.27, "step": 65000 }, { "epoch": 7.674610665408212, "grad_norm": 0.28730136156082153, "learning_rate": 9.444357788508797e-05, "loss": 0.0112, "step": 65050 }, { "epoch": 7.680509674374705, "grad_norm": 0.10269756615161896, "learning_rate": 9.443506405047303e-05, "loss": 0.012, "step": 65100 }, { "epoch": 7.686408683341199, "grad_norm": 0.24728912115097046, "learning_rate": 9.442654408253838e-05, "loss": 0.0103, "step": 65150 }, { "epoch": 7.6923076923076925, "grad_norm": 0.3086473047733307, "learning_rate": 9.441801798246002e-05, "loss": 0.0104, "step": 65200 }, { "epoch": 7.698206701274186, "grad_norm": 0.16866165399551392, "learning_rate": 9.440948575141484e-05, "loss": 0.0102, "step": 65250 }, { "epoch": 7.70410571024068, "grad_norm": 0.21387292444705963, "learning_rate": 9.44009473905805e-05, "loss": 0.0093, "step": 65300 }, { "epoch": 7.7100047192071735, "grad_norm": 0.18593081831932068, "learning_rate": 9.439240290113555e-05, "loss": 0.0118, "step": 65350 }, { "epoch": 7.715903728173667, "grad_norm": 0.19345425069332123, "learning_rate": 9.438385228425938e-05, "loss": 0.0105, "step": 65400 }, { "epoch": 7.721802737140161, "grad_norm": 0.24066750705242157, "learning_rate": 9.437529554113223e-05, "loss": 0.0111, "step": 65450 }, { "epoch": 7.727701746106654, "grad_norm": 0.26062241196632385, "learning_rate": 9.436673267293516e-05, "loss": 0.0119, "step": 65500 }, { "epoch": 7.733600755073148, "grad_norm": 0.14446832239627838, "learning_rate": 9.43581636808501e-05, "loss": 0.0113, "step": 65550 }, { "epoch": 7.739499764039642, "grad_norm": 0.5924032926559448, "learning_rate": 9.434958856605982e-05, "loss": 0.0114, "step": 65600 }, { "epoch": 7.745398773006135, "grad_norm": 0.25982797145843506, "learning_rate": 9.434100732974792e-05, "loss": 0.0085, "step": 65650 }, { "epoch": 7.751297781972629, "grad_norm": 0.07667102664709091, "learning_rate": 9.433241997309888e-05, "loss": 0.0094, "step": 65700 }, { "epoch": 7.757196790939123, "grad_norm": 0.08666866272687912, "learning_rate": 9.432382649729797e-05, "loss": 0.0099, "step": 65750 }, { "epoch": 7.763095799905615, "grad_norm": 0.20800642669200897, "learning_rate": 9.431522690353137e-05, "loss": 0.0115, "step": 65800 }, { "epoch": 7.76899480887211, "grad_norm": 0.14127841591835022, "learning_rate": 9.430662119298605e-05, "loss": 0.0098, "step": 65850 }, { "epoch": 7.774893817838603, "grad_norm": 0.22234497964382172, "learning_rate": 9.429800936684988e-05, "loss": 0.0099, "step": 65900 }, { "epoch": 7.780792826805097, "grad_norm": 0.12389114499092102, "learning_rate": 9.42893914263115e-05, "loss": 0.0094, "step": 65950 }, { "epoch": 7.78669183577159, "grad_norm": 0.07474323362112045, "learning_rate": 9.428076737256045e-05, "loss": 0.0095, "step": 66000 }, { "epoch": 7.78669183577159, "eval_cer": 0.08605851979345955, "eval_loss": 0.001418738393113017, "eval_runtime": 2.0562, "eval_samples_per_second": 48.633, "eval_steps_per_second": 1.945, "eval_wer": 0.27, "step": 66000 }, { "epoch": 7.7925908447380845, "grad_norm": 0.2958829402923584, "learning_rate": 9.427213720678709e-05, "loss": 0.0118, "step": 66050 }, { "epoch": 7.798489853704577, "grad_norm": 0.13246826827526093, "learning_rate": 9.426350093018263e-05, "loss": 0.0099, "step": 66100 }, { "epoch": 7.804388862671071, "grad_norm": 0.1382545381784439, "learning_rate": 9.425485854393916e-05, "loss": 0.0092, "step": 66150 }, { "epoch": 7.8102878716375645, "grad_norm": 0.10231415927410126, "learning_rate": 9.424621004924955e-05, "loss": 0.0109, "step": 66200 }, { "epoch": 7.816186880604058, "grad_norm": 0.08132746815681458, "learning_rate": 9.423755544730755e-05, "loss": 0.009, "step": 66250 }, { "epoch": 7.822085889570552, "grad_norm": 0.07435303926467896, "learning_rate": 9.422889473930772e-05, "loss": 0.0114, "step": 66300 }, { "epoch": 7.8279848985370455, "grad_norm": 0.10187535732984543, "learning_rate": 9.422022792644552e-05, "loss": 0.0123, "step": 66350 }, { "epoch": 7.833883907503539, "grad_norm": 0.10695410519838333, "learning_rate": 9.42115550099172e-05, "loss": 0.0097, "step": 66400 }, { "epoch": 7.839782916470033, "grad_norm": 0.10771489888429642, "learning_rate": 9.420287599091987e-05, "loss": 0.0093, "step": 66450 }, { "epoch": 7.845681925436526, "grad_norm": 0.5847501158714294, "learning_rate": 9.41941908706515e-05, "loss": 0.0106, "step": 66500 }, { "epoch": 7.85158093440302, "grad_norm": 0.17155584692955017, "learning_rate": 9.418549965031089e-05, "loss": 0.0095, "step": 66550 }, { "epoch": 7.857479943369514, "grad_norm": 0.15654978156089783, "learning_rate": 9.417680233109767e-05, "loss": 0.0099, "step": 66600 }, { "epoch": 7.863378952336007, "grad_norm": 0.41954919695854187, "learning_rate": 9.416809891421232e-05, "loss": 0.0115, "step": 66650 }, { "epoch": 7.869277961302501, "grad_norm": 0.14041920006275177, "learning_rate": 9.415938940085616e-05, "loss": 0.0091, "step": 66700 }, { "epoch": 7.875176970268995, "grad_norm": 0.13843005895614624, "learning_rate": 9.415067379223134e-05, "loss": 0.01, "step": 66750 }, { "epoch": 7.881075979235488, "grad_norm": 0.19780758023262024, "learning_rate": 9.41419520895409e-05, "loss": 0.0115, "step": 66800 }, { "epoch": 7.886974988201982, "grad_norm": 0.17528754472732544, "learning_rate": 9.413322429398867e-05, "loss": 0.0113, "step": 66850 }, { "epoch": 7.892873997168476, "grad_norm": 0.08202637732028961, "learning_rate": 9.412449040677934e-05, "loss": 0.0121, "step": 66900 }, { "epoch": 7.898773006134969, "grad_norm": 0.08707831054925919, "learning_rate": 9.411575042911843e-05, "loss": 0.0088, "step": 66950 }, { "epoch": 7.904672015101463, "grad_norm": 0.26614850759506226, "learning_rate": 9.410700436221229e-05, "loss": 0.0132, "step": 67000 }, { "epoch": 7.904672015101463, "eval_cer": 0.08691910499139414, "eval_loss": 0.002736219670623541, "eval_runtime": 2.0206, "eval_samples_per_second": 49.49, "eval_steps_per_second": 1.98, "eval_wer": 0.27, "step": 67000 }, { "epoch": 7.9105710240679565, "grad_norm": 0.16219455003738403, "learning_rate": 9.409825220726818e-05, "loss": 0.0098, "step": 67050 }, { "epoch": 7.91647003303445, "grad_norm": 0.34423190355300903, "learning_rate": 9.408949396549412e-05, "loss": 0.0114, "step": 67100 }, { "epoch": 7.922369042000944, "grad_norm": 0.09077170491218567, "learning_rate": 9.408072963809897e-05, "loss": 0.0124, "step": 67150 }, { "epoch": 7.9282680509674375, "grad_norm": 0.17198394238948822, "learning_rate": 9.407195922629253e-05, "loss": 0.0087, "step": 67200 }, { "epoch": 7.934167059933931, "grad_norm": 0.11438117176294327, "learning_rate": 9.406318273128531e-05, "loss": 0.0081, "step": 67250 }, { "epoch": 7.940066068900425, "grad_norm": 0.13467295467853546, "learning_rate": 9.405440015428875e-05, "loss": 0.0097, "step": 67300 }, { "epoch": 7.945965077866918, "grad_norm": 0.07670722156763077, "learning_rate": 9.404561149651507e-05, "loss": 0.0102, "step": 67350 }, { "epoch": 7.951864086833412, "grad_norm": 0.3962189853191376, "learning_rate": 9.40368167591774e-05, "loss": 0.0116, "step": 67400 }, { "epoch": 7.957763095799906, "grad_norm": 0.0902019590139389, "learning_rate": 9.402801594348964e-05, "loss": 0.0097, "step": 67450 }, { "epoch": 7.963662104766399, "grad_norm": 0.11893511563539505, "learning_rate": 9.401920905066657e-05, "loss": 0.0101, "step": 67500 }, { "epoch": 7.969561113732893, "grad_norm": 0.116077721118927, "learning_rate": 9.401039608192377e-05, "loss": 0.0126, "step": 67550 }, { "epoch": 7.975460122699387, "grad_norm": 0.9446285367012024, "learning_rate": 9.400157703847769e-05, "loss": 0.0098, "step": 67600 }, { "epoch": 7.98135913166588, "grad_norm": 0.19386079907417297, "learning_rate": 9.399275192154563e-05, "loss": 0.0113, "step": 67650 }, { "epoch": 7.987258140632374, "grad_norm": 0.1536874622106552, "learning_rate": 9.398392073234572e-05, "loss": 0.0105, "step": 67700 }, { "epoch": 7.993157149598868, "grad_norm": 0.056475576013326645, "learning_rate": 9.397508347209689e-05, "loss": 0.0112, "step": 67750 }, { "epoch": 7.999056158565361, "grad_norm": 0.04180826619267464, "learning_rate": 9.396624014201894e-05, "loss": 0.0097, "step": 67800 }, { "epoch": 8.004955167531854, "grad_norm": 0.16285838186740875, "learning_rate": 9.395739074333251e-05, "loss": 0.0101, "step": 67850 }, { "epoch": 8.010854176498349, "grad_norm": 0.27532729506492615, "learning_rate": 9.39485352772591e-05, "loss": 0.0105, "step": 67900 }, { "epoch": 8.016753185464841, "grad_norm": 0.12693865597248077, "learning_rate": 9.393967374502098e-05, "loss": 0.01, "step": 67950 }, { "epoch": 8.022652194431336, "grad_norm": 0.1854410618543625, "learning_rate": 9.39308061478413e-05, "loss": 0.0081, "step": 68000 }, { "epoch": 8.022652194431336, "eval_cer": 0.08864027538726334, "eval_loss": 0.0019252394558861852, "eval_runtime": 2.1136, "eval_samples_per_second": 47.313, "eval_steps_per_second": 1.893, "eval_wer": 0.28, "step": 68000 }, { "epoch": 8.028551203397829, "grad_norm": 0.18211108446121216, "learning_rate": 9.392193248694406e-05, "loss": 0.0095, "step": 68050 }, { "epoch": 8.034450212364323, "grad_norm": 0.13261856138706207, "learning_rate": 9.391305276355407e-05, "loss": 0.009, "step": 68100 }, { "epoch": 8.040349221330816, "grad_norm": 0.15956255793571472, "learning_rate": 9.390416697889702e-05, "loss": 0.0071, "step": 68150 }, { "epoch": 8.04624823029731, "grad_norm": 0.17459145188331604, "learning_rate": 9.389527513419934e-05, "loss": 0.0077, "step": 68200 }, { "epoch": 8.052147239263803, "grad_norm": 0.12107226997613907, "learning_rate": 9.388637723068843e-05, "loss": 0.0085, "step": 68250 }, { "epoch": 8.058046248230298, "grad_norm": 0.3454025089740753, "learning_rate": 9.38774732695924e-05, "loss": 0.0081, "step": 68300 }, { "epoch": 8.06394525719679, "grad_norm": 0.17081262171268463, "learning_rate": 9.386856325214031e-05, "loss": 0.0085, "step": 68350 }, { "epoch": 8.069844266163285, "grad_norm": 0.16607552766799927, "learning_rate": 9.385964717956195e-05, "loss": 0.0084, "step": 68400 }, { "epoch": 8.075743275129778, "grad_norm": 0.07062529772520065, "learning_rate": 9.385072505308801e-05, "loss": 0.009, "step": 68450 }, { "epoch": 8.081642284096272, "grad_norm": 0.12675216794013977, "learning_rate": 9.384179687395e-05, "loss": 0.0096, "step": 68500 }, { "epoch": 8.087541293062765, "grad_norm": 0.1822839081287384, "learning_rate": 9.383286264338028e-05, "loss": 0.009, "step": 68550 }, { "epoch": 8.09344030202926, "grad_norm": 0.09957469999790192, "learning_rate": 9.382392236261201e-05, "loss": 0.009, "step": 68600 }, { "epoch": 8.099339310995752, "grad_norm": 0.25092893838882446, "learning_rate": 9.381497603287924e-05, "loss": 0.0082, "step": 68650 }, { "epoch": 8.105238319962247, "grad_norm": 0.11753557622432709, "learning_rate": 9.38060236554168e-05, "loss": 0.0088, "step": 68700 }, { "epoch": 8.11113732892874, "grad_norm": 0.15992002189159393, "learning_rate": 9.379706523146034e-05, "loss": 0.0084, "step": 68750 }, { "epoch": 8.117036337895234, "grad_norm": 0.14399035274982452, "learning_rate": 9.378810076224645e-05, "loss": 0.0089, "step": 68800 }, { "epoch": 8.122935346861727, "grad_norm": 0.25499704480171204, "learning_rate": 9.377913024901243e-05, "loss": 0.0097, "step": 68850 }, { "epoch": 8.128834355828221, "grad_norm": 0.20657750964164734, "learning_rate": 9.377015369299651e-05, "loss": 0.0099, "step": 68900 }, { "epoch": 8.134733364794714, "grad_norm": 0.2328042834997177, "learning_rate": 9.376117109543769e-05, "loss": 0.0101, "step": 68950 }, { "epoch": 8.140632373761209, "grad_norm": 0.05583968013525009, "learning_rate": 9.375218245757581e-05, "loss": 0.0089, "step": 69000 }, { "epoch": 8.140632373761209, "eval_cer": 0.08950086058519793, "eval_loss": 0.0023485152050852776, "eval_runtime": 2.0351, "eval_samples_per_second": 49.138, "eval_steps_per_second": 1.966, "eval_wer": 0.28, "step": 69000 }, { "epoch": 8.146531382727701, "grad_norm": 0.08570339530706406, "learning_rate": 9.374318778065162e-05, "loss": 0.0105, "step": 69050 }, { "epoch": 8.152430391694196, "grad_norm": 0.1541542112827301, "learning_rate": 9.373418706590658e-05, "loss": 0.0093, "step": 69100 }, { "epoch": 8.158329400660689, "grad_norm": 0.10622447729110718, "learning_rate": 9.372518031458308e-05, "loss": 0.0094, "step": 69150 }, { "epoch": 8.164228409627183, "grad_norm": 0.1000971794128418, "learning_rate": 9.371616752792431e-05, "loss": 0.009, "step": 69200 }, { "epoch": 8.170127418593676, "grad_norm": 0.25705474615097046, "learning_rate": 9.37071487071743e-05, "loss": 0.0107, "step": 69250 }, { "epoch": 8.17602642756017, "grad_norm": 0.15547354519367218, "learning_rate": 9.369812385357789e-05, "loss": 0.0099, "step": 69300 }, { "epoch": 8.181925436526663, "grad_norm": 0.32744932174682617, "learning_rate": 9.368909296838077e-05, "loss": 0.0106, "step": 69350 }, { "epoch": 8.187824445493158, "grad_norm": 0.03729626536369324, "learning_rate": 9.368005605282949e-05, "loss": 0.0092, "step": 69400 }, { "epoch": 8.19372345445965, "grad_norm": 0.359182745218277, "learning_rate": 9.367101310817137e-05, "loss": 0.0094, "step": 69450 }, { "epoch": 8.199622463426145, "grad_norm": 0.21109621226787567, "learning_rate": 9.366196413565462e-05, "loss": 0.0081, "step": 69500 }, { "epoch": 8.205521472392638, "grad_norm": 0.9067633152008057, "learning_rate": 9.365290913652824e-05, "loss": 0.0129, "step": 69550 }, { "epoch": 8.211420481359132, "grad_norm": 0.06970751285552979, "learning_rate": 9.364384811204211e-05, "loss": 0.0094, "step": 69600 }, { "epoch": 8.217319490325625, "grad_norm": 0.14191719889640808, "learning_rate": 9.36347810634469e-05, "loss": 0.0102, "step": 69650 }, { "epoch": 8.22321849929212, "grad_norm": 0.140197291970253, "learning_rate": 9.36257079919941e-05, "loss": 0.0093, "step": 69700 }, { "epoch": 8.229117508258613, "grad_norm": 0.16634908318519592, "learning_rate": 9.361662889893608e-05, "loss": 0.0099, "step": 69750 }, { "epoch": 8.235016517225105, "grad_norm": 0.03990714251995087, "learning_rate": 9.360754378552601e-05, "loss": 0.0109, "step": 69800 }, { "epoch": 8.2409155261916, "grad_norm": 0.16563475131988525, "learning_rate": 9.35984526530179e-05, "loss": 0.01, "step": 69850 }, { "epoch": 8.246814535158093, "grad_norm": 0.0819203108549118, "learning_rate": 9.358935550266659e-05, "loss": 0.0084, "step": 69900 }, { "epoch": 8.252713544124587, "grad_norm": 0.1996939778327942, "learning_rate": 9.358025233572775e-05, "loss": 0.011, "step": 69950 }, { "epoch": 8.25861255309108, "grad_norm": 0.36134758591651917, "learning_rate": 9.357114315345788e-05, "loss": 0.0085, "step": 70000 }, { "epoch": 8.25861255309108, "eval_cer": 0.08519793459552495, "eval_loss": 0.0024689722340554, "eval_runtime": 2.0941, "eval_samples_per_second": 47.753, "eval_steps_per_second": 1.91, "eval_wer": 0.26, "step": 70000 }, { "epoch": 8.264511562057574, "grad_norm": 0.13576547801494598, "learning_rate": 9.356202795711428e-05, "loss": 0.0083, "step": 70050 }, { "epoch": 8.270410571024067, "grad_norm": 0.3781415522098541, "learning_rate": 9.355290674795516e-05, "loss": 0.0096, "step": 70100 }, { "epoch": 8.276309579990562, "grad_norm": 0.1014973446726799, "learning_rate": 9.354377952723947e-05, "loss": 0.0106, "step": 70150 }, { "epoch": 8.282208588957054, "grad_norm": 0.2446599155664444, "learning_rate": 9.353464629622706e-05, "loss": 0.0107, "step": 70200 }, { "epoch": 8.288107597923549, "grad_norm": 0.17760932445526123, "learning_rate": 9.352550705617855e-05, "loss": 0.0107, "step": 70250 }, { "epoch": 8.294006606890042, "grad_norm": 0.07269815355539322, "learning_rate": 9.351636180835543e-05, "loss": 0.0113, "step": 70300 }, { "epoch": 8.299905615856536, "grad_norm": 0.12870965898036957, "learning_rate": 9.350721055402002e-05, "loss": 0.0094, "step": 70350 }, { "epoch": 8.305804624823029, "grad_norm": 0.1132727712392807, "learning_rate": 9.349805329443544e-05, "loss": 0.0102, "step": 70400 }, { "epoch": 8.311703633789524, "grad_norm": 0.3691772520542145, "learning_rate": 9.348889003086567e-05, "loss": 0.0094, "step": 70450 }, { "epoch": 8.317602642756016, "grad_norm": 0.18317833542823792, "learning_rate": 9.34797207645755e-05, "loss": 0.0089, "step": 70500 }, { "epoch": 8.32350165172251, "grad_norm": 0.14129307866096497, "learning_rate": 9.347054549683054e-05, "loss": 0.0091, "step": 70550 }, { "epoch": 8.329400660689004, "grad_norm": 0.2816484868526459, "learning_rate": 9.346136422889724e-05, "loss": 0.0095, "step": 70600 }, { "epoch": 8.335299669655498, "grad_norm": 0.17183566093444824, "learning_rate": 9.345217696204292e-05, "loss": 0.0095, "step": 70650 }, { "epoch": 8.341198678621991, "grad_norm": 0.3420717418193817, "learning_rate": 9.344298369753564e-05, "loss": 0.0102, "step": 70700 }, { "epoch": 8.347097687588485, "grad_norm": 0.08740220963954926, "learning_rate": 9.343378443664436e-05, "loss": 0.0095, "step": 70750 }, { "epoch": 8.352996696554978, "grad_norm": 0.23932161927223206, "learning_rate": 9.342457918063882e-05, "loss": 0.0106, "step": 70800 }, { "epoch": 8.358895705521473, "grad_norm": 0.21236400306224823, "learning_rate": 9.341536793078966e-05, "loss": 0.01, "step": 70850 }, { "epoch": 8.364794714487966, "grad_norm": 0.7516210079193115, "learning_rate": 9.340615068836824e-05, "loss": 0.0105, "step": 70900 }, { "epoch": 8.37069372345446, "grad_norm": 0.22191008925437927, "learning_rate": 9.339692745464685e-05, "loss": 0.0102, "step": 70950 }, { "epoch": 8.376592732420953, "grad_norm": 0.13798923790454865, "learning_rate": 9.338769823089853e-05, "loss": 0.0094, "step": 71000 }, { "epoch": 8.376592732420953, "eval_cer": 0.08691910499139414, "eval_loss": 0.0032046844717115164, "eval_runtime": 2.0295, "eval_samples_per_second": 49.274, "eval_steps_per_second": 1.971, "eval_wer": 0.27, "step": 71000 }, { "epoch": 8.382491741387447, "grad_norm": 0.10482214391231537, "learning_rate": 9.337846301839721e-05, "loss": 0.0097, "step": 71050 }, { "epoch": 8.38839075035394, "grad_norm": 0.10440460592508316, "learning_rate": 9.336922181841759e-05, "loss": 0.0105, "step": 71100 }, { "epoch": 8.394289759320435, "grad_norm": 0.11624065041542053, "learning_rate": 9.335997463223523e-05, "loss": 0.0098, "step": 71150 }, { "epoch": 8.400188768286927, "grad_norm": 0.0768909677863121, "learning_rate": 9.335072146112648e-05, "loss": 0.0101, "step": 71200 }, { "epoch": 8.406087777253422, "grad_norm": 0.10155040770769119, "learning_rate": 9.334146230636862e-05, "loss": 0.0085, "step": 71250 }, { "epoch": 8.411986786219915, "grad_norm": 0.2665851414203644, "learning_rate": 9.33321971692396e-05, "loss": 0.0089, "step": 71300 }, { "epoch": 8.41788579518641, "grad_norm": 0.24109239876270294, "learning_rate": 9.332292605101832e-05, "loss": 0.0097, "step": 71350 }, { "epoch": 8.423784804152902, "grad_norm": 0.2562546730041504, "learning_rate": 9.331364895298444e-05, "loss": 0.0122, "step": 71400 }, { "epoch": 8.429683813119397, "grad_norm": 0.1451035588979721, "learning_rate": 9.33043658764185e-05, "loss": 0.0094, "step": 71450 }, { "epoch": 8.43558282208589, "grad_norm": 0.08631058782339096, "learning_rate": 9.32950768226018e-05, "loss": 0.008, "step": 71500 }, { "epoch": 8.441481831052384, "grad_norm": 0.2156865894794464, "learning_rate": 9.328578179281649e-05, "loss": 0.0096, "step": 71550 }, { "epoch": 8.447380840018877, "grad_norm": 0.15681374073028564, "learning_rate": 9.327648078834558e-05, "loss": 0.0082, "step": 71600 }, { "epoch": 8.453279848985371, "grad_norm": 0.236406609416008, "learning_rate": 9.326717381047288e-05, "loss": 0.0113, "step": 71650 }, { "epoch": 8.459178857951864, "grad_norm": 0.23156413435935974, "learning_rate": 9.325786086048299e-05, "loss": 0.0099, "step": 71700 }, { "epoch": 8.465077866918358, "grad_norm": 0.22642278671264648, "learning_rate": 9.32485419396614e-05, "loss": 0.0103, "step": 71750 }, { "epoch": 8.470976875884851, "grad_norm": 0.0947188213467598, "learning_rate": 9.323921704929434e-05, "loss": 0.0077, "step": 71800 }, { "epoch": 8.476875884851346, "grad_norm": 0.16741804778575897, "learning_rate": 9.322988619066897e-05, "loss": 0.0103, "step": 71850 }, { "epoch": 8.482774893817838, "grad_norm": 0.18367567658424377, "learning_rate": 9.322054936507319e-05, "loss": 0.0093, "step": 71900 }, { "epoch": 8.488673902784333, "grad_norm": 0.16303390264511108, "learning_rate": 9.321120657379573e-05, "loss": 0.01, "step": 71950 }, { "epoch": 8.494572911750826, "grad_norm": 0.0815514549612999, "learning_rate": 9.320185781812623e-05, "loss": 0.0096, "step": 72000 }, { "epoch": 8.494572911750826, "eval_cer": 0.08519793459552495, "eval_loss": 0.0030643066857010126, "eval_runtime": 2.0447, "eval_samples_per_second": 48.906, "eval_steps_per_second": 1.956, "eval_wer": 0.26, "step": 72000 }, { "epoch": 8.50047192071732, "grad_norm": 0.15192878246307373, "learning_rate": 9.319250309935501e-05, "loss": 0.0092, "step": 72050 }, { "epoch": 8.506370929683813, "grad_norm": 0.2202191948890686, "learning_rate": 9.318314241877335e-05, "loss": 0.0107, "step": 72100 }, { "epoch": 8.512269938650308, "grad_norm": 0.09061261266469955, "learning_rate": 9.317377577767326e-05, "loss": 0.0097, "step": 72150 }, { "epoch": 8.5181689476168, "grad_norm": 0.1185733750462532, "learning_rate": 9.316440317734763e-05, "loss": 0.0094, "step": 72200 }, { "epoch": 8.524067956583295, "grad_norm": 0.1549907773733139, "learning_rate": 9.315502461909012e-05, "loss": 0.0101, "step": 72250 }, { "epoch": 8.529966965549788, "grad_norm": 0.24194985628128052, "learning_rate": 9.314564010419527e-05, "loss": 0.0104, "step": 72300 }, { "epoch": 8.53586597451628, "grad_norm": 0.16436758637428284, "learning_rate": 9.313624963395839e-05, "loss": 0.0089, "step": 72350 }, { "epoch": 8.541764983482775, "grad_norm": 0.23913806676864624, "learning_rate": 9.312685320967564e-05, "loss": 0.0083, "step": 72400 }, { "epoch": 8.54766399244927, "grad_norm": 0.047240983694791794, "learning_rate": 9.311745083264403e-05, "loss": 0.0115, "step": 72450 }, { "epoch": 8.553563001415762, "grad_norm": 0.18351484835147858, "learning_rate": 9.310804250416131e-05, "loss": 0.0108, "step": 72500 }, { "epoch": 8.559462010382255, "grad_norm": 0.14579574763774872, "learning_rate": 9.309862822552615e-05, "loss": 0.0095, "step": 72550 }, { "epoch": 8.56536101934875, "grad_norm": 0.10944526642560959, "learning_rate": 9.308920799803793e-05, "loss": 0.0086, "step": 72600 }, { "epoch": 8.571260028315242, "grad_norm": 0.10118357092142105, "learning_rate": 9.307978182299698e-05, "loss": 0.0086, "step": 72650 }, { "epoch": 8.577159037281737, "grad_norm": 0.14454637467861176, "learning_rate": 9.307034970170434e-05, "loss": 0.0111, "step": 72700 }, { "epoch": 8.58305804624823, "grad_norm": 0.15565119683742523, "learning_rate": 9.306091163546194e-05, "loss": 0.0114, "step": 72750 }, { "epoch": 8.588957055214724, "grad_norm": 0.13161292672157288, "learning_rate": 9.305146762557246e-05, "loss": 0.0091, "step": 72800 }, { "epoch": 8.594856064181217, "grad_norm": 0.19671878218650818, "learning_rate": 9.304201767333951e-05, "loss": 0.0114, "step": 72850 }, { "epoch": 8.600755073147711, "grad_norm": 0.2312028855085373, "learning_rate": 9.303256178006741e-05, "loss": 0.0081, "step": 72900 }, { "epoch": 8.606654082114204, "grad_norm": 0.1581554263830185, "learning_rate": 9.302309994706135e-05, "loss": 0.0098, "step": 72950 }, { "epoch": 8.612553091080699, "grad_norm": 0.13592112064361572, "learning_rate": 9.301363217562737e-05, "loss": 0.0099, "step": 73000 }, { "epoch": 8.612553091080699, "eval_cer": 0.09036144578313253, "eval_loss": 0.003129328601062298, "eval_runtime": 2.0765, "eval_samples_per_second": 48.158, "eval_steps_per_second": 1.926, "eval_wer": 0.28, "step": 73000 }, { "epoch": 8.618452100047191, "grad_norm": 0.23523102700710297, "learning_rate": 9.300415846707225e-05, "loss": 0.0092, "step": 73050 }, { "epoch": 8.624351109013686, "grad_norm": 0.21663644909858704, "learning_rate": 9.299467882270367e-05, "loss": 0.0117, "step": 73100 }, { "epoch": 8.630250117980179, "grad_norm": 0.16305376589298248, "learning_rate": 9.298519324383005e-05, "loss": 0.0111, "step": 73150 }, { "epoch": 8.636149126946673, "grad_norm": 0.17505314946174622, "learning_rate": 9.297570173176075e-05, "loss": 0.0092, "step": 73200 }, { "epoch": 8.642048135913166, "grad_norm": 0.24723896384239197, "learning_rate": 9.296620428780579e-05, "loss": 0.0092, "step": 73250 }, { "epoch": 8.64794714487966, "grad_norm": 0.11524656414985657, "learning_rate": 9.295670091327615e-05, "loss": 0.0098, "step": 73300 }, { "epoch": 8.653846153846153, "grad_norm": 0.15564295649528503, "learning_rate": 9.294719160948353e-05, "loss": 0.01, "step": 73350 }, { "epoch": 8.659745162812648, "grad_norm": 0.08445580303668976, "learning_rate": 9.29376763777405e-05, "loss": 0.0118, "step": 73400 }, { "epoch": 8.66564417177914, "grad_norm": 0.15865758061408997, "learning_rate": 9.292815521936045e-05, "loss": 0.0119, "step": 73450 }, { "epoch": 8.671543180745635, "grad_norm": 0.17184491455554962, "learning_rate": 9.291862813565755e-05, "loss": 0.0102, "step": 73500 }, { "epoch": 8.677442189712128, "grad_norm": 0.14041253924369812, "learning_rate": 9.290909512794685e-05, "loss": 0.0089, "step": 73550 }, { "epoch": 8.683341198678622, "grad_norm": 0.22780048847198486, "learning_rate": 9.289955619754414e-05, "loss": 0.0112, "step": 73600 }, { "epoch": 8.689240207645115, "grad_norm": 0.1708543449640274, "learning_rate": 9.28900113457661e-05, "loss": 0.0095, "step": 73650 }, { "epoch": 8.69513921661161, "grad_norm": 0.12956608831882477, "learning_rate": 9.288046057393016e-05, "loss": 0.009, "step": 73700 }, { "epoch": 8.701038225578102, "grad_norm": 0.05014921352267265, "learning_rate": 9.287090388335462e-05, "loss": 0.0088, "step": 73750 }, { "epoch": 8.706937234544597, "grad_norm": 0.17184481024742126, "learning_rate": 9.286134127535859e-05, "loss": 0.0109, "step": 73800 }, { "epoch": 8.71283624351109, "grad_norm": 0.20980200171470642, "learning_rate": 9.285177275126198e-05, "loss": 0.0103, "step": 73850 }, { "epoch": 8.718735252477584, "grad_norm": 0.12012206763029099, "learning_rate": 9.284219831238549e-05, "loss": 0.0096, "step": 73900 }, { "epoch": 8.724634261444077, "grad_norm": 0.20425385236740112, "learning_rate": 9.283261796005072e-05, "loss": 0.0095, "step": 73950 }, { "epoch": 8.730533270410572, "grad_norm": 0.10260020941495895, "learning_rate": 9.282303169558e-05, "loss": 0.0095, "step": 74000 }, { "epoch": 8.730533270410572, "eval_cer": 0.09380378657487091, "eval_loss": 0.003719760337844491, "eval_runtime": 2.1053, "eval_samples_per_second": 47.5, "eval_steps_per_second": 1.9, "eval_wer": 0.28, "step": 74000 }, { "epoch": 8.736432279377064, "grad_norm": 0.16461002826690674, "learning_rate": 9.281343952029654e-05, "loss": 0.0089, "step": 74050 }, { "epoch": 8.742331288343559, "grad_norm": 0.08207046240568161, "learning_rate": 9.28038414355243e-05, "loss": 0.0075, "step": 74100 }, { "epoch": 8.748230297310052, "grad_norm": 0.567449152469635, "learning_rate": 9.27942374425881e-05, "loss": 0.0091, "step": 74150 }, { "epoch": 8.754129306276546, "grad_norm": 0.123080775141716, "learning_rate": 9.27846275428136e-05, "loss": 0.0094, "step": 74200 }, { "epoch": 8.760028315243039, "grad_norm": 0.2981109023094177, "learning_rate": 9.27750117375272e-05, "loss": 0.0096, "step": 74250 }, { "epoch": 8.765927324209533, "grad_norm": 0.07139771431684494, "learning_rate": 9.27653900280562e-05, "loss": 0.0111, "step": 74300 }, { "epoch": 8.771826333176026, "grad_norm": 0.27293655276298523, "learning_rate": 9.275576241572863e-05, "loss": 0.0131, "step": 74350 }, { "epoch": 8.77772534214252, "grad_norm": 0.35800254344940186, "learning_rate": 9.274612890187342e-05, "loss": 0.0114, "step": 74400 }, { "epoch": 8.783624351109014, "grad_norm": 0.22999870777130127, "learning_rate": 9.273648948782026e-05, "loss": 0.0109, "step": 74450 }, { "epoch": 8.789523360075508, "grad_norm": 0.10995606333017349, "learning_rate": 9.272684417489963e-05, "loss": 0.0087, "step": 74500 }, { "epoch": 8.795422369042, "grad_norm": 0.07720629125833511, "learning_rate": 9.271719296444293e-05, "loss": 0.0091, "step": 74550 }, { "epoch": 8.801321378008495, "grad_norm": 0.16729523241519928, "learning_rate": 9.270753585778222e-05, "loss": 0.0104, "step": 74600 }, { "epoch": 8.807220386974988, "grad_norm": 0.25272101163864136, "learning_rate": 9.269787285625055e-05, "loss": 0.0107, "step": 74650 }, { "epoch": 8.813119395941483, "grad_norm": 0.13199807703495026, "learning_rate": 9.268820396118163e-05, "loss": 0.0094, "step": 74700 }, { "epoch": 8.819018404907975, "grad_norm": 0.15331976115703583, "learning_rate": 9.267852917391005e-05, "loss": 0.0102, "step": 74750 }, { "epoch": 8.82491741387447, "grad_norm": 0.04225609079003334, "learning_rate": 9.266884849577124e-05, "loss": 0.0113, "step": 74800 }, { "epoch": 8.830816422840963, "grad_norm": 0.15113411843776703, "learning_rate": 9.26591619281014e-05, "loss": 0.0128, "step": 74850 }, { "epoch": 8.836715431807455, "grad_norm": 0.23334145545959473, "learning_rate": 9.264946947223755e-05, "loss": 0.0093, "step": 74900 }, { "epoch": 8.84261444077395, "grad_norm": 0.1756592094898224, "learning_rate": 9.263977112951753e-05, "loss": 0.0102, "step": 74950 }, { "epoch": 8.848513449740445, "grad_norm": 0.10660938173532486, "learning_rate": 9.263006690127999e-05, "loss": 0.0102, "step": 75000 }, { "epoch": 8.848513449740445, "eval_cer": 0.08519793459552495, "eval_loss": 0.0016357579734176397, "eval_runtime": 2.0612, "eval_samples_per_second": 48.515, "eval_steps_per_second": 1.941, "eval_wer": 0.26, "step": 75000 }, { "epoch": 8.854412458706937, "grad_norm": 0.15972451865673065, "learning_rate": 9.262035678886441e-05, "loss": 0.0098, "step": 75050 }, { "epoch": 8.86031146767343, "grad_norm": 0.05983291566371918, "learning_rate": 9.261064079361101e-05, "loss": 0.0084, "step": 75100 }, { "epoch": 8.866210476639925, "grad_norm": 0.042831968516111374, "learning_rate": 9.260091891686095e-05, "loss": 0.0097, "step": 75150 }, { "epoch": 8.872109485606417, "grad_norm": 0.0702119991183281, "learning_rate": 9.259119115995609e-05, "loss": 0.0119, "step": 75200 }, { "epoch": 8.878008494572912, "grad_norm": 0.18407757580280304, "learning_rate": 9.258145752423914e-05, "loss": 0.0115, "step": 75250 }, { "epoch": 8.883907503539405, "grad_norm": 0.1336008757352829, "learning_rate": 9.257171801105364e-05, "loss": 0.0091, "step": 75300 }, { "epoch": 8.8898065125059, "grad_norm": 0.1396484076976776, "learning_rate": 9.256197262174394e-05, "loss": 0.0086, "step": 75350 }, { "epoch": 8.895705521472392, "grad_norm": 0.11038842052221298, "learning_rate": 9.255222135765511e-05, "loss": 0.0097, "step": 75400 }, { "epoch": 8.901604530438886, "grad_norm": 0.1609783172607422, "learning_rate": 9.254246422013317e-05, "loss": 0.0101, "step": 75450 }, { "epoch": 8.90750353940538, "grad_norm": 0.20836517214775085, "learning_rate": 9.253270121052488e-05, "loss": 0.0103, "step": 75500 }, { "epoch": 8.913402548371874, "grad_norm": 0.13597866892814636, "learning_rate": 9.25229323301778e-05, "loss": 0.011, "step": 75550 }, { "epoch": 8.919301557338366, "grad_norm": 0.24407508969306946, "learning_rate": 9.251315758044033e-05, "loss": 0.0111, "step": 75600 }, { "epoch": 8.925200566304861, "grad_norm": 0.28479236364364624, "learning_rate": 9.250337696266165e-05, "loss": 0.0095, "step": 75650 }, { "epoch": 8.931099575271354, "grad_norm": 0.06513427197933197, "learning_rate": 9.249359047819178e-05, "loss": 0.0098, "step": 75700 }, { "epoch": 8.936998584237848, "grad_norm": 0.09965776652097702, "learning_rate": 9.248379812838154e-05, "loss": 0.008, "step": 75750 }, { "epoch": 8.942897593204341, "grad_norm": 0.24161744117736816, "learning_rate": 9.247399991458255e-05, "loss": 0.0091, "step": 75800 }, { "epoch": 8.948796602170836, "grad_norm": 0.10809270292520523, "learning_rate": 9.246419583814725e-05, "loss": 0.0077, "step": 75850 }, { "epoch": 8.954695611137328, "grad_norm": 0.585153341293335, "learning_rate": 9.245438590042887e-05, "loss": 0.0082, "step": 75900 }, { "epoch": 8.960594620103823, "grad_norm": 0.388114333152771, "learning_rate": 9.24445701027815e-05, "loss": 0.0107, "step": 75950 }, { "epoch": 8.966493629070316, "grad_norm": 0.19961406290531158, "learning_rate": 9.243474844655997e-05, "loss": 0.0086, "step": 76000 }, { "epoch": 8.966493629070316, "eval_cer": 0.08777969018932874, "eval_loss": 0.002424018457531929, "eval_runtime": 2.0736, "eval_samples_per_second": 48.226, "eval_steps_per_second": 1.929, "eval_wer": 0.27, "step": 76000 }, { "epoch": 8.97239263803681, "grad_norm": 0.12791840732097626, "learning_rate": 9.242492093311996e-05, "loss": 0.0097, "step": 76050 }, { "epoch": 8.978291647003303, "grad_norm": 0.17870637774467468, "learning_rate": 9.241508756381795e-05, "loss": 0.009, "step": 76100 }, { "epoch": 8.984190655969797, "grad_norm": 0.10792700201272964, "learning_rate": 9.240524834001123e-05, "loss": 0.0118, "step": 76150 }, { "epoch": 8.99008966493629, "grad_norm": 0.045900680124759674, "learning_rate": 9.239540326305791e-05, "loss": 0.0106, "step": 76200 }, { "epoch": 8.995988673902785, "grad_norm": 0.23212726414203644, "learning_rate": 9.238555233431686e-05, "loss": 0.0121, "step": 76250 }, { "epoch": 9.001887682869278, "grad_norm": 0.19060464203357697, "learning_rate": 9.237569555514783e-05, "loss": 0.0079, "step": 76300 }, { "epoch": 9.007786691835772, "grad_norm": 0.1748841553926468, "learning_rate": 9.236583292691132e-05, "loss": 0.0085, "step": 76350 }, { "epoch": 9.013685700802265, "grad_norm": 0.14299020171165466, "learning_rate": 9.235596445096865e-05, "loss": 0.0084, "step": 76400 }, { "epoch": 9.01958470976876, "grad_norm": 0.13128779828548431, "learning_rate": 9.234609012868196e-05, "loss": 0.0086, "step": 76450 }, { "epoch": 9.025483718735252, "grad_norm": 0.1107235699892044, "learning_rate": 9.233620996141421e-05, "loss": 0.0095, "step": 76500 }, { "epoch": 9.031382727701747, "grad_norm": 0.0772876888513565, "learning_rate": 9.232632395052913e-05, "loss": 0.0097, "step": 76550 }, { "epoch": 9.03728173666824, "grad_norm": 0.2202175408601761, "learning_rate": 9.231643209739128e-05, "loss": 0.0094, "step": 76600 }, { "epoch": 9.043180745634734, "grad_norm": 0.30134421586990356, "learning_rate": 9.230653440336601e-05, "loss": 0.0084, "step": 76650 }, { "epoch": 9.049079754601227, "grad_norm": 0.06918106973171234, "learning_rate": 9.22966308698195e-05, "loss": 0.0089, "step": 76700 }, { "epoch": 9.054978763567721, "grad_norm": 0.0576365701854229, "learning_rate": 9.228672149811873e-05, "loss": 0.008, "step": 76750 }, { "epoch": 9.060877772534214, "grad_norm": 0.17193259298801422, "learning_rate": 9.227680628963145e-05, "loss": 0.0096, "step": 76800 }, { "epoch": 9.066776781500709, "grad_norm": 0.15438218414783478, "learning_rate": 9.226688524572627e-05, "loss": 0.009, "step": 76850 }, { "epoch": 9.072675790467201, "grad_norm": 0.08483544737100601, "learning_rate": 9.225695836777256e-05, "loss": 0.0073, "step": 76900 }, { "epoch": 9.078574799433696, "grad_norm": 0.24326014518737793, "learning_rate": 9.224702565714054e-05, "loss": 0.0098, "step": 76950 }, { "epoch": 9.084473808400189, "grad_norm": 0.2680503726005554, "learning_rate": 9.22370871152012e-05, "loss": 0.0086, "step": 77000 }, { "epoch": 9.084473808400189, "eval_cer": 0.08433734939759036, "eval_loss": 0.0020024063996970654, "eval_runtime": 2.0326, "eval_samples_per_second": 49.197, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 77000 }, { "epoch": 9.090372817366683, "grad_norm": 0.18348954617977142, "learning_rate": 9.222714274332634e-05, "loss": 0.0084, "step": 77050 }, { "epoch": 9.096271826333176, "grad_norm": 0.10026619583368301, "learning_rate": 9.221719254288856e-05, "loss": 0.0083, "step": 77100 }, { "epoch": 9.10217083529967, "grad_norm": 0.13681791722774506, "learning_rate": 9.220723651526131e-05, "loss": 0.0094, "step": 77150 }, { "epoch": 9.108069844266163, "grad_norm": 0.2766302824020386, "learning_rate": 9.219727466181877e-05, "loss": 0.0082, "step": 77200 }, { "epoch": 9.113968853232658, "grad_norm": 0.13734033703804016, "learning_rate": 9.218730698393598e-05, "loss": 0.0088, "step": 77250 }, { "epoch": 9.11986786219915, "grad_norm": 0.09657356142997742, "learning_rate": 9.217733348298878e-05, "loss": 0.0096, "step": 77300 }, { "epoch": 9.125766871165645, "grad_norm": 0.12038037180900574, "learning_rate": 9.216735416035378e-05, "loss": 0.009, "step": 77350 }, { "epoch": 9.131665880132138, "grad_norm": 0.14691320061683655, "learning_rate": 9.215736901740842e-05, "loss": 0.0072, "step": 77400 }, { "epoch": 9.137564889098632, "grad_norm": 0.16731753945350647, "learning_rate": 9.214737805553093e-05, "loss": 0.007, "step": 77450 }, { "epoch": 9.143463898065125, "grad_norm": 0.24789655208587646, "learning_rate": 9.213738127610036e-05, "loss": 0.0074, "step": 77500 }, { "epoch": 9.14936290703162, "grad_norm": 0.12026391178369522, "learning_rate": 9.212737868049657e-05, "loss": 0.0102, "step": 77550 }, { "epoch": 9.155261915998112, "grad_norm": 0.11467965692281723, "learning_rate": 9.211737027010017e-05, "loss": 0.0091, "step": 77600 }, { "epoch": 9.161160924964605, "grad_norm": 0.13679192960262299, "learning_rate": 9.210735604629263e-05, "loss": 0.0092, "step": 77650 }, { "epoch": 9.1670599339311, "grad_norm": 0.12275739014148712, "learning_rate": 9.209733601045622e-05, "loss": 0.0092, "step": 77700 }, { "epoch": 9.172958942897592, "grad_norm": 0.15574438869953156, "learning_rate": 9.208731016397396e-05, "loss": 0.0075, "step": 77750 }, { "epoch": 9.178857951864087, "grad_norm": 0.15785163640975952, "learning_rate": 9.207727850822971e-05, "loss": 0.0083, "step": 77800 }, { "epoch": 9.18475696083058, "grad_norm": 0.1864462047815323, "learning_rate": 9.206724104460815e-05, "loss": 0.0083, "step": 77850 }, { "epoch": 9.190655969797074, "grad_norm": 0.14343152940273285, "learning_rate": 9.205719777449473e-05, "loss": 0.0075, "step": 77900 }, { "epoch": 9.196554978763567, "grad_norm": 0.0927257239818573, "learning_rate": 9.20471486992757e-05, "loss": 0.0087, "step": 77950 }, { "epoch": 9.202453987730062, "grad_norm": 0.13531599938869476, "learning_rate": 9.203709382033814e-05, "loss": 0.0096, "step": 78000 }, { "epoch": 9.202453987730062, "eval_cer": 0.08777969018932874, "eval_loss": 0.0028556501492857933, "eval_runtime": 2.0538, "eval_samples_per_second": 48.689, "eval_steps_per_second": 1.948, "eval_wer": 0.27, "step": 78000 }, { "epoch": 9.208352996696554, "grad_norm": 0.18990515172481537, "learning_rate": 9.202703313906989e-05, "loss": 0.01, "step": 78050 }, { "epoch": 9.214252005663049, "grad_norm": 0.23757609724998474, "learning_rate": 9.201696665685963e-05, "loss": 0.0099, "step": 78100 }, { "epoch": 9.220151014629542, "grad_norm": 0.15517565608024597, "learning_rate": 9.200689437509682e-05, "loss": 0.0093, "step": 78150 }, { "epoch": 9.226050023596036, "grad_norm": 0.08865956962108612, "learning_rate": 9.199681629517173e-05, "loss": 0.0077, "step": 78200 }, { "epoch": 9.231949032562529, "grad_norm": 0.26294440031051636, "learning_rate": 9.198673241847542e-05, "loss": 0.0085, "step": 78250 }, { "epoch": 9.237848041529023, "grad_norm": 0.1792519986629486, "learning_rate": 9.197664274639977e-05, "loss": 0.0089, "step": 78300 }, { "epoch": 9.243747050495516, "grad_norm": 0.08272642642259598, "learning_rate": 9.196654728033742e-05, "loss": 0.0078, "step": 78350 }, { "epoch": 9.24964605946201, "grad_norm": 0.23229269683361053, "learning_rate": 9.195644602168184e-05, "loss": 0.0082, "step": 78400 }, { "epoch": 9.255545068428503, "grad_norm": 0.14149242639541626, "learning_rate": 9.194633897182732e-05, "loss": 0.0095, "step": 78450 }, { "epoch": 9.261444077394998, "grad_norm": 0.11545562744140625, "learning_rate": 9.193622613216891e-05, "loss": 0.0108, "step": 78500 }, { "epoch": 9.26734308636149, "grad_norm": 0.1395643949508667, "learning_rate": 9.192610750410245e-05, "loss": 0.0102, "step": 78550 }, { "epoch": 9.273242095327985, "grad_norm": 0.15549324452877045, "learning_rate": 9.191598308902463e-05, "loss": 0.0074, "step": 78600 }, { "epoch": 9.279141104294478, "grad_norm": 0.16917967796325684, "learning_rate": 9.190585288833291e-05, "loss": 0.0072, "step": 78650 }, { "epoch": 9.285040113260973, "grad_norm": 0.1724804788827896, "learning_rate": 9.189571690342555e-05, "loss": 0.0086, "step": 78700 }, { "epoch": 9.290939122227465, "grad_norm": 0.1604149341583252, "learning_rate": 9.18855751357016e-05, "loss": 0.0092, "step": 78750 }, { "epoch": 9.29683813119396, "grad_norm": 0.1768215298652649, "learning_rate": 9.187542758656091e-05, "loss": 0.0094, "step": 78800 }, { "epoch": 9.302737140160453, "grad_norm": 0.4036208391189575, "learning_rate": 9.186527425740416e-05, "loss": 0.0088, "step": 78850 }, { "epoch": 9.308636149126947, "grad_norm": 0.3077423572540283, "learning_rate": 9.185511514963277e-05, "loss": 0.0089, "step": 78900 }, { "epoch": 9.31453515809344, "grad_norm": 0.19992543756961823, "learning_rate": 9.184495026464902e-05, "loss": 0.0098, "step": 78950 }, { "epoch": 9.320434167059934, "grad_norm": 0.10981842130422592, "learning_rate": 9.183477960385592e-05, "loss": 0.0085, "step": 79000 }, { "epoch": 9.320434167059934, "eval_cer": 0.08605851979345955, "eval_loss": 0.0012019064743071795, "eval_runtime": 2.0586, "eval_samples_per_second": 48.577, "eval_steps_per_second": 1.943, "eval_wer": 0.27, "step": 79000 }, { "epoch": 9.326333176026427, "grad_norm": 0.13449884951114655, "learning_rate": 9.182460316865735e-05, "loss": 0.0103, "step": 79050 }, { "epoch": 9.332232184992922, "grad_norm": 0.17787902057170868, "learning_rate": 9.181442096045794e-05, "loss": 0.01, "step": 79100 }, { "epoch": 9.338131193959414, "grad_norm": 0.026064753532409668, "learning_rate": 9.180423298066311e-05, "loss": 0.0099, "step": 79150 }, { "epoch": 9.344030202925909, "grad_norm": 0.048136334866285324, "learning_rate": 9.179403923067913e-05, "loss": 0.0096, "step": 79200 }, { "epoch": 9.349929211892402, "grad_norm": 0.07399459183216095, "learning_rate": 9.1783839711913e-05, "loss": 0.01, "step": 79250 }, { "epoch": 9.355828220858896, "grad_norm": 0.07437000423669815, "learning_rate": 9.177363442577255e-05, "loss": 0.0099, "step": 79300 }, { "epoch": 9.361727229825389, "grad_norm": 0.1957276463508606, "learning_rate": 9.176342337366643e-05, "loss": 0.0077, "step": 79350 }, { "epoch": 9.367626238791884, "grad_norm": 0.14653265476226807, "learning_rate": 9.175320655700406e-05, "loss": 0.0091, "step": 79400 }, { "epoch": 9.373525247758376, "grad_norm": 0.18421100080013275, "learning_rate": 9.174298397719561e-05, "loss": 0.0089, "step": 79450 }, { "epoch": 9.379424256724871, "grad_norm": 0.18059946596622467, "learning_rate": 9.173275563565212e-05, "loss": 0.0087, "step": 79500 }, { "epoch": 9.385323265691364, "grad_norm": 0.16327981650829315, "learning_rate": 9.17225215337854e-05, "loss": 0.011, "step": 79550 }, { "epoch": 9.391222274657858, "grad_norm": 0.16621457040309906, "learning_rate": 9.171228167300804e-05, "loss": 0.0102, "step": 79600 }, { "epoch": 9.397121283624351, "grad_norm": 0.1714535802602768, "learning_rate": 9.170203605473347e-05, "loss": 0.0087, "step": 79650 }, { "epoch": 9.403020292590845, "grad_norm": 0.24839815497398376, "learning_rate": 9.169178468037582e-05, "loss": 0.0073, "step": 79700 }, { "epoch": 9.408919301557338, "grad_norm": 0.15711519122123718, "learning_rate": 9.168152755135012e-05, "loss": 0.0087, "step": 79750 }, { "epoch": 9.414818310523833, "grad_norm": 0.192637100815773, "learning_rate": 9.167126466907214e-05, "loss": 0.0094, "step": 79800 }, { "epoch": 9.420717319490326, "grad_norm": 0.0669189915060997, "learning_rate": 9.166099603495845e-05, "loss": 0.0083, "step": 79850 }, { "epoch": 9.42661632845682, "grad_norm": 0.23821379244327545, "learning_rate": 9.16507216504264e-05, "loss": 0.0093, "step": 79900 }, { "epoch": 9.432515337423313, "grad_norm": 0.18944504857063293, "learning_rate": 9.16404415168942e-05, "loss": 0.0085, "step": 79950 }, { "epoch": 9.438414346389807, "grad_norm": 0.04929226636886597, "learning_rate": 9.163015563578074e-05, "loss": 0.01, "step": 80000 }, { "epoch": 9.438414346389807, "eval_cer": 0.08691910499139414, "eval_loss": 0.004760333336889744, "eval_runtime": 2.0894, "eval_samples_per_second": 47.86, "eval_steps_per_second": 1.914, "eval_wer": 0.27, "step": 80000 }, { "epoch": 9.4443133553563, "grad_norm": 0.1262819468975067, "learning_rate": 9.161986400850583e-05, "loss": 0.01, "step": 80050 }, { "epoch": 9.450212364322795, "grad_norm": 0.02373533509671688, "learning_rate": 9.160956663648998e-05, "loss": 0.0088, "step": 80100 }, { "epoch": 9.456111373289287, "grad_norm": 0.29294368624687195, "learning_rate": 9.159926352115451e-05, "loss": 0.0089, "step": 80150 }, { "epoch": 9.46201038225578, "grad_norm": 0.15874458849430084, "learning_rate": 9.158895466392158e-05, "loss": 0.0088, "step": 80200 }, { "epoch": 9.467909391222275, "grad_norm": 0.22328579425811768, "learning_rate": 9.157864006621408e-05, "loss": 0.0092, "step": 80250 }, { "epoch": 9.473808400188767, "grad_norm": 0.2068365067243576, "learning_rate": 9.156831972945576e-05, "loss": 0.0088, "step": 80300 }, { "epoch": 9.479707409155262, "grad_norm": 0.09311965107917786, "learning_rate": 9.155799365507108e-05, "loss": 0.0092, "step": 80350 }, { "epoch": 9.485606418121755, "grad_norm": 0.1538970172405243, "learning_rate": 9.154766184448535e-05, "loss": 0.0102, "step": 80400 }, { "epoch": 9.49150542708825, "grad_norm": 0.12361559271812439, "learning_rate": 9.153732429912469e-05, "loss": 0.0107, "step": 80450 }, { "epoch": 9.497404436054742, "grad_norm": 0.12580278515815735, "learning_rate": 9.152698102041592e-05, "loss": 0.0106, "step": 80500 }, { "epoch": 9.503303445021237, "grad_norm": 0.10425682365894318, "learning_rate": 9.151663200978677e-05, "loss": 0.0079, "step": 80550 }, { "epoch": 9.50920245398773, "grad_norm": 0.11454560607671738, "learning_rate": 9.150627726866567e-05, "loss": 0.0089, "step": 80600 }, { "epoch": 9.515101462954224, "grad_norm": 0.2549012005329132, "learning_rate": 9.149591679848188e-05, "loss": 0.0094, "step": 80650 }, { "epoch": 9.521000471920717, "grad_norm": 0.1593780815601349, "learning_rate": 9.148555060066541e-05, "loss": 0.0088, "step": 80700 }, { "epoch": 9.526899480887211, "grad_norm": 0.09655273705720901, "learning_rate": 9.147517867664717e-05, "loss": 0.0093, "step": 80750 }, { "epoch": 9.532798489853704, "grad_norm": 0.11794845014810562, "learning_rate": 9.14648010278587e-05, "loss": 0.0093, "step": 80800 }, { "epoch": 9.538697498820198, "grad_norm": 0.07289727032184601, "learning_rate": 9.145441765573248e-05, "loss": 0.0085, "step": 80850 }, { "epoch": 9.544596507786691, "grad_norm": 0.10458162426948547, "learning_rate": 9.144402856170168e-05, "loss": 0.0102, "step": 80900 }, { "epoch": 9.550495516753186, "grad_norm": 0.12906308472156525, "learning_rate": 9.143363374720031e-05, "loss": 0.0091, "step": 80950 }, { "epoch": 9.556394525719679, "grad_norm": 0.15454509854316711, "learning_rate": 9.142323321366315e-05, "loss": 0.0087, "step": 81000 }, { "epoch": 9.556394525719679, "eval_cer": 0.08691910499139414, "eval_loss": 0.003389403922483325, "eval_runtime": 2.0457, "eval_samples_per_second": 48.884, "eval_steps_per_second": 1.955, "eval_wer": 0.27, "step": 81000 }, { "epoch": 9.562293534686173, "grad_norm": 0.1449652463197708, "learning_rate": 9.141282696252576e-05, "loss": 0.0077, "step": 81050 }, { "epoch": 9.568192543652666, "grad_norm": 0.1556699275970459, "learning_rate": 9.140241499522452e-05, "loss": 0.007, "step": 81100 }, { "epoch": 9.57409155261916, "grad_norm": 0.11865627765655518, "learning_rate": 9.139199731319657e-05, "loss": 0.0079, "step": 81150 }, { "epoch": 9.579990561585653, "grad_norm": 0.041149429976940155, "learning_rate": 9.138157391787985e-05, "loss": 0.0092, "step": 81200 }, { "epoch": 9.585889570552148, "grad_norm": 0.14963197708129883, "learning_rate": 9.137114481071312e-05, "loss": 0.0083, "step": 81250 }, { "epoch": 9.59178857951864, "grad_norm": 0.15547463297843933, "learning_rate": 9.136070999313585e-05, "loss": 0.0098, "step": 81300 }, { "epoch": 9.597687588485135, "grad_norm": 0.14051172137260437, "learning_rate": 9.135026946658838e-05, "loss": 0.0089, "step": 81350 }, { "epoch": 9.603586597451628, "grad_norm": 0.12585152685642242, "learning_rate": 9.133982323251178e-05, "loss": 0.0092, "step": 81400 }, { "epoch": 9.609485606418122, "grad_norm": 0.15053388476371765, "learning_rate": 9.132937129234795e-05, "loss": 0.0108, "step": 81450 }, { "epoch": 9.615384615384615, "grad_norm": 0.1353897750377655, "learning_rate": 9.131891364753955e-05, "loss": 0.0102, "step": 81500 }, { "epoch": 9.62128362435111, "grad_norm": 0.08799917995929718, "learning_rate": 9.130845029953003e-05, "loss": 0.0089, "step": 81550 }, { "epoch": 9.627182633317602, "grad_norm": 0.3216850757598877, "learning_rate": 9.129798124976366e-05, "loss": 0.0105, "step": 81600 }, { "epoch": 9.633081642284097, "grad_norm": 0.1497298777103424, "learning_rate": 9.128750649968544e-05, "loss": 0.0094, "step": 81650 }, { "epoch": 9.63898065125059, "grad_norm": 0.25826194882392883, "learning_rate": 9.12770260507412e-05, "loss": 0.0104, "step": 81700 }, { "epoch": 9.644879660217084, "grad_norm": 0.21093253791332245, "learning_rate": 9.126653990437754e-05, "loss": 0.0094, "step": 81750 }, { "epoch": 9.650778669183577, "grad_norm": 0.010433503426611423, "learning_rate": 9.125604806204186e-05, "loss": 0.0079, "step": 81800 }, { "epoch": 9.656677678150071, "grad_norm": 0.08763626962900162, "learning_rate": 9.124555052518234e-05, "loss": 0.0075, "step": 81850 }, { "epoch": 9.662576687116564, "grad_norm": 0.18721292912960052, "learning_rate": 9.123504729524792e-05, "loss": 0.0094, "step": 81900 }, { "epoch": 9.668475696083059, "grad_norm": 0.3075252175331116, "learning_rate": 9.122453837368836e-05, "loss": 0.0085, "step": 81950 }, { "epoch": 9.674374705049551, "grad_norm": 0.19154033064842224, "learning_rate": 9.121402376195422e-05, "loss": 0.0086, "step": 82000 }, { "epoch": 9.674374705049551, "eval_cer": 0.08605851979345955, "eval_loss": 0.0023899695370346308, "eval_runtime": 2.0893, "eval_samples_per_second": 47.863, "eval_steps_per_second": 1.915, "eval_wer": 0.27, "step": 82000 }, { "epoch": 9.680273714016046, "grad_norm": 0.13362255692481995, "learning_rate": 9.120350346149678e-05, "loss": 0.0115, "step": 82050 }, { "epoch": 9.686172722982539, "grad_norm": 0.1356368064880371, "learning_rate": 9.119297747376816e-05, "loss": 0.0094, "step": 82100 }, { "epoch": 9.692071731949033, "grad_norm": 0.1392257660627365, "learning_rate": 9.118244580022124e-05, "loss": 0.0097, "step": 82150 }, { "epoch": 9.697970740915526, "grad_norm": 0.05946877971291542, "learning_rate": 9.117190844230971e-05, "loss": 0.0093, "step": 82200 }, { "epoch": 9.70386974988202, "grad_norm": 0.1257510483264923, "learning_rate": 9.116136540148803e-05, "loss": 0.0098, "step": 82250 }, { "epoch": 9.709768758848513, "grad_norm": 0.1548210084438324, "learning_rate": 9.115081667921143e-05, "loss": 0.0085, "step": 82300 }, { "epoch": 9.715667767815008, "grad_norm": 0.18413442373275757, "learning_rate": 9.114026227693597e-05, "loss": 0.0103, "step": 82350 }, { "epoch": 9.7215667767815, "grad_norm": 0.22022584080696106, "learning_rate": 9.112970219611842e-05, "loss": 0.0099, "step": 82400 }, { "epoch": 9.727465785747995, "grad_norm": 0.19518758356571198, "learning_rate": 9.111913643821639e-05, "loss": 0.0084, "step": 82450 }, { "epoch": 9.733364794714488, "grad_norm": 0.1302463859319687, "learning_rate": 9.110856500468827e-05, "loss": 0.0092, "step": 82500 }, { "epoch": 9.73926380368098, "grad_norm": 0.2419438511133194, "learning_rate": 9.10979878969932e-05, "loss": 0.0084, "step": 82550 }, { "epoch": 9.745162812647475, "grad_norm": 0.19513826072216034, "learning_rate": 9.108740511659116e-05, "loss": 0.0101, "step": 82600 }, { "epoch": 9.75106182161397, "grad_norm": 0.13415783643722534, "learning_rate": 9.107681666494283e-05, "loss": 0.0092, "step": 82650 }, { "epoch": 9.756960830580462, "grad_norm": 0.16520164906978607, "learning_rate": 9.106622254350976e-05, "loss": 0.0085, "step": 82700 }, { "epoch": 9.762859839546955, "grad_norm": 0.16125982999801636, "learning_rate": 9.105562275375426e-05, "loss": 0.0099, "step": 82750 }, { "epoch": 9.76875884851345, "grad_norm": 0.21058128774166107, "learning_rate": 9.104501729713935e-05, "loss": 0.0093, "step": 82800 }, { "epoch": 9.774657857479944, "grad_norm": 0.2632635235786438, "learning_rate": 9.103440617512893e-05, "loss": 0.0087, "step": 82850 }, { "epoch": 9.780556866446437, "grad_norm": 0.13155968487262726, "learning_rate": 9.102378938918764e-05, "loss": 0.0107, "step": 82900 }, { "epoch": 9.78645587541293, "grad_norm": 0.1368851661682129, "learning_rate": 9.101316694078088e-05, "loss": 0.0086, "step": 82950 }, { "epoch": 9.792354884379424, "grad_norm": 0.17008590698242188, "learning_rate": 9.100253883137488e-05, "loss": 0.0066, "step": 83000 }, { "epoch": 9.792354884379424, "eval_cer": 0.08691910499139414, "eval_loss": 0.0029877801425755024, "eval_runtime": 2.0388, "eval_samples_per_second": 49.049, "eval_steps_per_second": 1.962, "eval_wer": 0.27, "step": 83000 }, { "epoch": 9.798253893345917, "grad_norm": 0.08097360283136368, "learning_rate": 9.09919050624366e-05, "loss": 0.0077, "step": 83050 }, { "epoch": 9.804152902312412, "grad_norm": 0.1679714173078537, "learning_rate": 9.098126563543383e-05, "loss": 0.0074, "step": 83100 }, { "epoch": 9.810051911278904, "grad_norm": 0.18643946945667267, "learning_rate": 9.09706205518351e-05, "loss": 0.0095, "step": 83150 }, { "epoch": 9.815950920245399, "grad_norm": 0.1684039682149887, "learning_rate": 9.095996981310975e-05, "loss": 0.0089, "step": 83200 }, { "epoch": 9.821849929211892, "grad_norm": 0.2159363031387329, "learning_rate": 9.094931342072787e-05, "loss": 0.0089, "step": 83250 }, { "epoch": 9.827748938178386, "grad_norm": 0.08738018572330475, "learning_rate": 9.093865137616038e-05, "loss": 0.0077, "step": 83300 }, { "epoch": 9.833647947144879, "grad_norm": 0.02978796884417534, "learning_rate": 9.092798368087892e-05, "loss": 0.0087, "step": 83350 }, { "epoch": 9.839546956111374, "grad_norm": 0.1114942729473114, "learning_rate": 9.091731033635595e-05, "loss": 0.0087, "step": 83400 }, { "epoch": 9.845445965077866, "grad_norm": 0.26252129673957825, "learning_rate": 9.090663134406471e-05, "loss": 0.0095, "step": 83450 }, { "epoch": 9.85134497404436, "grad_norm": 0.08834300935268402, "learning_rate": 9.08959467054792e-05, "loss": 0.0079, "step": 83500 }, { "epoch": 9.857243983010854, "grad_norm": 0.2513054609298706, "learning_rate": 9.088525642207422e-05, "loss": 0.0085, "step": 83550 }, { "epoch": 9.863142991977348, "grad_norm": 0.3209558427333832, "learning_rate": 9.08745604953253e-05, "loss": 0.0092, "step": 83600 }, { "epoch": 9.86904200094384, "grad_norm": 0.10994791984558105, "learning_rate": 9.086385892670884e-05, "loss": 0.0072, "step": 83650 }, { "epoch": 9.874941009910335, "grad_norm": 0.2583254277706146, "learning_rate": 9.085315171770191e-05, "loss": 0.0071, "step": 83700 }, { "epoch": 9.880840018876828, "grad_norm": 0.08437660336494446, "learning_rate": 9.084243886978246e-05, "loss": 0.0085, "step": 83750 }, { "epoch": 9.886739027843323, "grad_norm": 0.23008133471012115, "learning_rate": 9.083172038442914e-05, "loss": 0.0083, "step": 83800 }, { "epoch": 9.892638036809815, "grad_norm": 0.1455051749944687, "learning_rate": 9.082099626312143e-05, "loss": 0.0099, "step": 83850 }, { "epoch": 9.89853704577631, "grad_norm": 0.46230649948120117, "learning_rate": 9.081026650733955e-05, "loss": 0.0091, "step": 83900 }, { "epoch": 9.904436054742803, "grad_norm": 0.09898001700639725, "learning_rate": 9.079953111856454e-05, "loss": 0.0078, "step": 83950 }, { "epoch": 9.910335063709297, "grad_norm": 0.2021172195672989, "learning_rate": 9.078879009827817e-05, "loss": 0.0097, "step": 84000 }, { "epoch": 9.910335063709297, "eval_cer": 0.08519793459552495, "eval_loss": 0.0022565785329788923, "eval_runtime": 2.1238, "eval_samples_per_second": 47.084, "eval_steps_per_second": 1.883, "eval_wer": 0.26, "step": 84000 }, { "epoch": 9.91623407267579, "grad_norm": 0.18989859521389008, "learning_rate": 9.077804344796302e-05, "loss": 0.0106, "step": 84050 }, { "epoch": 9.922133081642285, "grad_norm": 0.21388134360313416, "learning_rate": 9.076729116910243e-05, "loss": 0.0092, "step": 84100 }, { "epoch": 9.928032090608777, "grad_norm": 0.21481861174106598, "learning_rate": 9.075653326318054e-05, "loss": 0.0084, "step": 84150 }, { "epoch": 9.933931099575272, "grad_norm": 0.3102792203426361, "learning_rate": 9.074576973168223e-05, "loss": 0.0094, "step": 84200 }, { "epoch": 9.939830108541765, "grad_norm": 0.14667508006095886, "learning_rate": 9.07350005760932e-05, "loss": 0.0074, "step": 84250 }, { "epoch": 9.94572911750826, "grad_norm": 0.13115188479423523, "learning_rate": 9.07242257978999e-05, "loss": 0.0074, "step": 84300 }, { "epoch": 9.951628126474752, "grad_norm": 0.1448919028043747, "learning_rate": 9.071344539858954e-05, "loss": 0.0094, "step": 84350 }, { "epoch": 9.957527135441246, "grad_norm": 0.17994825541973114, "learning_rate": 9.070265937965015e-05, "loss": 0.0091, "step": 84400 }, { "epoch": 9.96342614440774, "grad_norm": 0.17653194069862366, "learning_rate": 9.06918677425705e-05, "loss": 0.0085, "step": 84450 }, { "epoch": 9.969325153374234, "grad_norm": 0.1497245728969574, "learning_rate": 9.068107048884014e-05, "loss": 0.008, "step": 84500 }, { "epoch": 9.975224162340727, "grad_norm": 0.14216293394565582, "learning_rate": 9.06702676199494e-05, "loss": 0.0096, "step": 84550 }, { "epoch": 9.981123171307221, "grad_norm": 0.0885067954659462, "learning_rate": 9.065945913738942e-05, "loss": 0.0107, "step": 84600 }, { "epoch": 9.987022180273714, "grad_norm": 0.16897283494472504, "learning_rate": 9.064864504265205e-05, "loss": 0.0117, "step": 84650 }, { "epoch": 9.992921189240208, "grad_norm": 0.09565406292676926, "learning_rate": 9.063782533722995e-05, "loss": 0.0075, "step": 84700 }, { "epoch": 9.998820198206701, "grad_norm": 0.28161218762397766, "learning_rate": 9.062700002261656e-05, "loss": 0.0073, "step": 84750 }, { "epoch": 10.004719207173196, "grad_norm": 0.23241543769836426, "learning_rate": 9.061616910030609e-05, "loss": 0.008, "step": 84800 }, { "epoch": 10.010618216139688, "grad_norm": 0.16980797052383423, "learning_rate": 9.060533257179351e-05, "loss": 0.0069, "step": 84850 }, { "epoch": 10.016517225106183, "grad_norm": 0.6808876395225525, "learning_rate": 9.059449043857458e-05, "loss": 0.0074, "step": 84900 }, { "epoch": 10.022416234072676, "grad_norm": 0.09072942286729813, "learning_rate": 9.058364270214581e-05, "loss": 0.0087, "step": 84950 }, { "epoch": 10.02831524303917, "grad_norm": 0.12777726352214813, "learning_rate": 9.057278936400453e-05, "loss": 0.0069, "step": 85000 }, { "epoch": 10.02831524303917, "eval_cer": 0.08691910499139414, "eval_loss": 0.0027684185188263655, "eval_runtime": 2.1182, "eval_samples_per_second": 47.209, "eval_steps_per_second": 1.888, "eval_wer": 0.27, "step": 85000 }, { "epoch": 10.034214252005663, "grad_norm": 0.1171320229768753, "learning_rate": 9.056193042564877e-05, "loss": 0.0086, "step": 85050 }, { "epoch": 10.040113260972158, "grad_norm": 0.18516068160533905, "learning_rate": 9.055106588857744e-05, "loss": 0.0075, "step": 85100 }, { "epoch": 10.04601226993865, "grad_norm": 0.089289590716362, "learning_rate": 9.05401957542901e-05, "loss": 0.007, "step": 85150 }, { "epoch": 10.051911278905145, "grad_norm": 0.09438646584749222, "learning_rate": 9.052932002428715e-05, "loss": 0.0074, "step": 85200 }, { "epoch": 10.057810287871638, "grad_norm": 0.10517394542694092, "learning_rate": 9.051843870006979e-05, "loss": 0.0083, "step": 85250 }, { "epoch": 10.06370929683813, "grad_norm": 0.20231571793556213, "learning_rate": 9.050755178313994e-05, "loss": 0.0083, "step": 85300 }, { "epoch": 10.069608305804625, "grad_norm": 0.13476568460464478, "learning_rate": 9.04966592750003e-05, "loss": 0.0087, "step": 85350 }, { "epoch": 10.075507314771118, "grad_norm": 0.16308166086673737, "learning_rate": 9.048576117715435e-05, "loss": 0.0073, "step": 85400 }, { "epoch": 10.081406323737612, "grad_norm": 0.03636014088988304, "learning_rate": 9.047485749110633e-05, "loss": 0.0091, "step": 85450 }, { "epoch": 10.087305332704105, "grad_norm": 0.07623545080423355, "learning_rate": 9.04639482183613e-05, "loss": 0.0087, "step": 85500 }, { "epoch": 10.0932043416706, "grad_norm": 0.3144282102584839, "learning_rate": 9.045303336042501e-05, "loss": 0.0067, "step": 85550 }, { "epoch": 10.099103350637092, "grad_norm": 0.017579909414052963, "learning_rate": 9.044211291880407e-05, "loss": 0.008, "step": 85600 }, { "epoch": 10.105002359603587, "grad_norm": 0.022508161142468452, "learning_rate": 9.043118689500578e-05, "loss": 0.0066, "step": 85650 }, { "epoch": 10.11090136857008, "grad_norm": 0.038509346544742584, "learning_rate": 9.042025529053825e-05, "loss": 0.0074, "step": 85700 }, { "epoch": 10.116800377536574, "grad_norm": 0.018126673996448517, "learning_rate": 9.040931810691038e-05, "loss": 0.0072, "step": 85750 }, { "epoch": 10.122699386503067, "grad_norm": 0.12700650095939636, "learning_rate": 9.03983753456318e-05, "loss": 0.0069, "step": 85800 }, { "epoch": 10.128598395469561, "grad_norm": 0.12243181467056274, "learning_rate": 9.038742700821293e-05, "loss": 0.008, "step": 85850 }, { "epoch": 10.134497404436054, "grad_norm": 0.2369328737258911, "learning_rate": 9.037647309616495e-05, "loss": 0.0086, "step": 85900 }, { "epoch": 10.140396413402549, "grad_norm": 0.2955304682254791, "learning_rate": 9.036551361099983e-05, "loss": 0.0096, "step": 85950 }, { "epoch": 10.146295422369041, "grad_norm": 0.09725318849086761, "learning_rate": 9.035454855423026e-05, "loss": 0.0094, "step": 86000 }, { "epoch": 10.146295422369041, "eval_cer": 0.08605851979345955, "eval_loss": 0.0016112204175442457, "eval_runtime": 1.999, "eval_samples_per_second": 50.024, "eval_steps_per_second": 2.001, "eval_wer": 0.27, "step": 86000 }, { "epoch": 10.152194431335536, "grad_norm": 0.1473662555217743, "learning_rate": 9.034357792736978e-05, "loss": 0.0075, "step": 86050 }, { "epoch": 10.158093440302029, "grad_norm": 0.17140986025333405, "learning_rate": 9.033260173193263e-05, "loss": 0.0068, "step": 86100 }, { "epoch": 10.163992449268523, "grad_norm": 0.16747735440731049, "learning_rate": 9.032161996943384e-05, "loss": 0.0075, "step": 86150 }, { "epoch": 10.169891458235016, "grad_norm": 0.08251528441905975, "learning_rate": 9.031063264138922e-05, "loss": 0.0083, "step": 86200 }, { "epoch": 10.17579046720151, "grad_norm": 0.2150559276342392, "learning_rate": 9.029963974931533e-05, "loss": 0.0086, "step": 86250 }, { "epoch": 10.181689476168003, "grad_norm": 0.18258091807365417, "learning_rate": 9.02886412947295e-05, "loss": 0.0084, "step": 86300 }, { "epoch": 10.187588485134498, "grad_norm": 0.15795031189918518, "learning_rate": 9.027763727914984e-05, "loss": 0.0066, "step": 86350 }, { "epoch": 10.19348749410099, "grad_norm": 0.20059174299240112, "learning_rate": 9.026662770409522e-05, "loss": 0.0063, "step": 86400 }, { "epoch": 10.199386503067485, "grad_norm": 0.38747841119766235, "learning_rate": 9.02556125710853e-05, "loss": 0.0093, "step": 86450 }, { "epoch": 10.205285512033978, "grad_norm": 0.03738546743988991, "learning_rate": 9.024459188164045e-05, "loss": 0.0083, "step": 86500 }, { "epoch": 10.211184521000472, "grad_norm": 0.1501014679670334, "learning_rate": 9.023356563728187e-05, "loss": 0.0091, "step": 86550 }, { "epoch": 10.217083529966965, "grad_norm": 0.12627020478248596, "learning_rate": 9.022253383953148e-05, "loss": 0.0076, "step": 86600 }, { "epoch": 10.22298253893346, "grad_norm": 0.03688858821988106, "learning_rate": 9.021149648991199e-05, "loss": 0.0081, "step": 86650 }, { "epoch": 10.228881547899952, "grad_norm": 0.21424569189548492, "learning_rate": 9.020045358994689e-05, "loss": 0.0086, "step": 86700 }, { "epoch": 10.234780556866447, "grad_norm": 0.0848468616604805, "learning_rate": 9.018940514116039e-05, "loss": 0.0077, "step": 86750 }, { "epoch": 10.24067956583294, "grad_norm": 0.23411135375499725, "learning_rate": 9.017835114507754e-05, "loss": 0.0067, "step": 86800 }, { "epoch": 10.246578574799434, "grad_norm": 0.28762805461883545, "learning_rate": 9.016729160322405e-05, "loss": 0.0082, "step": 86850 }, { "epoch": 10.252477583765927, "grad_norm": 0.09936490654945374, "learning_rate": 9.01562265171265e-05, "loss": 0.0084, "step": 86900 }, { "epoch": 10.258376592732422, "grad_norm": 0.22555111348628998, "learning_rate": 9.014515588831217e-05, "loss": 0.0086, "step": 86950 }, { "epoch": 10.264275601698914, "grad_norm": 0.08347238600254059, "learning_rate": 9.013407971830914e-05, "loss": 0.0078, "step": 87000 }, { "epoch": 10.264275601698914, "eval_cer": 0.08777969018932874, "eval_loss": 0.0020086627919226885, "eval_runtime": 2.0256, "eval_samples_per_second": 49.368, "eval_steps_per_second": 1.975, "eval_wer": 0.27, "step": 87000 }, { "epoch": 10.270174610665409, "grad_norm": 0.21742981672286987, "learning_rate": 9.012299800864623e-05, "loss": 0.0099, "step": 87050 }, { "epoch": 10.276073619631902, "grad_norm": 0.2361154854297638, "learning_rate": 9.011191076085304e-05, "loss": 0.0072, "step": 87100 }, { "epoch": 10.281972628598396, "grad_norm": 0.47342073917388916, "learning_rate": 9.010081797645993e-05, "loss": 0.0076, "step": 87150 }, { "epoch": 10.287871637564889, "grad_norm": 0.1311987340450287, "learning_rate": 9.008971965699801e-05, "loss": 0.0084, "step": 87200 }, { "epoch": 10.293770646531383, "grad_norm": 0.1400708258152008, "learning_rate": 9.00786158039992e-05, "loss": 0.0089, "step": 87250 }, { "epoch": 10.299669655497876, "grad_norm": 0.09525229036808014, "learning_rate": 9.006750641899612e-05, "loss": 0.0084, "step": 87300 }, { "epoch": 10.30556866446437, "grad_norm": 0.22207605838775635, "learning_rate": 9.005639150352221e-05, "loss": 0.0098, "step": 87350 }, { "epoch": 10.311467673430863, "grad_norm": 0.4233706295490265, "learning_rate": 9.004527105911162e-05, "loss": 0.0076, "step": 87400 }, { "epoch": 10.317366682397358, "grad_norm": 0.1609366089105606, "learning_rate": 9.003414508729932e-05, "loss": 0.0077, "step": 87450 }, { "epoch": 10.32326569136385, "grad_norm": 0.21235470473766327, "learning_rate": 9.002301358962101e-05, "loss": 0.0105, "step": 87500 }, { "epoch": 10.329164700330345, "grad_norm": 0.1604582518339157, "learning_rate": 9.001187656761313e-05, "loss": 0.0082, "step": 87550 }, { "epoch": 10.335063709296838, "grad_norm": 0.0685640200972557, "learning_rate": 9.000073402281296e-05, "loss": 0.0078, "step": 87600 }, { "epoch": 10.340962718263333, "grad_norm": 0.2049335390329361, "learning_rate": 8.998958595675844e-05, "loss": 0.0072, "step": 87650 }, { "epoch": 10.346861727229825, "grad_norm": 0.05325634405016899, "learning_rate": 8.997843237098835e-05, "loss": 0.007, "step": 87700 }, { "epoch": 10.35276073619632, "grad_norm": 0.13309086859226227, "learning_rate": 8.996727326704222e-05, "loss": 0.0082, "step": 87750 }, { "epoch": 10.358659745162813, "grad_norm": 0.23774096369743347, "learning_rate": 8.99561086464603e-05, "loss": 0.009, "step": 87800 }, { "epoch": 10.364558754129305, "grad_norm": 0.08659426122903824, "learning_rate": 8.994493851078363e-05, "loss": 0.0089, "step": 87850 }, { "epoch": 10.3704577630958, "grad_norm": 0.05627530440688133, "learning_rate": 8.993376286155403e-05, "loss": 0.0064, "step": 87900 }, { "epoch": 10.376356772062293, "grad_norm": 0.11179652065038681, "learning_rate": 8.992258170031407e-05, "loss": 0.0068, "step": 87950 }, { "epoch": 10.382255781028787, "grad_norm": 0.14904999732971191, "learning_rate": 8.991139502860703e-05, "loss": 0.0084, "step": 88000 }, { "epoch": 10.382255781028787, "eval_cer": 0.08519793459552495, "eval_loss": 0.0013132562162354589, "eval_runtime": 2.0365, "eval_samples_per_second": 49.103, "eval_steps_per_second": 1.964, "eval_wer": 0.26, "step": 88000 }, { "epoch": 10.38815478999528, "grad_norm": 0.23244410753250122, "learning_rate": 8.990020284797704e-05, "loss": 0.0084, "step": 88050 }, { "epoch": 10.394053798961775, "grad_norm": 0.15106473863124847, "learning_rate": 8.988900515996892e-05, "loss": 0.0076, "step": 88100 }, { "epoch": 10.399952807928267, "grad_norm": 0.12812289595603943, "learning_rate": 8.987780196612828e-05, "loss": 0.0085, "step": 88150 }, { "epoch": 10.405851816894762, "grad_norm": 0.08941271901130676, "learning_rate": 8.986659326800147e-05, "loss": 0.0073, "step": 88200 }, { "epoch": 10.411750825861255, "grad_norm": 0.05316628888249397, "learning_rate": 8.985537906713563e-05, "loss": 0.0068, "step": 88250 }, { "epoch": 10.417649834827749, "grad_norm": 0.12215692549943924, "learning_rate": 8.984415936507863e-05, "loss": 0.0089, "step": 88300 }, { "epoch": 10.423548843794242, "grad_norm": 0.07210669666528702, "learning_rate": 8.983293416337913e-05, "loss": 0.0078, "step": 88350 }, { "epoch": 10.429447852760736, "grad_norm": 0.25914788246154785, "learning_rate": 8.982170346358651e-05, "loss": 0.0083, "step": 88400 }, { "epoch": 10.43534686172723, "grad_norm": 0.11007030308246613, "learning_rate": 8.981046726725096e-05, "loss": 0.009, "step": 88450 }, { "epoch": 10.441245870693724, "grad_norm": 0.7245011925697327, "learning_rate": 8.979922557592335e-05, "loss": 0.0076, "step": 88500 }, { "epoch": 10.447144879660216, "grad_norm": 0.1435178965330124, "learning_rate": 8.97879783911554e-05, "loss": 0.0081, "step": 88550 }, { "epoch": 10.453043888626711, "grad_norm": 0.267251193523407, "learning_rate": 8.977672571449955e-05, "loss": 0.0072, "step": 88600 }, { "epoch": 10.458942897593204, "grad_norm": 0.7178346514701843, "learning_rate": 8.976546754750896e-05, "loss": 0.0081, "step": 88650 }, { "epoch": 10.464841906559698, "grad_norm": 0.17502722144126892, "learning_rate": 8.975420389173761e-05, "loss": 0.0087, "step": 88700 }, { "epoch": 10.470740915526191, "grad_norm": 0.19917824864387512, "learning_rate": 8.974293474874019e-05, "loss": 0.0091, "step": 88750 }, { "epoch": 10.476639924492686, "grad_norm": 0.09312061965465546, "learning_rate": 8.973166012007217e-05, "loss": 0.0066, "step": 88800 }, { "epoch": 10.482538933459178, "grad_norm": 0.6748299598693848, "learning_rate": 8.972038000728978e-05, "loss": 0.0086, "step": 88850 }, { "epoch": 10.488437942425673, "grad_norm": 0.168801948428154, "learning_rate": 8.970909441195001e-05, "loss": 0.0075, "step": 88900 }, { "epoch": 10.494336951392166, "grad_norm": 0.4834231436252594, "learning_rate": 8.969780333561058e-05, "loss": 0.0095, "step": 88950 }, { "epoch": 10.50023596035866, "grad_norm": 0.1674998253583908, "learning_rate": 8.968650677982998e-05, "loss": 0.0067, "step": 89000 }, { "epoch": 10.50023596035866, "eval_cer": 0.08777969018932874, "eval_loss": 0.004239379893988371, "eval_runtime": 2.0531, "eval_samples_per_second": 48.707, "eval_steps_per_second": 1.948, "eval_wer": 0.27, "step": 89000 }, { "epoch": 10.506134969325153, "grad_norm": 0.12892469763755798, "learning_rate": 8.967520474616749e-05, "loss": 0.0084, "step": 89050 }, { "epoch": 10.512033978291647, "grad_norm": 0.11715924739837646, "learning_rate": 8.966389723618307e-05, "loss": 0.0099, "step": 89100 }, { "epoch": 10.51793298725814, "grad_norm": 0.2499929815530777, "learning_rate": 8.965258425143753e-05, "loss": 0.0076, "step": 89150 }, { "epoch": 10.523831996224635, "grad_norm": 0.12644746899604797, "learning_rate": 8.964126579349236e-05, "loss": 0.0073, "step": 89200 }, { "epoch": 10.529731005191127, "grad_norm": 0.1385444849729538, "learning_rate": 8.962994186390985e-05, "loss": 0.0084, "step": 89250 }, { "epoch": 10.535630014157622, "grad_norm": 0.04404478147625923, "learning_rate": 8.9618612464253e-05, "loss": 0.0083, "step": 89300 }, { "epoch": 10.541529023124115, "grad_norm": 0.11276499181985855, "learning_rate": 8.960727759608564e-05, "loss": 0.0096, "step": 89350 }, { "epoch": 10.54742803209061, "grad_norm": 0.08079201728105545, "learning_rate": 8.959593726097227e-05, "loss": 0.0081, "step": 89400 }, { "epoch": 10.553327041057102, "grad_norm": 0.17065133154392242, "learning_rate": 8.958459146047818e-05, "loss": 0.0084, "step": 89450 }, { "epoch": 10.559226050023597, "grad_norm": 0.1745220571756363, "learning_rate": 8.957324019616945e-05, "loss": 0.0077, "step": 89500 }, { "epoch": 10.56512505899009, "grad_norm": 0.12183694541454315, "learning_rate": 8.956188346961285e-05, "loss": 0.009, "step": 89550 }, { "epoch": 10.571024067956584, "grad_norm": 0.12208889424800873, "learning_rate": 8.955052128237597e-05, "loss": 0.0082, "step": 89600 }, { "epoch": 10.576923076923077, "grad_norm": 0.15957722067832947, "learning_rate": 8.953915363602708e-05, "loss": 0.008, "step": 89650 }, { "epoch": 10.582822085889571, "grad_norm": 0.16518385708332062, "learning_rate": 8.952778053213526e-05, "loss": 0.0095, "step": 89700 }, { "epoch": 10.588721094856064, "grad_norm": 0.09078332036733627, "learning_rate": 8.951640197227034e-05, "loss": 0.0097, "step": 89750 }, { "epoch": 10.594620103822558, "grad_norm": 0.1275145262479782, "learning_rate": 8.950501795800287e-05, "loss": 0.0081, "step": 89800 }, { "epoch": 10.600519112789051, "grad_norm": 0.2653827965259552, "learning_rate": 8.94936284909042e-05, "loss": 0.0079, "step": 89850 }, { "epoch": 10.606418121755546, "grad_norm": 0.20556998252868652, "learning_rate": 8.948223357254636e-05, "loss": 0.0084, "step": 89900 }, { "epoch": 10.612317130722039, "grad_norm": 0.19698898494243622, "learning_rate": 8.94708332045022e-05, "loss": 0.0082, "step": 89950 }, { "epoch": 10.618216139688533, "grad_norm": 0.11328552663326263, "learning_rate": 8.945942738834532e-05, "loss": 0.009, "step": 90000 }, { "epoch": 10.618216139688533, "eval_cer": 0.08777969018932874, "eval_loss": 0.0034844179172068834, "eval_runtime": 2.0453, "eval_samples_per_second": 48.892, "eval_steps_per_second": 1.956, "eval_wer": 0.27, "step": 90000 }, { "epoch": 10.624115148655026, "grad_norm": 0.14406660199165344, "learning_rate": 8.944801612565001e-05, "loss": 0.0088, "step": 90050 }, { "epoch": 10.63001415762152, "grad_norm": 0.4490949809551239, "learning_rate": 8.94365994179914e-05, "loss": 0.0086, "step": 90100 }, { "epoch": 10.635913166588013, "grad_norm": 0.13656523823738098, "learning_rate": 8.942517726694528e-05, "loss": 0.0073, "step": 90150 }, { "epoch": 10.641812175554508, "grad_norm": 0.09837698191404343, "learning_rate": 8.941374967408826e-05, "loss": 0.0085, "step": 90200 }, { "epoch": 10.647711184521, "grad_norm": 0.18987469375133514, "learning_rate": 8.940231664099769e-05, "loss": 0.0073, "step": 90250 }, { "epoch": 10.653610193487495, "grad_norm": 0.15707004070281982, "learning_rate": 8.939087816925163e-05, "loss": 0.0072, "step": 90300 }, { "epoch": 10.659509202453988, "grad_norm": 0.143109530210495, "learning_rate": 8.937943426042891e-05, "loss": 0.0081, "step": 90350 }, { "epoch": 10.66540821142048, "grad_norm": 0.19445571303367615, "learning_rate": 8.936798491610916e-05, "loss": 0.0072, "step": 90400 }, { "epoch": 10.671307220386975, "grad_norm": 0.23063351213932037, "learning_rate": 8.93565301378727e-05, "loss": 0.0074, "step": 90450 }, { "epoch": 10.67720622935347, "grad_norm": 0.1740354299545288, "learning_rate": 8.934506992730062e-05, "loss": 0.0072, "step": 90500 }, { "epoch": 10.683105238319962, "grad_norm": 0.09137275069952011, "learning_rate": 8.933360428597475e-05, "loss": 0.0075, "step": 90550 }, { "epoch": 10.689004247286455, "grad_norm": 0.3518984317779541, "learning_rate": 8.93221332154777e-05, "loss": 0.0092, "step": 90600 }, { "epoch": 10.69490325625295, "grad_norm": 0.25437724590301514, "learning_rate": 8.931065671739278e-05, "loss": 0.0083, "step": 90650 }, { "epoch": 10.700802265219442, "grad_norm": 0.09133987873792648, "learning_rate": 8.92991747933041e-05, "loss": 0.0096, "step": 90700 }, { "epoch": 10.706701274185937, "grad_norm": 0.17840184271335602, "learning_rate": 8.928768744479648e-05, "loss": 0.0102, "step": 90750 }, { "epoch": 10.71260028315243, "grad_norm": 0.11323760449886322, "learning_rate": 8.927619467345554e-05, "loss": 0.0087, "step": 90800 }, { "epoch": 10.718499292118924, "grad_norm": 0.2009395956993103, "learning_rate": 8.926469648086757e-05, "loss": 0.0063, "step": 90850 }, { "epoch": 10.724398301085417, "grad_norm": 0.13909682631492615, "learning_rate": 8.925319286861966e-05, "loss": 0.0081, "step": 90900 }, { "epoch": 10.730297310051911, "grad_norm": 0.22949685156345367, "learning_rate": 8.924168383829967e-05, "loss": 0.0082, "step": 90950 }, { "epoch": 10.736196319018404, "grad_norm": 0.07126981019973755, "learning_rate": 8.923016939149615e-05, "loss": 0.0075, "step": 91000 }, { "epoch": 10.736196319018404, "eval_cer": 0.09122203098106713, "eval_loss": 0.003698958083987236, "eval_runtime": 2.0429, "eval_samples_per_second": 48.95, "eval_steps_per_second": 1.958, "eval_wer": 0.27, "step": 91000 }, { "epoch": 10.742095327984899, "grad_norm": 0.018996400758624077, "learning_rate": 8.921864952979843e-05, "loss": 0.0065, "step": 91050 }, { "epoch": 10.747994336951392, "grad_norm": 0.05553293600678444, "learning_rate": 8.92071242547966e-05, "loss": 0.0089, "step": 91100 }, { "epoch": 10.753893345917886, "grad_norm": 0.13726837933063507, "learning_rate": 8.919559356808148e-05, "loss": 0.0087, "step": 91150 }, { "epoch": 10.759792354884379, "grad_norm": 0.3327675759792328, "learning_rate": 8.918405747124459e-05, "loss": 0.008, "step": 91200 }, { "epoch": 10.765691363850873, "grad_norm": 0.17878739535808563, "learning_rate": 8.91725159658783e-05, "loss": 0.0092, "step": 91250 }, { "epoch": 10.771590372817366, "grad_norm": 0.11160699278116226, "learning_rate": 8.916096905357563e-05, "loss": 0.007, "step": 91300 }, { "epoch": 10.77748938178386, "grad_norm": 0.2072824239730835, "learning_rate": 8.914941673593043e-05, "loss": 0.008, "step": 91350 }, { "epoch": 10.783388390750353, "grad_norm": 0.15825320780277252, "learning_rate": 8.913785901453722e-05, "loss": 0.0081, "step": 91400 }, { "epoch": 10.789287399716848, "grad_norm": 0.12629768252372742, "learning_rate": 8.912629589099129e-05, "loss": 0.0077, "step": 91450 }, { "epoch": 10.79518640868334, "grad_norm": 0.12554754316806793, "learning_rate": 8.911472736688871e-05, "loss": 0.0071, "step": 91500 }, { "epoch": 10.801085417649835, "grad_norm": 0.05499526858329773, "learning_rate": 8.910315344382627e-05, "loss": 0.0075, "step": 91550 }, { "epoch": 10.806984426616328, "grad_norm": 0.11749812960624695, "learning_rate": 8.90915741234015e-05, "loss": 0.0078, "step": 91600 }, { "epoch": 10.812883435582823, "grad_norm": 0.14468324184417725, "learning_rate": 8.907998940721266e-05, "loss": 0.0084, "step": 91650 }, { "epoch": 10.818782444549315, "grad_norm": 0.17322275042533875, "learning_rate": 8.90683992968588e-05, "loss": 0.0082, "step": 91700 }, { "epoch": 10.82468145351581, "grad_norm": 0.17559795081615448, "learning_rate": 8.905680379393967e-05, "loss": 0.0065, "step": 91750 }, { "epoch": 10.830580462482303, "grad_norm": 0.25785455107688904, "learning_rate": 8.904520290005582e-05, "loss": 0.0085, "step": 91800 }, { "epoch": 10.836479471448797, "grad_norm": 0.14679807424545288, "learning_rate": 8.903359661680846e-05, "loss": 0.0083, "step": 91850 }, { "epoch": 10.84237848041529, "grad_norm": 0.14396773278713226, "learning_rate": 8.902198494579962e-05, "loss": 0.009, "step": 91900 }, { "epoch": 10.848277489381784, "grad_norm": 0.04407093673944473, "learning_rate": 8.901036788863204e-05, "loss": 0.0067, "step": 91950 }, { "epoch": 10.854176498348277, "grad_norm": 0.10306646674871445, "learning_rate": 8.89987454469092e-05, "loss": 0.0092, "step": 92000 }, { "epoch": 10.854176498348277, "eval_cer": 0.08864027538726334, "eval_loss": 0.0029097695369273424, "eval_runtime": 2.1123, "eval_samples_per_second": 47.341, "eval_steps_per_second": 1.894, "eval_wer": 0.27, "step": 92000 }, { "epoch": 10.860075507314772, "grad_norm": 0.05273793637752533, "learning_rate": 8.898711762223534e-05, "loss": 0.0081, "step": 92050 }, { "epoch": 10.865974516281264, "grad_norm": 0.34742650389671326, "learning_rate": 8.897548441621544e-05, "loss": 0.0095, "step": 92100 }, { "epoch": 10.871873525247759, "grad_norm": 0.06256134063005447, "learning_rate": 8.89638458304552e-05, "loss": 0.0101, "step": 92150 }, { "epoch": 10.877772534214252, "grad_norm": 0.18760384619235992, "learning_rate": 8.895220186656112e-05, "loss": 0.0104, "step": 92200 }, { "epoch": 10.883671543180746, "grad_norm": 0.24278868734836578, "learning_rate": 8.894055252614036e-05, "loss": 0.0089, "step": 92250 }, { "epoch": 10.889570552147239, "grad_norm": 0.17006656527519226, "learning_rate": 8.892889781080087e-05, "loss": 0.0088, "step": 92300 }, { "epoch": 10.895469561113734, "grad_norm": 0.0546855702996254, "learning_rate": 8.891723772215135e-05, "loss": 0.0091, "step": 92350 }, { "epoch": 10.901368570080226, "grad_norm": 0.10995038598775864, "learning_rate": 8.890557226180122e-05, "loss": 0.0078, "step": 92400 }, { "epoch": 10.90726757904672, "grad_norm": 0.3142499029636383, "learning_rate": 8.889390143136067e-05, "loss": 0.0094, "step": 92450 }, { "epoch": 10.913166588013214, "grad_norm": 0.27311328053474426, "learning_rate": 8.888222523244059e-05, "loss": 0.0088, "step": 92500 }, { "epoch": 10.919065596979708, "grad_norm": 0.15844863653182983, "learning_rate": 8.887054366665264e-05, "loss": 0.0071, "step": 92550 }, { "epoch": 10.924964605946201, "grad_norm": 0.07931257039308548, "learning_rate": 8.885885673560921e-05, "loss": 0.0074, "step": 92600 }, { "epoch": 10.930863614912695, "grad_norm": 0.1507597267627716, "learning_rate": 8.884716444092343e-05, "loss": 0.0074, "step": 92650 }, { "epoch": 10.936762623879188, "grad_norm": 0.3518977165222168, "learning_rate": 8.883546678420917e-05, "loss": 0.0078, "step": 92700 }, { "epoch": 10.942661632845683, "grad_norm": 0.12836192548274994, "learning_rate": 8.882376376708106e-05, "loss": 0.0081, "step": 92750 }, { "epoch": 10.948560641812175, "grad_norm": 0.06633894145488739, "learning_rate": 8.881205539115445e-05, "loss": 0.0079, "step": 92800 }, { "epoch": 10.95445965077867, "grad_norm": 0.07375039905309677, "learning_rate": 8.880034165804543e-05, "loss": 0.0081, "step": 92850 }, { "epoch": 10.960358659745163, "grad_norm": 0.14927959442138672, "learning_rate": 8.878862256937082e-05, "loss": 0.0093, "step": 92900 }, { "epoch": 10.966257668711656, "grad_norm": 0.12635183334350586, "learning_rate": 8.87768981267482e-05, "loss": 0.009, "step": 92950 }, { "epoch": 10.97215667767815, "grad_norm": 0.12633761763572693, "learning_rate": 8.876516833179588e-05, "loss": 0.0081, "step": 93000 }, { "epoch": 10.97215667767815, "eval_cer": 0.08777969018932874, "eval_loss": 0.0017400733195245266, "eval_runtime": 2.0261, "eval_samples_per_second": 49.356, "eval_steps_per_second": 1.974, "eval_wer": 0.27, "step": 93000 }, { "epoch": 10.978055686644645, "grad_norm": 0.17444540560245514, "learning_rate": 8.875343318613294e-05, "loss": 0.0091, "step": 93050 }, { "epoch": 10.983954695611137, "grad_norm": 0.07203218340873718, "learning_rate": 8.874169269137913e-05, "loss": 0.008, "step": 93100 }, { "epoch": 10.98985370457763, "grad_norm": 0.27249953150749207, "learning_rate": 8.872994684915499e-05, "loss": 0.0069, "step": 93150 }, { "epoch": 10.995752713544125, "grad_norm": 0.10603173077106476, "learning_rate": 8.871819566108176e-05, "loss": 0.0077, "step": 93200 }, { "epoch": 11.001651722510617, "grad_norm": 0.014003458432853222, "learning_rate": 8.87064391287815e-05, "loss": 0.0084, "step": 93250 }, { "epoch": 11.007550731477112, "grad_norm": 0.22153331339359283, "learning_rate": 8.86946772538769e-05, "loss": 0.0075, "step": 93300 }, { "epoch": 11.013449740443605, "grad_norm": 0.10346460342407227, "learning_rate": 8.868291003799147e-05, "loss": 0.006, "step": 93350 }, { "epoch": 11.0193487494101, "grad_norm": 0.23537111282348633, "learning_rate": 8.86711374827494e-05, "loss": 0.0063, "step": 93400 }, { "epoch": 11.025247758376592, "grad_norm": 0.2783186137676239, "learning_rate": 8.865935958977565e-05, "loss": 0.0073, "step": 93450 }, { "epoch": 11.031146767343087, "grad_norm": 0.15080046653747559, "learning_rate": 8.864757636069591e-05, "loss": 0.0059, "step": 93500 }, { "epoch": 11.03704577630958, "grad_norm": 0.1696406453847885, "learning_rate": 8.863578779713662e-05, "loss": 0.0068, "step": 93550 }, { "epoch": 11.042944785276074, "grad_norm": 0.15676181018352509, "learning_rate": 8.862399390072491e-05, "loss": 0.0075, "step": 93600 }, { "epoch": 11.048843794242567, "grad_norm": 0.16721056401729584, "learning_rate": 8.86121946730887e-05, "loss": 0.0088, "step": 93650 }, { "epoch": 11.054742803209061, "grad_norm": 0.0902264416217804, "learning_rate": 8.860039011585661e-05, "loss": 0.0074, "step": 93700 }, { "epoch": 11.060641812175554, "grad_norm": 0.11809127777814865, "learning_rate": 8.858858023065803e-05, "loss": 0.0074, "step": 93750 }, { "epoch": 11.066540821142048, "grad_norm": 0.16813279688358307, "learning_rate": 8.857676501912306e-05, "loss": 0.0063, "step": 93800 }, { "epoch": 11.072439830108541, "grad_norm": 0.20335936546325684, "learning_rate": 8.856494448288252e-05, "loss": 0.0056, "step": 93850 }, { "epoch": 11.078338839075036, "grad_norm": 0.1632988154888153, "learning_rate": 8.855311862356799e-05, "loss": 0.0068, "step": 93900 }, { "epoch": 11.084237848041528, "grad_norm": 0.055057547986507416, "learning_rate": 8.85412874428118e-05, "loss": 0.0081, "step": 93950 }, { "epoch": 11.090136857008023, "grad_norm": 0.07485587894916534, "learning_rate": 8.852945094224697e-05, "loss": 0.0082, "step": 94000 }, { "epoch": 11.090136857008023, "eval_cer": 0.08691910499139414, "eval_loss": 0.0033507596235722303, "eval_runtime": 2.0979, "eval_samples_per_second": 47.667, "eval_steps_per_second": 1.907, "eval_wer": 0.27, "step": 94000 }, { "epoch": 11.096035865974516, "grad_norm": 0.06728523224592209, "learning_rate": 8.851760912350731e-05, "loss": 0.0061, "step": 94050 }, { "epoch": 11.10193487494101, "grad_norm": 0.04665340855717659, "learning_rate": 8.850576198822728e-05, "loss": 0.0065, "step": 94100 }, { "epoch": 11.107833883907503, "grad_norm": 0.2631240487098694, "learning_rate": 8.849390953804218e-05, "loss": 0.0064, "step": 94150 }, { "epoch": 11.113732892873998, "grad_norm": 0.1570916622877121, "learning_rate": 8.848205177458795e-05, "loss": 0.0054, "step": 94200 }, { "epoch": 11.11963190184049, "grad_norm": 0.14106863737106323, "learning_rate": 8.847018869950132e-05, "loss": 0.0074, "step": 94250 }, { "epoch": 11.125530910806985, "grad_norm": 0.22996102273464203, "learning_rate": 8.845832031441975e-05, "loss": 0.0088, "step": 94300 }, { "epoch": 11.131429919773478, "grad_norm": 0.08991673588752747, "learning_rate": 8.844644662098141e-05, "loss": 0.0072, "step": 94350 }, { "epoch": 11.137328928739972, "grad_norm": 0.17150169610977173, "learning_rate": 8.843456762082518e-05, "loss": 0.0074, "step": 94400 }, { "epoch": 11.143227937706465, "grad_norm": 0.871953010559082, "learning_rate": 8.842268331559076e-05, "loss": 0.0087, "step": 94450 }, { "epoch": 11.14912694667296, "grad_norm": 0.022399580106139183, "learning_rate": 8.841079370691848e-05, "loss": 0.0065, "step": 94500 }, { "epoch": 11.155025955639452, "grad_norm": 0.12539680302143097, "learning_rate": 8.839889879644947e-05, "loss": 0.0076, "step": 94550 }, { "epoch": 11.160924964605947, "grad_norm": 0.026793580502271652, "learning_rate": 8.838699858582558e-05, "loss": 0.0089, "step": 94600 }, { "epoch": 11.16682397357244, "grad_norm": 0.11034753918647766, "learning_rate": 8.837509307668936e-05, "loss": 0.0081, "step": 94650 }, { "epoch": 11.172722982538934, "grad_norm": 0.1461506187915802, "learning_rate": 8.836318227068414e-05, "loss": 0.0076, "step": 94700 }, { "epoch": 11.178621991505427, "grad_norm": 0.22639822959899902, "learning_rate": 8.83512661694539e-05, "loss": 0.0074, "step": 94750 }, { "epoch": 11.184521000471921, "grad_norm": 0.08421792089939117, "learning_rate": 8.833934477464347e-05, "loss": 0.0066, "step": 94800 }, { "epoch": 11.190420009438414, "grad_norm": 0.1823163479566574, "learning_rate": 8.832741808789832e-05, "loss": 0.0092, "step": 94850 }, { "epoch": 11.196319018404909, "grad_norm": 0.04586123302578926, "learning_rate": 8.831548611086468e-05, "loss": 0.008, "step": 94900 }, { "epoch": 11.202218027371401, "grad_norm": 0.2177780717611313, "learning_rate": 8.83035488451895e-05, "loss": 0.0085, "step": 94950 }, { "epoch": 11.208117036337896, "grad_norm": 0.0886489748954773, "learning_rate": 8.829160629252046e-05, "loss": 0.0071, "step": 95000 }, { "epoch": 11.208117036337896, "eval_cer": 0.08691910499139414, "eval_loss": 0.0030962424352765083, "eval_runtime": 2.0519, "eval_samples_per_second": 48.734, "eval_steps_per_second": 1.949, "eval_wer": 0.27, "step": 95000 }, { "epoch": 11.214016045304389, "grad_norm": 0.12798890471458435, "learning_rate": 8.8279658454506e-05, "loss": 0.0071, "step": 95050 }, { "epoch": 11.219915054270883, "grad_norm": 0.1734216809272766, "learning_rate": 8.826770533279522e-05, "loss": 0.0069, "step": 95100 }, { "epoch": 11.225814063237376, "grad_norm": 0.044934000819921494, "learning_rate": 8.825574692903806e-05, "loss": 0.0077, "step": 95150 }, { "epoch": 11.23171307220387, "grad_norm": 0.05021200329065323, "learning_rate": 8.824378324488509e-05, "loss": 0.0063, "step": 95200 }, { "epoch": 11.237612081170363, "grad_norm": 0.15017221868038177, "learning_rate": 8.823181428198764e-05, "loss": 0.0065, "step": 95250 }, { "epoch": 11.243511090136858, "grad_norm": 0.20410920679569244, "learning_rate": 8.821984004199777e-05, "loss": 0.0077, "step": 95300 }, { "epoch": 11.24941009910335, "grad_norm": 0.060406796634197235, "learning_rate": 8.820786052656829e-05, "loss": 0.0071, "step": 95350 }, { "epoch": 11.255309108069845, "grad_norm": 0.20935939252376556, "learning_rate": 8.819587573735269e-05, "loss": 0.0071, "step": 95400 }, { "epoch": 11.261208117036338, "grad_norm": 0.31635335087776184, "learning_rate": 8.818388567600524e-05, "loss": 0.0062, "step": 95450 }, { "epoch": 11.267107126002832, "grad_norm": 0.0834333673119545, "learning_rate": 8.81718903441809e-05, "loss": 0.0078, "step": 95500 }, { "epoch": 11.273006134969325, "grad_norm": 0.10991941392421722, "learning_rate": 8.81598897435354e-05, "loss": 0.0073, "step": 95550 }, { "epoch": 11.27890514393582, "grad_norm": 0.10278336703777313, "learning_rate": 8.814788387572513e-05, "loss": 0.0079, "step": 95600 }, { "epoch": 11.284804152902312, "grad_norm": 0.1909763067960739, "learning_rate": 8.813587274240727e-05, "loss": 0.0092, "step": 95650 }, { "epoch": 11.290703161868805, "grad_norm": 0.0994473397731781, "learning_rate": 8.812385634523967e-05, "loss": 0.0069, "step": 95700 }, { "epoch": 11.2966021708353, "grad_norm": 0.2115398645401001, "learning_rate": 8.811183468588098e-05, "loss": 0.0085, "step": 95750 }, { "epoch": 11.302501179801792, "grad_norm": 0.1401805728673935, "learning_rate": 8.809980776599054e-05, "loss": 0.0086, "step": 95800 }, { "epoch": 11.308400188768287, "grad_norm": 0.1845487803220749, "learning_rate": 8.808777558722836e-05, "loss": 0.0081, "step": 95850 }, { "epoch": 11.31429919773478, "grad_norm": 0.14485234022140503, "learning_rate": 8.807573815125527e-05, "loss": 0.0091, "step": 95900 }, { "epoch": 11.320198206701274, "grad_norm": 0.07542978972196579, "learning_rate": 8.806369545973279e-05, "loss": 0.0084, "step": 95950 }, { "epoch": 11.326097215667767, "grad_norm": 0.06590378284454346, "learning_rate": 8.805164751432313e-05, "loss": 0.0073, "step": 96000 }, { "epoch": 11.326097215667767, "eval_cer": 0.08691910499139414, "eval_loss": 0.0027830915059894323, "eval_runtime": 2.0829, "eval_samples_per_second": 48.011, "eval_steps_per_second": 1.92, "eval_wer": 0.27, "step": 96000 }, { "epoch": 11.331996224634262, "grad_norm": 0.05972849577665329, "learning_rate": 8.803959431668925e-05, "loss": 0.0067, "step": 96050 }, { "epoch": 11.337895233600754, "grad_norm": 0.12037906795740128, "learning_rate": 8.802753586849486e-05, "loss": 0.0064, "step": 96100 }, { "epoch": 11.343794242567249, "grad_norm": 0.2243398129940033, "learning_rate": 8.801547217140438e-05, "loss": 0.007, "step": 96150 }, { "epoch": 11.349693251533742, "grad_norm": 0.20356541872024536, "learning_rate": 8.800340322708291e-05, "loss": 0.0067, "step": 96200 }, { "epoch": 11.355592260500236, "grad_norm": 0.2166256606578827, "learning_rate": 8.799132903719635e-05, "loss": 0.0067, "step": 96250 }, { "epoch": 11.361491269466729, "grad_norm": 0.235858753323555, "learning_rate": 8.797924960341128e-05, "loss": 0.008, "step": 96300 }, { "epoch": 11.367390278433223, "grad_norm": 0.08526378870010376, "learning_rate": 8.7967164927395e-05, "loss": 0.0082, "step": 96350 }, { "epoch": 11.373289287399716, "grad_norm": 0.10236388444900513, "learning_rate": 8.795507501081555e-05, "loss": 0.0069, "step": 96400 }, { "epoch": 11.37918829636621, "grad_norm": 0.20535646378993988, "learning_rate": 8.794297985534167e-05, "loss": 0.0084, "step": 96450 }, { "epoch": 11.385087305332704, "grad_norm": 0.14510419964790344, "learning_rate": 8.793087946264287e-05, "loss": 0.0077, "step": 96500 }, { "epoch": 11.390986314299198, "grad_norm": 0.04044996201992035, "learning_rate": 8.791877383438933e-05, "loss": 0.009, "step": 96550 }, { "epoch": 11.39688532326569, "grad_norm": 0.21724680066108704, "learning_rate": 8.790666297225196e-05, "loss": 0.0063, "step": 96600 }, { "epoch": 11.402784332232185, "grad_norm": 0.12388258427381516, "learning_rate": 8.789454687790246e-05, "loss": 0.0072, "step": 96650 }, { "epoch": 11.408683341198678, "grad_norm": 0.24979831278324127, "learning_rate": 8.788242555301315e-05, "loss": 0.0085, "step": 96700 }, { "epoch": 11.414582350165173, "grad_norm": 0.18225950002670288, "learning_rate": 8.787029899925717e-05, "loss": 0.0105, "step": 96750 }, { "epoch": 11.420481359131665, "grad_norm": 0.016844524070620537, "learning_rate": 8.785816721830829e-05, "loss": 0.0072, "step": 96800 }, { "epoch": 11.42638036809816, "grad_norm": 0.1336376965045929, "learning_rate": 8.784603021184106e-05, "loss": 0.0077, "step": 96850 }, { "epoch": 11.432279377064653, "grad_norm": 0.127128005027771, "learning_rate": 8.783388798153074e-05, "loss": 0.0077, "step": 96900 }, { "epoch": 11.438178386031147, "grad_norm": 0.08819430321455002, "learning_rate": 8.782174052905332e-05, "loss": 0.0074, "step": 96950 }, { "epoch": 11.44407739499764, "grad_norm": 0.14740097522735596, "learning_rate": 8.780958785608546e-05, "loss": 0.0075, "step": 97000 }, { "epoch": 11.44407739499764, "eval_cer": 0.09036144578313253, "eval_loss": 0.004088588524609804, "eval_runtime": 2.0362, "eval_samples_per_second": 49.112, "eval_steps_per_second": 1.964, "eval_wer": 0.28, "step": 97000 }, { "epoch": 11.449976403964135, "grad_norm": 0.11942560225725174, "learning_rate": 8.779742996430462e-05, "loss": 0.0073, "step": 97050 }, { "epoch": 11.455875412930627, "grad_norm": 0.09031999111175537, "learning_rate": 8.778526685538893e-05, "loss": 0.0065, "step": 97100 }, { "epoch": 11.461774421897122, "grad_norm": 0.06064886599779129, "learning_rate": 8.777309853101724e-05, "loss": 0.0079, "step": 97150 }, { "epoch": 11.467673430863615, "grad_norm": 0.23460987210273743, "learning_rate": 8.776092499286912e-05, "loss": 0.0071, "step": 97200 }, { "epoch": 11.473572439830109, "grad_norm": 0.17366042733192444, "learning_rate": 8.77487462426249e-05, "loss": 0.0081, "step": 97250 }, { "epoch": 11.479471448796602, "grad_norm": 0.13195037841796875, "learning_rate": 8.773656228196558e-05, "loss": 0.0075, "step": 97300 }, { "epoch": 11.485370457763096, "grad_norm": 0.2754536271095276, "learning_rate": 8.772437311257289e-05, "loss": 0.0064, "step": 97350 }, { "epoch": 11.49126946672959, "grad_norm": 0.2071598619222641, "learning_rate": 8.77121787361293e-05, "loss": 0.0056, "step": 97400 }, { "epoch": 11.497168475696084, "grad_norm": 0.2104528695344925, "learning_rate": 8.769997915431798e-05, "loss": 0.0083, "step": 97450 }, { "epoch": 11.503067484662576, "grad_norm": 0.09873072057962418, "learning_rate": 8.768777436882282e-05, "loss": 0.007, "step": 97500 }, { "epoch": 11.508966493629071, "grad_norm": 0.014137706719338894, "learning_rate": 8.767556438132845e-05, "loss": 0.0064, "step": 97550 }, { "epoch": 11.514865502595564, "grad_norm": 0.06526192277669907, "learning_rate": 8.766334919352017e-05, "loss": 0.0081, "step": 97600 }, { "epoch": 11.520764511562058, "grad_norm": 0.15760919451713562, "learning_rate": 8.765112880708406e-05, "loss": 0.0067, "step": 97650 }, { "epoch": 11.526663520528551, "grad_norm": 0.26734450459480286, "learning_rate": 8.763890322370686e-05, "loss": 0.0075, "step": 97700 }, { "epoch": 11.532562529495046, "grad_norm": 0.15060757100582123, "learning_rate": 8.762667244507608e-05, "loss": 0.0063, "step": 97750 }, { "epoch": 11.538461538461538, "grad_norm": 0.3089323937892914, "learning_rate": 8.761443647287988e-05, "loss": 0.0088, "step": 97800 }, { "epoch": 11.544360547428033, "grad_norm": 0.25196516513824463, "learning_rate": 8.760219530880722e-05, "loss": 0.0082, "step": 97850 }, { "epoch": 11.550259556394526, "grad_norm": 0.055636052042245865, "learning_rate": 8.75899489545477e-05, "loss": 0.0063, "step": 97900 }, { "epoch": 11.55615856536102, "grad_norm": 0.2658281922340393, "learning_rate": 8.757769741179169e-05, "loss": 0.0075, "step": 97950 }, { "epoch": 11.562057574327513, "grad_norm": 0.10172140598297119, "learning_rate": 8.756544068223027e-05, "loss": 0.007, "step": 98000 }, { "epoch": 11.562057574327513, "eval_cer": 0.08519793459552495, "eval_loss": 0.001702439389191568, "eval_runtime": 2.0477, "eval_samples_per_second": 48.836, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 98000 }, { "epoch": 11.567956583294006, "grad_norm": 0.19657890498638153, "learning_rate": 8.755317876755518e-05, "loss": 0.0075, "step": 98050 }, { "epoch": 11.5738555922605, "grad_norm": 0.1614706814289093, "learning_rate": 8.754091166945895e-05, "loss": 0.0082, "step": 98100 }, { "epoch": 11.579754601226995, "grad_norm": 0.017904561012983322, "learning_rate": 8.752863938963478e-05, "loss": 0.007, "step": 98150 }, { "epoch": 11.585653610193487, "grad_norm": 0.41376060247421265, "learning_rate": 8.751636192977659e-05, "loss": 0.0087, "step": 98200 }, { "epoch": 11.59155261915998, "grad_norm": 0.24715644121170044, "learning_rate": 8.750407929157905e-05, "loss": 0.007, "step": 98250 }, { "epoch": 11.597451628126475, "grad_norm": 0.1847107708454132, "learning_rate": 8.74917914767375e-05, "loss": 0.0078, "step": 98300 }, { "epoch": 11.60335063709297, "grad_norm": 0.24897797405719757, "learning_rate": 8.7479498486948e-05, "loss": 0.0081, "step": 98350 }, { "epoch": 11.609249646059462, "grad_norm": 0.14734652638435364, "learning_rate": 8.746720032390737e-05, "loss": 0.007, "step": 98400 }, { "epoch": 11.615148655025955, "grad_norm": 0.14414750039577484, "learning_rate": 8.74548969893131e-05, "loss": 0.0079, "step": 98450 }, { "epoch": 11.62104766399245, "grad_norm": 0.0783819779753685, "learning_rate": 8.744258848486337e-05, "loss": 0.0079, "step": 98500 }, { "epoch": 11.626946672958942, "grad_norm": 0.08586391806602478, "learning_rate": 8.743027481225716e-05, "loss": 0.0055, "step": 98550 }, { "epoch": 11.632845681925437, "grad_norm": 0.24546636641025543, "learning_rate": 8.741795597319409e-05, "loss": 0.0096, "step": 98600 }, { "epoch": 11.63874469089193, "grad_norm": 0.1435178816318512, "learning_rate": 8.74056319693745e-05, "loss": 0.0075, "step": 98650 }, { "epoch": 11.644643699858424, "grad_norm": 0.5370368361473083, "learning_rate": 8.739330280249949e-05, "loss": 0.0081, "step": 98700 }, { "epoch": 11.650542708824917, "grad_norm": 0.4124765992164612, "learning_rate": 8.73809684742708e-05, "loss": 0.0076, "step": 98750 }, { "epoch": 11.656441717791411, "grad_norm": 0.1764090210199356, "learning_rate": 8.736862898639096e-05, "loss": 0.0076, "step": 98800 }, { "epoch": 11.662340726757904, "grad_norm": 0.22164978086948395, "learning_rate": 8.735628434056316e-05, "loss": 0.0084, "step": 98850 }, { "epoch": 11.668239735724399, "grad_norm": 0.12077748030424118, "learning_rate": 8.73439345384913e-05, "loss": 0.008, "step": 98900 }, { "epoch": 11.674138744690891, "grad_norm": 0.38006260991096497, "learning_rate": 8.733157958188003e-05, "loss": 0.0087, "step": 98950 }, { "epoch": 11.680037753657386, "grad_norm": 0.1288873255252838, "learning_rate": 8.731921947243469e-05, "loss": 0.0084, "step": 99000 }, { "epoch": 11.680037753657386, "eval_cer": 0.08605851979345955, "eval_loss": 0.0019495668821036816, "eval_runtime": 2.0286, "eval_samples_per_second": 49.295, "eval_steps_per_second": 1.972, "eval_wer": 0.27, "step": 99000 }, { "epoch": 11.685936762623879, "grad_norm": 0.18233485519886017, "learning_rate": 8.730685421186133e-05, "loss": 0.0084, "step": 99050 }, { "epoch": 11.691835771590373, "grad_norm": 0.12615853548049927, "learning_rate": 8.729448380186669e-05, "loss": 0.0065, "step": 99100 }, { "epoch": 11.697734780556866, "grad_norm": 0.21812477707862854, "learning_rate": 8.728210824415827e-05, "loss": 0.0072, "step": 99150 }, { "epoch": 11.70363378952336, "grad_norm": 0.1088656559586525, "learning_rate": 8.726972754044427e-05, "loss": 0.0084, "step": 99200 }, { "epoch": 11.709532798489853, "grad_norm": 0.10885775834321976, "learning_rate": 8.725734169243354e-05, "loss": 0.0068, "step": 99250 }, { "epoch": 11.715431807456348, "grad_norm": 0.12128423899412155, "learning_rate": 8.724495070183572e-05, "loss": 0.0078, "step": 99300 }, { "epoch": 11.72133081642284, "grad_norm": 0.18469735980033875, "learning_rate": 8.723255457036108e-05, "loss": 0.0085, "step": 99350 }, { "epoch": 11.727229825389335, "grad_norm": 0.3551676273345947, "learning_rate": 8.722015329972069e-05, "loss": 0.0075, "step": 99400 }, { "epoch": 11.733128834355828, "grad_norm": 0.40557342767715454, "learning_rate": 8.720774689162626e-05, "loss": 0.007, "step": 99450 }, { "epoch": 11.739027843322322, "grad_norm": 0.2182224690914154, "learning_rate": 8.719533534779023e-05, "loss": 0.0083, "step": 99500 }, { "epoch": 11.744926852288815, "grad_norm": 0.10795916616916656, "learning_rate": 8.718291866992576e-05, "loss": 0.0063, "step": 99550 }, { "epoch": 11.75082586125531, "grad_norm": 0.19202680885791779, "learning_rate": 8.717049685974673e-05, "loss": 0.0075, "step": 99600 }, { "epoch": 11.756724870221802, "grad_norm": 0.37963131070137024, "learning_rate": 8.715806991896765e-05, "loss": 0.0081, "step": 99650 }, { "epoch": 11.762623879188297, "grad_norm": 0.1796158105134964, "learning_rate": 8.714563784930383e-05, "loss": 0.0066, "step": 99700 }, { "epoch": 11.76852288815479, "grad_norm": 0.24277663230895996, "learning_rate": 8.713320065247128e-05, "loss": 0.0083, "step": 99750 }, { "epoch": 11.774421897121284, "grad_norm": 0.23389078676700592, "learning_rate": 8.712075833018664e-05, "loss": 0.0082, "step": 99800 }, { "epoch": 11.780320906087777, "grad_norm": 0.1991679072380066, "learning_rate": 8.710831088416736e-05, "loss": 0.0076, "step": 99850 }, { "epoch": 11.786219915054271, "grad_norm": 0.1683582067489624, "learning_rate": 8.70958583161315e-05, "loss": 0.0082, "step": 99900 }, { "epoch": 11.792118924020764, "grad_norm": 0.3431551158428192, "learning_rate": 8.708340062779791e-05, "loss": 0.0077, "step": 99950 }, { "epoch": 11.798017932987259, "grad_norm": 0.24333733320236206, "learning_rate": 8.707093782088609e-05, "loss": 0.0073, "step": 100000 }, { "epoch": 11.798017932987259, "eval_cer": 0.08777969018932874, "eval_loss": 0.0017333494033664465, "eval_runtime": 2.0198, "eval_samples_per_second": 49.51, "eval_steps_per_second": 1.98, "eval_wer": 0.27, "step": 100000 }, { "epoch": 11.803916941953752, "grad_norm": 0.2951453626155853, "learning_rate": 8.705846989711626e-05, "loss": 0.0072, "step": 100050 }, { "epoch": 11.809815950920246, "grad_norm": 0.1465378850698471, "learning_rate": 8.704599685820937e-05, "loss": 0.0081, "step": 100100 }, { "epoch": 11.815714959886739, "grad_norm": 0.14550457894802094, "learning_rate": 8.703351870588707e-05, "loss": 0.0073, "step": 100150 }, { "epoch": 11.821613968853233, "grad_norm": 0.20848535001277924, "learning_rate": 8.702103544187167e-05, "loss": 0.0074, "step": 100200 }, { "epoch": 11.827512977819726, "grad_norm": 0.03969821706414223, "learning_rate": 8.700854706788624e-05, "loss": 0.0076, "step": 100250 }, { "epoch": 11.83341198678622, "grad_norm": 0.15203282237052917, "learning_rate": 8.699605358565454e-05, "loss": 0.0074, "step": 100300 }, { "epoch": 11.839310995752713, "grad_norm": 0.16845697164535522, "learning_rate": 8.698355499690102e-05, "loss": 0.0071, "step": 100350 }, { "epoch": 11.845210004719208, "grad_norm": 0.04986455664038658, "learning_rate": 8.697105130335085e-05, "loss": 0.0091, "step": 100400 }, { "epoch": 11.8511090136857, "grad_norm": 0.2429775446653366, "learning_rate": 8.69585425067299e-05, "loss": 0.0083, "step": 100450 }, { "epoch": 11.857008022652195, "grad_norm": 0.026648007333278656, "learning_rate": 8.694602860876475e-05, "loss": 0.0071, "step": 100500 }, { "epoch": 11.862907031618688, "grad_norm": 0.1714363396167755, "learning_rate": 8.693350961118266e-05, "loss": 0.009, "step": 100550 }, { "epoch": 11.868806040585183, "grad_norm": 0.4891417622566223, "learning_rate": 8.692098551571164e-05, "loss": 0.0089, "step": 100600 }, { "epoch": 11.874705049551675, "grad_norm": 0.07812876999378204, "learning_rate": 8.690845632408036e-05, "loss": 0.0091, "step": 100650 }, { "epoch": 11.88060405851817, "grad_norm": 0.24924060702323914, "learning_rate": 8.68959220380182e-05, "loss": 0.0077, "step": 100700 }, { "epoch": 11.886503067484663, "grad_norm": 0.16628946363925934, "learning_rate": 8.688338265925526e-05, "loss": 0.0082, "step": 100750 }, { "epoch": 11.892402076451155, "grad_norm": 0.09424370527267456, "learning_rate": 8.687083818952235e-05, "loss": 0.0056, "step": 100800 }, { "epoch": 11.89830108541765, "grad_norm": 0.08755042403936386, "learning_rate": 8.685828863055097e-05, "loss": 0.0066, "step": 100850 }, { "epoch": 11.904200094384144, "grad_norm": 0.20151649415493011, "learning_rate": 8.68457339840733e-05, "loss": 0.0065, "step": 100900 }, { "epoch": 11.910099103350637, "grad_norm": 0.22298046946525574, "learning_rate": 8.683317425182224e-05, "loss": 0.0067, "step": 100950 }, { "epoch": 11.91599811231713, "grad_norm": 0.12289423495531082, "learning_rate": 8.682060943553143e-05, "loss": 0.0076, "step": 101000 }, { "epoch": 11.91599811231713, "eval_cer": 0.08605851979345955, "eval_loss": 0.003042525378987193, "eval_runtime": 2.0472, "eval_samples_per_second": 48.847, "eval_steps_per_second": 1.954, "eval_wer": 0.27, "step": 101000 }, { "epoch": 11.921897121283624, "grad_norm": 0.04431946203112602, "learning_rate": 8.680803953693514e-05, "loss": 0.0078, "step": 101050 }, { "epoch": 11.927796130250117, "grad_norm": 0.18711991608142853, "learning_rate": 8.679546455776839e-05, "loss": 0.007, "step": 101100 }, { "epoch": 11.933695139216612, "grad_norm": 0.022599412128329277, "learning_rate": 8.678288449976691e-05, "loss": 0.0076, "step": 101150 }, { "epoch": 11.939594148183104, "grad_norm": 0.13755300641059875, "learning_rate": 8.677029936466707e-05, "loss": 0.0078, "step": 101200 }, { "epoch": 11.945493157149599, "grad_norm": 0.1594894528388977, "learning_rate": 8.675770915420602e-05, "loss": 0.0066, "step": 101250 }, { "epoch": 11.951392166116092, "grad_norm": 0.1468011885881424, "learning_rate": 8.674511387012154e-05, "loss": 0.0082, "step": 101300 }, { "epoch": 11.957291175082586, "grad_norm": 0.14093299210071564, "learning_rate": 8.673251351415218e-05, "loss": 0.0073, "step": 101350 }, { "epoch": 11.963190184049079, "grad_norm": 0.13182125985622406, "learning_rate": 8.671990808803712e-05, "loss": 0.0081, "step": 101400 }, { "epoch": 11.969089193015574, "grad_norm": 0.27380552887916565, "learning_rate": 8.670729759351629e-05, "loss": 0.0081, "step": 101450 }, { "epoch": 11.974988201982066, "grad_norm": 0.10284940153360367, "learning_rate": 8.669468203233026e-05, "loss": 0.0067, "step": 101500 }, { "epoch": 11.980887210948561, "grad_norm": 0.12864384055137634, "learning_rate": 8.668206140622042e-05, "loss": 0.0089, "step": 101550 }, { "epoch": 11.986786219915054, "grad_norm": 0.18227070569992065, "learning_rate": 8.666943571692872e-05, "loss": 0.0079, "step": 101600 }, { "epoch": 11.992685228881548, "grad_norm": 0.17785653471946716, "learning_rate": 8.665680496619788e-05, "loss": 0.008, "step": 101650 }, { "epoch": 11.998584237848041, "grad_norm": 0.12726128101348877, "learning_rate": 8.664416915577132e-05, "loss": 0.0079, "step": 101700 }, { "epoch": 12.004483246814535, "grad_norm": 0.08898746967315674, "learning_rate": 8.663152828739312e-05, "loss": 0.0066, "step": 101750 }, { "epoch": 12.010382255781028, "grad_norm": 0.1845828890800476, "learning_rate": 8.661888236280813e-05, "loss": 0.0068, "step": 101800 }, { "epoch": 12.016281264747523, "grad_norm": 0.05294705554842949, "learning_rate": 8.66062313837618e-05, "loss": 0.0048, "step": 101850 }, { "epoch": 12.022180273714016, "grad_norm": 0.08254942297935486, "learning_rate": 8.659357535200038e-05, "loss": 0.0069, "step": 101900 }, { "epoch": 12.02807928268051, "grad_norm": 0.013871526345610619, "learning_rate": 8.658091426927072e-05, "loss": 0.0053, "step": 101950 }, { "epoch": 12.033978291647003, "grad_norm": 0.2461799830198288, "learning_rate": 8.656824813732045e-05, "loss": 0.0065, "step": 102000 }, { "epoch": 12.033978291647003, "eval_cer": 0.08347676419965576, "eval_loss": 0.0006551109254360199, "eval_runtime": 2.0557, "eval_samples_per_second": 48.645, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 102000 }, { "epoch": 12.039877300613497, "grad_norm": 0.4593227207660675, "learning_rate": 8.655557695789784e-05, "loss": 0.0065, "step": 102050 }, { "epoch": 12.04577630957999, "grad_norm": 0.12839196622371674, "learning_rate": 8.654290073275191e-05, "loss": 0.0071, "step": 102100 }, { "epoch": 12.051675318546485, "grad_norm": 0.21570990979671478, "learning_rate": 8.653021946363229e-05, "loss": 0.0075, "step": 102150 }, { "epoch": 12.057574327512977, "grad_norm": 0.22989559173583984, "learning_rate": 8.65175331522894e-05, "loss": 0.0073, "step": 102200 }, { "epoch": 12.063473336479472, "grad_norm": 0.07089775055646896, "learning_rate": 8.650484180047431e-05, "loss": 0.0061, "step": 102250 }, { "epoch": 12.069372345445965, "grad_norm": 0.25162389874458313, "learning_rate": 8.64921454099388e-05, "loss": 0.0067, "step": 102300 }, { "epoch": 12.07527135441246, "grad_norm": 0.05315506458282471, "learning_rate": 8.64794439824353e-05, "loss": 0.0057, "step": 102350 }, { "epoch": 12.081170363378952, "grad_norm": 0.31001877784729004, "learning_rate": 8.646673751971703e-05, "loss": 0.0074, "step": 102400 }, { "epoch": 12.087069372345447, "grad_norm": 0.13728658854961395, "learning_rate": 8.64540260235378e-05, "loss": 0.006, "step": 102450 }, { "epoch": 12.09296838131194, "grad_norm": 0.09372162073850632, "learning_rate": 8.644130949565219e-05, "loss": 0.0067, "step": 102500 }, { "epoch": 12.098867390278434, "grad_norm": 0.22072845697402954, "learning_rate": 8.642858793781544e-05, "loss": 0.0066, "step": 102550 }, { "epoch": 12.104766399244927, "grad_norm": 0.2346402406692505, "learning_rate": 8.64158613517835e-05, "loss": 0.0067, "step": 102600 }, { "epoch": 12.110665408211421, "grad_norm": 0.5796462893486023, "learning_rate": 8.640312973931297e-05, "loss": 0.0072, "step": 102650 }, { "epoch": 12.116564417177914, "grad_norm": 0.15821319818496704, "learning_rate": 8.639039310216123e-05, "loss": 0.0059, "step": 102700 }, { "epoch": 12.122463426144408, "grad_norm": 0.18251626193523407, "learning_rate": 8.637765144208627e-05, "loss": 0.0074, "step": 102750 }, { "epoch": 12.128362435110901, "grad_norm": 0.28908371925354004, "learning_rate": 8.636490476084682e-05, "loss": 0.0066, "step": 102800 }, { "epoch": 12.134261444077396, "grad_norm": 0.3735184669494629, "learning_rate": 8.635215306020228e-05, "loss": 0.0062, "step": 102850 }, { "epoch": 12.140160453043888, "grad_norm": 0.1393624246120453, "learning_rate": 8.633939634191277e-05, "loss": 0.0073, "step": 102900 }, { "epoch": 12.146059462010383, "grad_norm": 0.1818254590034485, "learning_rate": 8.632663460773907e-05, "loss": 0.0069, "step": 102950 }, { "epoch": 12.151958470976876, "grad_norm": 0.27222365140914917, "learning_rate": 8.631386785944265e-05, "loss": 0.0088, "step": 103000 }, { "epoch": 12.151958470976876, "eval_cer": 0.08347676419965576, "eval_loss": 0.0007842204649932683, "eval_runtime": 2.0763, "eval_samples_per_second": 48.163, "eval_steps_per_second": 1.927, "eval_wer": 0.26, "step": 103000 }, { "epoch": 12.15785747994337, "grad_norm": 0.48060837388038635, "learning_rate": 8.630109609878572e-05, "loss": 0.0056, "step": 103050 }, { "epoch": 12.163756488909863, "grad_norm": 0.023852568119764328, "learning_rate": 8.628831932753114e-05, "loss": 0.0064, "step": 103100 }, { "epoch": 12.169655497876358, "grad_norm": 0.11934870481491089, "learning_rate": 8.627553754744248e-05, "loss": 0.0065, "step": 103150 }, { "epoch": 12.17555450684285, "grad_norm": 0.21589864790439606, "learning_rate": 8.626275076028397e-05, "loss": 0.0067, "step": 103200 }, { "epoch": 12.181453515809345, "grad_norm": 0.37388715147972107, "learning_rate": 8.624995896782058e-05, "loss": 0.009, "step": 103250 }, { "epoch": 12.187352524775838, "grad_norm": 0.19823317229747772, "learning_rate": 8.623716217181795e-05, "loss": 0.0077, "step": 103300 }, { "epoch": 12.19325153374233, "grad_norm": 0.18056407570838928, "learning_rate": 8.622436037404238e-05, "loss": 0.0068, "step": 103350 }, { "epoch": 12.199150542708825, "grad_norm": 0.1579950749874115, "learning_rate": 8.62115535762609e-05, "loss": 0.0066, "step": 103400 }, { "epoch": 12.205049551675318, "grad_norm": 0.19041360914707184, "learning_rate": 8.619874178024124e-05, "loss": 0.0067, "step": 103450 }, { "epoch": 12.210948560641812, "grad_norm": 0.12946000695228577, "learning_rate": 8.618592498775175e-05, "loss": 0.0068, "step": 103500 }, { "epoch": 12.216847569608305, "grad_norm": 0.13238298892974854, "learning_rate": 8.617310320056156e-05, "loss": 0.0065, "step": 103550 }, { "epoch": 12.2227465785748, "grad_norm": 0.15099342167377472, "learning_rate": 8.616027642044043e-05, "loss": 0.0071, "step": 103600 }, { "epoch": 12.228645587541292, "grad_norm": 0.1578177511692047, "learning_rate": 8.61474446491588e-05, "loss": 0.0076, "step": 103650 }, { "epoch": 12.234544596507787, "grad_norm": 0.06069856137037277, "learning_rate": 8.613460788848788e-05, "loss": 0.0066, "step": 103700 }, { "epoch": 12.24044360547428, "grad_norm": 0.23041239380836487, "learning_rate": 8.612176614019947e-05, "loss": 0.0055, "step": 103750 }, { "epoch": 12.246342614440774, "grad_norm": 0.10785969346761703, "learning_rate": 8.61089194060661e-05, "loss": 0.0062, "step": 103800 }, { "epoch": 12.252241623407267, "grad_norm": 0.09266254305839539, "learning_rate": 8.609606768786101e-05, "loss": 0.0058, "step": 103850 }, { "epoch": 12.258140632373761, "grad_norm": 0.2821596562862396, "learning_rate": 8.608321098735811e-05, "loss": 0.0066, "step": 103900 }, { "epoch": 12.264039641340254, "grad_norm": 0.304402619600296, "learning_rate": 8.607034930633198e-05, "loss": 0.0076, "step": 103950 }, { "epoch": 12.269938650306749, "grad_norm": 0.1023641973733902, "learning_rate": 8.605748264655791e-05, "loss": 0.0079, "step": 104000 }, { "epoch": 12.269938650306749, "eval_cer": 0.08605851979345955, "eval_loss": 0.001497191609814763, "eval_runtime": 2.0485, "eval_samples_per_second": 48.817, "eval_steps_per_second": 1.953, "eval_wer": 0.27, "step": 104000 }, { "epoch": 12.275837659273241, "grad_norm": 0.1716780960559845, "learning_rate": 8.604461100981187e-05, "loss": 0.0056, "step": 104050 }, { "epoch": 12.281736668239736, "grad_norm": 0.11664465814828873, "learning_rate": 8.603173439787051e-05, "loss": 0.0074, "step": 104100 }, { "epoch": 12.287635677206229, "grad_norm": 0.11298539489507675, "learning_rate": 8.601885281251119e-05, "loss": 0.0078, "step": 104150 }, { "epoch": 12.293534686172723, "grad_norm": 0.17360258102416992, "learning_rate": 8.600596625551191e-05, "loss": 0.0061, "step": 104200 }, { "epoch": 12.299433695139216, "grad_norm": 0.09065496176481247, "learning_rate": 8.599307472865144e-05, "loss": 0.0068, "step": 104250 }, { "epoch": 12.30533270410571, "grad_norm": 0.16618533432483673, "learning_rate": 8.598017823370912e-05, "loss": 0.0071, "step": 104300 }, { "epoch": 12.311231713072203, "grad_norm": 0.24793203175067902, "learning_rate": 8.596727677246507e-05, "loss": 0.0069, "step": 104350 }, { "epoch": 12.317130722038698, "grad_norm": 0.06984519958496094, "learning_rate": 8.595437034670007e-05, "loss": 0.0063, "step": 104400 }, { "epoch": 12.32302973100519, "grad_norm": 0.05576161667704582, "learning_rate": 8.594145895819554e-05, "loss": 0.0087, "step": 104450 }, { "epoch": 12.328928739971685, "grad_norm": 0.11082901060581207, "learning_rate": 8.592854260873367e-05, "loss": 0.0067, "step": 104500 }, { "epoch": 12.334827748938178, "grad_norm": 0.20085814595222473, "learning_rate": 8.591562130009727e-05, "loss": 0.0066, "step": 104550 }, { "epoch": 12.340726757904672, "grad_norm": 0.13007061183452606, "learning_rate": 8.590269503406985e-05, "loss": 0.0071, "step": 104600 }, { "epoch": 12.346625766871165, "grad_norm": 0.23151615262031555, "learning_rate": 8.588976381243561e-05, "loss": 0.0077, "step": 104650 }, { "epoch": 12.35252477583766, "grad_norm": 0.291639506816864, "learning_rate": 8.587682763697943e-05, "loss": 0.0071, "step": 104700 }, { "epoch": 12.358423784804152, "grad_norm": 0.007878324948251247, "learning_rate": 8.58638865094869e-05, "loss": 0.0076, "step": 104750 }, { "epoch": 12.364322793770647, "grad_norm": 0.6719105839729309, "learning_rate": 8.585094043174424e-05, "loss": 0.0057, "step": 104800 }, { "epoch": 12.37022180273714, "grad_norm": 0.1129194125533104, "learning_rate": 8.583798940553837e-05, "loss": 0.0059, "step": 104850 }, { "epoch": 12.376120811703634, "grad_norm": 0.16520221531391144, "learning_rate": 8.582503343265694e-05, "loss": 0.0071, "step": 104900 }, { "epoch": 12.382019820670127, "grad_norm": 0.2492929846048355, "learning_rate": 8.58120725148882e-05, "loss": 0.0064, "step": 104950 }, { "epoch": 12.387918829636622, "grad_norm": 0.1499161720275879, "learning_rate": 8.579910665402118e-05, "loss": 0.0064, "step": 105000 }, { "epoch": 12.387918829636622, "eval_cer": 0.08777969018932874, "eval_loss": 0.0016984660178422928, "eval_runtime": 2.0497, "eval_samples_per_second": 48.788, "eval_steps_per_second": 1.952, "eval_wer": 0.27, "step": 105000 }, { "epoch": 12.393817838603114, "grad_norm": 0.09895317256450653, "learning_rate": 8.578613585184554e-05, "loss": 0.0062, "step": 105050 }, { "epoch": 12.399716847569609, "grad_norm": 0.2941280007362366, "learning_rate": 8.577316011015158e-05, "loss": 0.0064, "step": 105100 }, { "epoch": 12.405615856536102, "grad_norm": 0.14546294510364532, "learning_rate": 8.576017943073037e-05, "loss": 0.008, "step": 105150 }, { "epoch": 12.411514865502596, "grad_norm": 0.14685116708278656, "learning_rate": 8.57471938153736e-05, "loss": 0.0071, "step": 105200 }, { "epoch": 12.417413874469089, "grad_norm": 0.055700283497571945, "learning_rate": 8.573420326587366e-05, "loss": 0.0075, "step": 105250 }, { "epoch": 12.423312883435583, "grad_norm": 0.14060133695602417, "learning_rate": 8.572120778402362e-05, "loss": 0.0058, "step": 105300 }, { "epoch": 12.429211892402076, "grad_norm": 0.22233714163303375, "learning_rate": 8.570820737161723e-05, "loss": 0.007, "step": 105350 }, { "epoch": 12.43511090136857, "grad_norm": 0.1556732952594757, "learning_rate": 8.569520203044893e-05, "loss": 0.006, "step": 105400 }, { "epoch": 12.441009910335064, "grad_norm": 0.27460595965385437, "learning_rate": 8.568219176231383e-05, "loss": 0.0078, "step": 105450 }, { "epoch": 12.446908919301558, "grad_norm": 0.053984567523002625, "learning_rate": 8.56691765690077e-05, "loss": 0.0069, "step": 105500 }, { "epoch": 12.45280792826805, "grad_norm": 0.03072310797870159, "learning_rate": 8.565615645232705e-05, "loss": 0.0062, "step": 105550 }, { "epoch": 12.458706937234545, "grad_norm": 0.0945078581571579, "learning_rate": 8.5643131414069e-05, "loss": 0.006, "step": 105600 }, { "epoch": 12.464605946201038, "grad_norm": 0.16626445949077606, "learning_rate": 8.563010145603141e-05, "loss": 0.0082, "step": 105650 }, { "epoch": 12.470504955167533, "grad_norm": 0.06118094548583031, "learning_rate": 8.561706658001275e-05, "loss": 0.007, "step": 105700 }, { "epoch": 12.476403964134025, "grad_norm": 0.3558347821235657, "learning_rate": 8.560402678781225e-05, "loss": 0.0066, "step": 105750 }, { "epoch": 12.48230297310052, "grad_norm": 0.15512344241142273, "learning_rate": 8.559098208122973e-05, "loss": 0.0065, "step": 105800 }, { "epoch": 12.488201982067013, "grad_norm": 0.3440612256526947, "learning_rate": 8.55779324620658e-05, "loss": 0.0095, "step": 105850 }, { "epoch": 12.494100991033505, "grad_norm": 0.03167041391134262, "learning_rate": 8.556487793212162e-05, "loss": 0.0069, "step": 105900 }, { "epoch": 12.5, "grad_norm": 0.18298880755901337, "learning_rate": 8.555181849319913e-05, "loss": 0.0076, "step": 105950 }, { "epoch": 12.505899008966495, "grad_norm": 0.22017145156860352, "learning_rate": 8.553875414710088e-05, "loss": 0.0072, "step": 106000 }, { "epoch": 12.505899008966495, "eval_cer": 0.08777969018932874, "eval_loss": 0.002405815292149782, "eval_runtime": 2.0505, "eval_samples_per_second": 48.768, "eval_steps_per_second": 1.951, "eval_wer": 0.27, "step": 106000 }, { "epoch": 12.511798017932987, "grad_norm": 0.03425733000040054, "learning_rate": 8.552568489563018e-05, "loss": 0.0082, "step": 106050 }, { "epoch": 12.51769702689948, "grad_norm": 0.08394620567560196, "learning_rate": 8.551261074059089e-05, "loss": 0.0066, "step": 106100 }, { "epoch": 12.523596035865975, "grad_norm": 0.044703733175992966, "learning_rate": 8.549953168378767e-05, "loss": 0.0074, "step": 106150 }, { "epoch": 12.529495044832467, "grad_norm": 0.11732025444507599, "learning_rate": 8.54864477270258e-05, "loss": 0.006, "step": 106200 }, { "epoch": 12.535394053798962, "grad_norm": 0.1261468380689621, "learning_rate": 8.547335887211123e-05, "loss": 0.0068, "step": 106250 }, { "epoch": 12.541293062765455, "grad_norm": 0.215396448969841, "learning_rate": 8.546026512085062e-05, "loss": 0.0073, "step": 106300 }, { "epoch": 12.54719207173195, "grad_norm": 0.07195067405700684, "learning_rate": 8.544716647505127e-05, "loss": 0.0072, "step": 106350 }, { "epoch": 12.553091080698442, "grad_norm": 0.14933955669403076, "learning_rate": 8.543406293652117e-05, "loss": 0.007, "step": 106400 }, { "epoch": 12.558990089664936, "grad_norm": 0.11925100535154343, "learning_rate": 8.542095450706897e-05, "loss": 0.0071, "step": 106450 }, { "epoch": 12.56488909863143, "grad_norm": 0.06875821202993393, "learning_rate": 8.540784118850404e-05, "loss": 0.0068, "step": 106500 }, { "epoch": 12.570788107597924, "grad_norm": 0.09183896332979202, "learning_rate": 8.53947229826364e-05, "loss": 0.007, "step": 106550 }, { "epoch": 12.576687116564417, "grad_norm": 0.013214427046477795, "learning_rate": 8.538159989127672e-05, "loss": 0.0067, "step": 106600 }, { "epoch": 12.582586125530911, "grad_norm": 0.07425747066736221, "learning_rate": 8.536847191623635e-05, "loss": 0.008, "step": 106650 }, { "epoch": 12.588485134497404, "grad_norm": 0.19361512362957, "learning_rate": 8.535533905932738e-05, "loss": 0.0087, "step": 106700 }, { "epoch": 12.594384143463898, "grad_norm": 0.03805544227361679, "learning_rate": 8.534220132236248e-05, "loss": 0.0063, "step": 106750 }, { "epoch": 12.600283152430391, "grad_norm": 0.09311991184949875, "learning_rate": 8.532905870715505e-05, "loss": 0.0072, "step": 106800 }, { "epoch": 12.606182161396886, "grad_norm": 0.24390800297260284, "learning_rate": 8.531591121551916e-05, "loss": 0.008, "step": 106850 }, { "epoch": 12.612081170363378, "grad_norm": 0.06630432605743408, "learning_rate": 8.530275884926952e-05, "loss": 0.0058, "step": 106900 }, { "epoch": 12.617980179329873, "grad_norm": 0.10235299170017242, "learning_rate": 8.528960161022155e-05, "loss": 0.0074, "step": 106950 }, { "epoch": 12.623879188296366, "grad_norm": 0.154184952378273, "learning_rate": 8.527643950019131e-05, "loss": 0.0072, "step": 107000 }, { "epoch": 12.623879188296366, "eval_cer": 0.08777969018932874, "eval_loss": 0.0026281196624040604, "eval_runtime": 2.0261, "eval_samples_per_second": 49.356, "eval_steps_per_second": 1.974, "eval_wer": 0.27, "step": 107000 }, { "epoch": 12.62977819726286, "grad_norm": 0.1334500014781952, "learning_rate": 8.52632725209956e-05, "loss": 0.0071, "step": 107050 }, { "epoch": 12.635677206229353, "grad_norm": 0.16125133633613586, "learning_rate": 8.525010067445178e-05, "loss": 0.0067, "step": 107100 }, { "epoch": 12.641576215195848, "grad_norm": 0.25121748447418213, "learning_rate": 8.523692396237798e-05, "loss": 0.0078, "step": 107150 }, { "epoch": 12.64747522416234, "grad_norm": 0.37278178334236145, "learning_rate": 8.522374238659296e-05, "loss": 0.0078, "step": 107200 }, { "epoch": 12.653374233128835, "grad_norm": 0.03892282769083977, "learning_rate": 8.521055594891615e-05, "loss": 0.0075, "step": 107250 }, { "epoch": 12.659273242095328, "grad_norm": 0.07900988310575485, "learning_rate": 8.51973646511677e-05, "loss": 0.0062, "step": 107300 }, { "epoch": 12.665172251061822, "grad_norm": 0.12080066651105881, "learning_rate": 8.518416849516833e-05, "loss": 0.0071, "step": 107350 }, { "epoch": 12.671071260028315, "grad_norm": 0.07024568319320679, "learning_rate": 8.517096748273951e-05, "loss": 0.0076, "step": 107400 }, { "epoch": 12.67697026899481, "grad_norm": 0.21575407683849335, "learning_rate": 8.515776161570336e-05, "loss": 0.0057, "step": 107450 }, { "epoch": 12.682869277961302, "grad_norm": 0.029585793614387512, "learning_rate": 8.51445508958827e-05, "loss": 0.0064, "step": 107500 }, { "epoch": 12.688768286927797, "grad_norm": 0.420248419046402, "learning_rate": 8.513133532510096e-05, "loss": 0.0063, "step": 107550 }, { "epoch": 12.69466729589429, "grad_norm": 0.22214552760124207, "learning_rate": 8.511811490518227e-05, "loss": 0.0062, "step": 107600 }, { "epoch": 12.700566304860784, "grad_norm": 0.05363507941365242, "learning_rate": 8.510488963795142e-05, "loss": 0.0062, "step": 107650 }, { "epoch": 12.706465313827277, "grad_norm": 0.2389046549797058, "learning_rate": 8.509165952523391e-05, "loss": 0.0093, "step": 107700 }, { "epoch": 12.712364322793771, "grad_norm": 0.04745102673768997, "learning_rate": 8.507842456885585e-05, "loss": 0.0071, "step": 107750 }, { "epoch": 12.718263331760264, "grad_norm": 0.26417049765586853, "learning_rate": 8.506518477064406e-05, "loss": 0.0069, "step": 107800 }, { "epoch": 12.724162340726759, "grad_norm": 0.3307160437107086, "learning_rate": 8.5051940132426e-05, "loss": 0.0065, "step": 107850 }, { "epoch": 12.730061349693251, "grad_norm": 0.07193052023649216, "learning_rate": 8.503869065602983e-05, "loss": 0.0077, "step": 107900 }, { "epoch": 12.735960358659746, "grad_norm": 0.2561742663383484, "learning_rate": 8.502543634328435e-05, "loss": 0.0096, "step": 107950 }, { "epoch": 12.741859367626239, "grad_norm": 0.1848110407590866, "learning_rate": 8.501217719601904e-05, "loss": 0.0079, "step": 108000 }, { "epoch": 12.741859367626239, "eval_cer": 0.09036144578313253, "eval_loss": 0.0035792149137705564, "eval_runtime": 2.0735, "eval_samples_per_second": 48.227, "eval_steps_per_second": 1.929, "eval_wer": 0.28, "step": 108000 }, { "epoch": 12.747758376592733, "grad_norm": 0.027507256716489792, "learning_rate": 8.499891321606404e-05, "loss": 0.0063, "step": 108050 }, { "epoch": 12.753657385559226, "grad_norm": 0.05076661333441734, "learning_rate": 8.498564440525014e-05, "loss": 0.0071, "step": 108100 }, { "epoch": 12.75955639452572, "grad_norm": 0.2718234658241272, "learning_rate": 8.497237076540887e-05, "loss": 0.0077, "step": 108150 }, { "epoch": 12.765455403492213, "grad_norm": 0.18327569961547852, "learning_rate": 8.495909229837234e-05, "loss": 0.0077, "step": 108200 }, { "epoch": 12.771354412458708, "grad_norm": 0.1757778525352478, "learning_rate": 8.494580900597337e-05, "loss": 0.0065, "step": 108250 }, { "epoch": 12.7772534214252, "grad_norm": 0.25291886925697327, "learning_rate": 8.493252089004542e-05, "loss": 0.0084, "step": 108300 }, { "epoch": 12.783152430391695, "grad_norm": 0.3923588693141937, "learning_rate": 8.491922795242266e-05, "loss": 0.0089, "step": 108350 }, { "epoch": 12.789051439358188, "grad_norm": 0.05054664611816406, "learning_rate": 8.49059301949399e-05, "loss": 0.0075, "step": 108400 }, { "epoch": 12.79495044832468, "grad_norm": 0.15760356187820435, "learning_rate": 8.489262761943258e-05, "loss": 0.0053, "step": 108450 }, { "epoch": 12.800849457291175, "grad_norm": 0.20226244628429413, "learning_rate": 8.487932022773686e-05, "loss": 0.0073, "step": 108500 }, { "epoch": 12.80674846625767, "grad_norm": 0.21548672020435333, "learning_rate": 8.486600802168955e-05, "loss": 0.0066, "step": 108550 }, { "epoch": 12.812647475224162, "grad_norm": 0.07064046710729599, "learning_rate": 8.485269100312811e-05, "loss": 0.0088, "step": 108600 }, { "epoch": 12.818546484190655, "grad_norm": 0.1594395488500595, "learning_rate": 8.483936917389068e-05, "loss": 0.0064, "step": 108650 }, { "epoch": 12.82444549315715, "grad_norm": 0.02243776060640812, "learning_rate": 8.482604253581605e-05, "loss": 0.0069, "step": 108700 }, { "epoch": 12.830344502123642, "grad_norm": 0.06896430999040604, "learning_rate": 8.481271109074368e-05, "loss": 0.0074, "step": 108750 }, { "epoch": 12.836243511090137, "grad_norm": 0.05308222398161888, "learning_rate": 8.479937484051368e-05, "loss": 0.0051, "step": 108800 }, { "epoch": 12.84214252005663, "grad_norm": 0.24296465516090393, "learning_rate": 8.478603378696686e-05, "loss": 0.0069, "step": 108850 }, { "epoch": 12.848041529023124, "grad_norm": 0.1699138879776001, "learning_rate": 8.477268793194467e-05, "loss": 0.0059, "step": 108900 }, { "epoch": 12.853940537989617, "grad_norm": 0.20492982864379883, "learning_rate": 8.475933727728919e-05, "loss": 0.0075, "step": 108950 }, { "epoch": 12.859839546956112, "grad_norm": 0.17563951015472412, "learning_rate": 8.474598182484323e-05, "loss": 0.0082, "step": 109000 }, { "epoch": 12.859839546956112, "eval_cer": 0.08519793459552495, "eval_loss": 0.0016206876607611775, "eval_runtime": 2.0533, "eval_samples_per_second": 48.702, "eval_steps_per_second": 1.948, "eval_wer": 0.26, "step": 109000 }, { "epoch": 12.865738555922604, "grad_norm": 0.010940810665488243, "learning_rate": 8.473262157645024e-05, "loss": 0.0064, "step": 109050 }, { "epoch": 12.871637564889099, "grad_norm": 0.02630246803164482, "learning_rate": 8.471925653395426e-05, "loss": 0.0085, "step": 109100 }, { "epoch": 12.877536573855592, "grad_norm": 0.03174159303307533, "learning_rate": 8.47058866992001e-05, "loss": 0.0071, "step": 109150 }, { "epoch": 12.883435582822086, "grad_norm": 0.08538657426834106, "learning_rate": 8.469251207403317e-05, "loss": 0.0082, "step": 109200 }, { "epoch": 12.889334591788579, "grad_norm": 0.18437978625297546, "learning_rate": 8.467913266029957e-05, "loss": 0.0073, "step": 109250 }, { "epoch": 12.895233600755073, "grad_norm": 0.09950319677591324, "learning_rate": 8.466574845984601e-05, "loss": 0.0052, "step": 109300 }, { "epoch": 12.901132609721566, "grad_norm": 0.15241427719593048, "learning_rate": 8.465235947451991e-05, "loss": 0.0069, "step": 109350 }, { "epoch": 12.90703161868806, "grad_norm": 0.1715310662984848, "learning_rate": 8.463896570616934e-05, "loss": 0.0083, "step": 109400 }, { "epoch": 12.912930627654553, "grad_norm": 0.10531897097826004, "learning_rate": 8.462556715664303e-05, "loss": 0.0065, "step": 109450 }, { "epoch": 12.918829636621048, "grad_norm": 0.18394586443901062, "learning_rate": 8.461216382779036e-05, "loss": 0.0071, "step": 109500 }, { "epoch": 12.92472864558754, "grad_norm": 0.11932658404111862, "learning_rate": 8.459875572146138e-05, "loss": 0.0066, "step": 109550 }, { "epoch": 12.930627654554035, "grad_norm": 0.05841100588440895, "learning_rate": 8.458534283950679e-05, "loss": 0.0063, "step": 109600 }, { "epoch": 12.936526663520528, "grad_norm": 0.15441353619098663, "learning_rate": 8.457192518377795e-05, "loss": 0.0062, "step": 109650 }, { "epoch": 12.942425672487023, "grad_norm": 0.3096713423728943, "learning_rate": 8.45585027561269e-05, "loss": 0.0067, "step": 109700 }, { "epoch": 12.948324681453515, "grad_norm": 0.15333284437656403, "learning_rate": 8.454507555840632e-05, "loss": 0.0074, "step": 109750 }, { "epoch": 12.95422369042001, "grad_norm": 0.055634111166000366, "learning_rate": 8.453164359246953e-05, "loss": 0.0064, "step": 109800 }, { "epoch": 12.960122699386503, "grad_norm": 0.14843438565731049, "learning_rate": 8.451820686017053e-05, "loss": 0.0068, "step": 109850 }, { "epoch": 12.966021708352997, "grad_norm": 0.1176924780011177, "learning_rate": 8.4504765363364e-05, "loss": 0.007, "step": 109900 }, { "epoch": 12.97192071731949, "grad_norm": 0.15367069840431213, "learning_rate": 8.449131910390525e-05, "loss": 0.0076, "step": 109950 }, { "epoch": 12.977819726285984, "grad_norm": 0.03803432732820511, "learning_rate": 8.447786808365022e-05, "loss": 0.0075, "step": 110000 }, { "epoch": 12.977819726285984, "eval_cer": 0.08777969018932874, "eval_loss": 0.001433421391993761, "eval_runtime": 2.0688, "eval_samples_per_second": 48.337, "eval_steps_per_second": 1.933, "eval_wer": 0.27, "step": 110000 }, { "epoch": 12.983718735252477, "grad_norm": 0.05968334153294563, "learning_rate": 8.446441230445559e-05, "loss": 0.0093, "step": 110050 }, { "epoch": 12.989617744218972, "grad_norm": 0.03961614891886711, "learning_rate": 8.445095176817859e-05, "loss": 0.0077, "step": 110100 }, { "epoch": 12.995516753185465, "grad_norm": 0.06907562166452408, "learning_rate": 8.44374864766772e-05, "loss": 0.007, "step": 110150 }, { "epoch": 13.001415762151959, "grad_norm": 0.22046992182731628, "learning_rate": 8.442401643181e-05, "loss": 0.0065, "step": 110200 }, { "epoch": 13.007314771118452, "grad_norm": 0.2642599046230316, "learning_rate": 8.441054163543626e-05, "loss": 0.0061, "step": 110250 }, { "epoch": 13.013213780084946, "grad_norm": 0.05454261228442192, "learning_rate": 8.439706208941587e-05, "loss": 0.0054, "step": 110300 }, { "epoch": 13.019112789051439, "grad_norm": 0.22059613466262817, "learning_rate": 8.438357779560941e-05, "loss": 0.0058, "step": 110350 }, { "epoch": 13.025011798017934, "grad_norm": 0.20157091319561005, "learning_rate": 8.43700887558781e-05, "loss": 0.0067, "step": 110400 }, { "epoch": 13.030910806984426, "grad_norm": 0.23848478496074677, "learning_rate": 8.435659497208382e-05, "loss": 0.0052, "step": 110450 }, { "epoch": 13.036809815950921, "grad_norm": 0.15281234681606293, "learning_rate": 8.43430964460891e-05, "loss": 0.0058, "step": 110500 }, { "epoch": 13.042708824917414, "grad_norm": 0.21112264692783356, "learning_rate": 8.432959317975712e-05, "loss": 0.0065, "step": 110550 }, { "epoch": 13.048607833883908, "grad_norm": 0.017480159178376198, "learning_rate": 8.431608517495171e-05, "loss": 0.0063, "step": 110600 }, { "epoch": 13.054506842850401, "grad_norm": 0.08212009072303772, "learning_rate": 8.430257243353739e-05, "loss": 0.0067, "step": 110650 }, { "epoch": 13.060405851816896, "grad_norm": 0.18854698538780212, "learning_rate": 8.428905495737928e-05, "loss": 0.0081, "step": 110700 }, { "epoch": 13.066304860783388, "grad_norm": 0.17078040540218353, "learning_rate": 8.42755327483432e-05, "loss": 0.0054, "step": 110750 }, { "epoch": 13.072203869749883, "grad_norm": 0.1882653832435608, "learning_rate": 8.42620058082956e-05, "loss": 0.0064, "step": 110800 }, { "epoch": 13.078102878716376, "grad_norm": 0.06749177724123001, "learning_rate": 8.42484741391036e-05, "loss": 0.0062, "step": 110850 }, { "epoch": 13.08400188768287, "grad_norm": 0.19454729557037354, "learning_rate": 8.423493774263493e-05, "loss": 0.0049, "step": 110900 }, { "epoch": 13.089900896649363, "grad_norm": 0.08662082999944687, "learning_rate": 8.422139662075802e-05, "loss": 0.0063, "step": 110950 }, { "epoch": 13.095799905615857, "grad_norm": 0.23812668025493622, "learning_rate": 8.420785077534196e-05, "loss": 0.0063, "step": 111000 }, { "epoch": 13.095799905615857, "eval_cer": 0.08605851979345955, "eval_loss": 0.0009701797389425337, "eval_runtime": 2.0444, "eval_samples_per_second": 48.913, "eval_steps_per_second": 1.957, "eval_wer": 0.27, "step": 111000 }, { "epoch": 13.10169891458235, "grad_norm": 0.101280577480793, "learning_rate": 8.419430020825642e-05, "loss": 0.0068, "step": 111050 }, { "epoch": 13.107597923548845, "grad_norm": 0.2930939793586731, "learning_rate": 8.41807449213718e-05, "loss": 0.0061, "step": 111100 }, { "epoch": 13.113496932515337, "grad_norm": 0.8037791848182678, "learning_rate": 8.416718491655911e-05, "loss": 0.0056, "step": 111150 }, { "epoch": 13.11939594148183, "grad_norm": 0.10459987819194794, "learning_rate": 8.415362019569002e-05, "loss": 0.0066, "step": 111200 }, { "epoch": 13.125294950448325, "grad_norm": 0.044608715921640396, "learning_rate": 8.414005076063684e-05, "loss": 0.005, "step": 111250 }, { "epoch": 13.131193959414817, "grad_norm": 0.03595637530088425, "learning_rate": 8.412647661327259e-05, "loss": 0.0069, "step": 111300 }, { "epoch": 13.137092968381312, "grad_norm": 0.126630499958992, "learning_rate": 8.411289775547086e-05, "loss": 0.0062, "step": 111350 }, { "epoch": 13.142991977347805, "grad_norm": 0.02173890359699726, "learning_rate": 8.409931418910591e-05, "loss": 0.0073, "step": 111400 }, { "epoch": 13.1488909863143, "grad_norm": 0.09442427009344101, "learning_rate": 8.408572591605268e-05, "loss": 0.0058, "step": 111450 }, { "epoch": 13.154789995280792, "grad_norm": 0.11156152933835983, "learning_rate": 8.407213293818676e-05, "loss": 0.0059, "step": 111500 }, { "epoch": 13.160689004247287, "grad_norm": 0.02999584563076496, "learning_rate": 8.405853525738434e-05, "loss": 0.0066, "step": 111550 }, { "epoch": 13.16658801321378, "grad_norm": 0.18042443692684174, "learning_rate": 8.404493287552232e-05, "loss": 0.0063, "step": 111600 }, { "epoch": 13.172487022180274, "grad_norm": 0.10272131860256195, "learning_rate": 8.403132579447821e-05, "loss": 0.0055, "step": 111650 }, { "epoch": 13.178386031146767, "grad_norm": 0.06108364835381508, "learning_rate": 8.401771401613016e-05, "loss": 0.006, "step": 111700 }, { "epoch": 13.184285040113261, "grad_norm": 0.2523605227470398, "learning_rate": 8.400409754235704e-05, "loss": 0.0071, "step": 111750 }, { "epoch": 13.190184049079754, "grad_norm": 0.08902260661125183, "learning_rate": 8.399047637503825e-05, "loss": 0.0055, "step": 111800 }, { "epoch": 13.196083058046248, "grad_norm": 0.07318919152021408, "learning_rate": 8.397685051605394e-05, "loss": 0.0049, "step": 111850 }, { "epoch": 13.201982067012741, "grad_norm": 0.2580294907093048, "learning_rate": 8.396321996728489e-05, "loss": 0.0056, "step": 111900 }, { "epoch": 13.207881075979236, "grad_norm": 0.15482476353645325, "learning_rate": 8.394958473061247e-05, "loss": 0.0064, "step": 111950 }, { "epoch": 13.213780084945729, "grad_norm": 0.18896089494228363, "learning_rate": 8.393594480791875e-05, "loss": 0.0064, "step": 112000 }, { "epoch": 13.213780084945729, "eval_cer": 0.08605851979345955, "eval_loss": 0.0014449466252699494, "eval_runtime": 2.0956, "eval_samples_per_second": 47.72, "eval_steps_per_second": 1.909, "eval_wer": 0.27, "step": 112000 }, { "epoch": 13.219679093912223, "grad_norm": 0.3604534864425659, "learning_rate": 8.392230020108643e-05, "loss": 0.0068, "step": 112050 }, { "epoch": 13.225578102878716, "grad_norm": 0.28060588240623474, "learning_rate": 8.390865091199887e-05, "loss": 0.0077, "step": 112100 }, { "epoch": 13.23147711184521, "grad_norm": 0.1734125167131424, "learning_rate": 8.389499694254006e-05, "loss": 0.0074, "step": 112150 }, { "epoch": 13.237376120811703, "grad_norm": 0.17434974014759064, "learning_rate": 8.388133829459464e-05, "loss": 0.0063, "step": 112200 }, { "epoch": 13.243275129778198, "grad_norm": 0.09884390234947205, "learning_rate": 8.38676749700479e-05, "loss": 0.0073, "step": 112250 }, { "epoch": 13.24917413874469, "grad_norm": 0.04471452534198761, "learning_rate": 8.385400697078577e-05, "loss": 0.0079, "step": 112300 }, { "epoch": 13.255073147711185, "grad_norm": 0.06790361553430557, "learning_rate": 8.384033429869482e-05, "loss": 0.0063, "step": 112350 }, { "epoch": 13.260972156677678, "grad_norm": 0.09338211268186569, "learning_rate": 8.382665695566228e-05, "loss": 0.0064, "step": 112400 }, { "epoch": 13.266871165644172, "grad_norm": 0.13828980922698975, "learning_rate": 8.381297494357602e-05, "loss": 0.0074, "step": 112450 }, { "epoch": 13.272770174610665, "grad_norm": 0.04494787007570267, "learning_rate": 8.379928826432455e-05, "loss": 0.0062, "step": 112500 }, { "epoch": 13.27866918357716, "grad_norm": 0.1378510445356369, "learning_rate": 8.378559691979702e-05, "loss": 0.0067, "step": 112550 }, { "epoch": 13.284568192543652, "grad_norm": 0.1802271604537964, "learning_rate": 8.377190091188324e-05, "loss": 0.0082, "step": 112600 }, { "epoch": 13.290467201510147, "grad_norm": 0.19431383907794952, "learning_rate": 8.375820024247366e-05, "loss": 0.0062, "step": 112650 }, { "epoch": 13.29636621047664, "grad_norm": 0.07694712281227112, "learning_rate": 8.374449491345935e-05, "loss": 0.0079, "step": 112700 }, { "epoch": 13.302265219443134, "grad_norm": 0.17364485561847687, "learning_rate": 8.373078492673205e-05, "loss": 0.0051, "step": 112750 }, { "epoch": 13.308164228409627, "grad_norm": 0.012390322051942348, "learning_rate": 8.371707028418412e-05, "loss": 0.0068, "step": 112800 }, { "epoch": 13.314063237376121, "grad_norm": 0.11881326884031296, "learning_rate": 8.37033509877086e-05, "loss": 0.0056, "step": 112850 }, { "epoch": 13.319962246342614, "grad_norm": 0.12254542112350464, "learning_rate": 8.368962703919913e-05, "loss": 0.005, "step": 112900 }, { "epoch": 13.325861255309109, "grad_norm": 0.21270984411239624, "learning_rate": 8.367589844055003e-05, "loss": 0.0078, "step": 112950 }, { "epoch": 13.331760264275601, "grad_norm": 0.147696390748024, "learning_rate": 8.366216519365621e-05, "loss": 0.0071, "step": 113000 }, { "epoch": 13.331760264275601, "eval_cer": 0.09122203098106713, "eval_loss": 0.0016989619471132755, "eval_runtime": 2.0844, "eval_samples_per_second": 47.974, "eval_steps_per_second": 1.919, "eval_wer": 0.27, "step": 113000 }, { "epoch": 13.337659273242096, "grad_norm": 0.12660951912403107, "learning_rate": 8.36484273004133e-05, "loss": 0.0075, "step": 113050 }, { "epoch": 13.343558282208589, "grad_norm": 0.028595050796866417, "learning_rate": 8.363468476271747e-05, "loss": 0.0057, "step": 113100 }, { "epoch": 13.349457291175083, "grad_norm": 0.3007996082305908, "learning_rate": 8.362093758246562e-05, "loss": 0.0073, "step": 113150 }, { "epoch": 13.355356300141576, "grad_norm": 0.1511448174715042, "learning_rate": 8.360718576155527e-05, "loss": 0.0066, "step": 113200 }, { "epoch": 13.36125530910807, "grad_norm": 0.012748025357723236, "learning_rate": 8.359342930188451e-05, "loss": 0.0059, "step": 113250 }, { "epoch": 13.367154318074563, "grad_norm": 0.371084600687027, "learning_rate": 8.35796682053522e-05, "loss": 0.0081, "step": 113300 }, { "epoch": 13.373053327041058, "grad_norm": 0.07596989721059799, "learning_rate": 8.356590247385773e-05, "loss": 0.0057, "step": 113350 }, { "epoch": 13.37895233600755, "grad_norm": 0.03968050703406334, "learning_rate": 8.355213210930118e-05, "loss": 0.007, "step": 113400 }, { "epoch": 13.384851344974045, "grad_norm": 0.14863695204257965, "learning_rate": 8.353835711358322e-05, "loss": 0.0063, "step": 113450 }, { "epoch": 13.390750353940538, "grad_norm": 0.13007861375808716, "learning_rate": 8.352457748860524e-05, "loss": 0.0057, "step": 113500 }, { "epoch": 13.396649362907032, "grad_norm": 0.14501604437828064, "learning_rate": 8.351079323626922e-05, "loss": 0.0069, "step": 113550 }, { "epoch": 13.402548371873525, "grad_norm": 0.14601066708564758, "learning_rate": 8.349700435847778e-05, "loss": 0.0065, "step": 113600 }, { "epoch": 13.40844738084002, "grad_norm": 0.05088463053107262, "learning_rate": 8.348321085713418e-05, "loss": 0.0063, "step": 113650 }, { "epoch": 13.414346389806513, "grad_norm": 0.18979716300964355, "learning_rate": 8.346941273414231e-05, "loss": 0.006, "step": 113700 }, { "epoch": 13.420245398773005, "grad_norm": 0.15315541625022888, "learning_rate": 8.345560999140672e-05, "loss": 0.0074, "step": 113750 }, { "epoch": 13.4261444077395, "grad_norm": 0.11051509529352188, "learning_rate": 8.344180263083257e-05, "loss": 0.0065, "step": 113800 }, { "epoch": 13.432043416705993, "grad_norm": 0.13241691887378693, "learning_rate": 8.34279906543257e-05, "loss": 0.0054, "step": 113850 }, { "epoch": 13.437942425672487, "grad_norm": 0.10094498097896576, "learning_rate": 8.341417406379255e-05, "loss": 0.0059, "step": 113900 }, { "epoch": 13.44384143463898, "grad_norm": 0.05713101848959923, "learning_rate": 8.34003528611402e-05, "loss": 0.006, "step": 113950 }, { "epoch": 13.449740443605474, "grad_norm": 0.0792035236954689, "learning_rate": 8.33865270482764e-05, "loss": 0.0063, "step": 114000 }, { "epoch": 13.449740443605474, "eval_cer": 0.08777969018932874, "eval_loss": 0.002163813216611743, "eval_runtime": 2.0212, "eval_samples_per_second": 49.474, "eval_steps_per_second": 1.979, "eval_wer": 0.27, "step": 114000 }, { "epoch": 13.455639452571967, "grad_norm": 0.1218341812491417, "learning_rate": 8.337269662710949e-05, "loss": 0.0064, "step": 114050 }, { "epoch": 13.461538461538462, "grad_norm": 0.16186785697937012, "learning_rate": 8.335886159954846e-05, "loss": 0.0038, "step": 114100 }, { "epoch": 13.467437470504954, "grad_norm": 0.05460064113140106, "learning_rate": 8.334502196750294e-05, "loss": 0.0084, "step": 114150 }, { "epoch": 13.473336479471449, "grad_norm": 0.060916196554899216, "learning_rate": 8.333117773288324e-05, "loss": 0.0067, "step": 114200 }, { "epoch": 13.479235488437942, "grad_norm": 0.16761784255504608, "learning_rate": 8.33173288976002e-05, "loss": 0.0063, "step": 114250 }, { "epoch": 13.485134497404436, "grad_norm": 0.156794473528862, "learning_rate": 8.330347546356541e-05, "loss": 0.0053, "step": 114300 }, { "epoch": 13.491033506370929, "grad_norm": 0.16519209742546082, "learning_rate": 8.328961743269102e-05, "loss": 0.0076, "step": 114350 }, { "epoch": 13.496932515337424, "grad_norm": 0.2741299569606781, "learning_rate": 8.327575480688985e-05, "loss": 0.0069, "step": 114400 }, { "epoch": 13.502831524303916, "grad_norm": 0.1349022537469864, "learning_rate": 8.326188758807532e-05, "loss": 0.0058, "step": 114450 }, { "epoch": 13.50873053327041, "grad_norm": 0.14235830307006836, "learning_rate": 8.324801577816152e-05, "loss": 0.0063, "step": 114500 }, { "epoch": 13.514629542236904, "grad_norm": 0.20688217878341675, "learning_rate": 8.323413937906316e-05, "loss": 0.0061, "step": 114550 }, { "epoch": 13.520528551203398, "grad_norm": 0.013508409261703491, "learning_rate": 8.32202583926956e-05, "loss": 0.0061, "step": 114600 }, { "epoch": 13.526427560169891, "grad_norm": 0.02650420553982258, "learning_rate": 8.320637282097479e-05, "loss": 0.0067, "step": 114650 }, { "epoch": 13.532326569136385, "grad_norm": 0.16433188319206238, "learning_rate": 8.319248266581735e-05, "loss": 0.0055, "step": 114700 }, { "epoch": 13.538225578102878, "grad_norm": 0.1942131519317627, "learning_rate": 8.317858792914051e-05, "loss": 0.0059, "step": 114750 }, { "epoch": 13.544124587069373, "grad_norm": 0.21561923623085022, "learning_rate": 8.316468861286217e-05, "loss": 0.0044, "step": 114800 }, { "epoch": 13.550023596035865, "grad_norm": 0.05844059959053993, "learning_rate": 8.315078471890081e-05, "loss": 0.0063, "step": 114850 }, { "epoch": 13.55592260500236, "grad_norm": 0.30488401651382446, "learning_rate": 8.313687624917559e-05, "loss": 0.0062, "step": 114900 }, { "epoch": 13.561821613968853, "grad_norm": 0.1414973884820938, "learning_rate": 8.312296320560625e-05, "loss": 0.0067, "step": 114950 }, { "epoch": 13.567720622935347, "grad_norm": 0.1953800618648529, "learning_rate": 8.310904559011323e-05, "loss": 0.0062, "step": 115000 }, { "epoch": 13.567720622935347, "eval_cer": 0.08605851979345955, "eval_loss": 0.0015380436088889837, "eval_runtime": 2.0388, "eval_samples_per_second": 49.047, "eval_steps_per_second": 1.962, "eval_wer": 0.27, "step": 115000 }, { "epoch": 13.57361963190184, "grad_norm": 0.0632181242108345, "learning_rate": 8.309512340461753e-05, "loss": 0.0045, "step": 115050 }, { "epoch": 13.579518640868335, "grad_norm": 0.23106397688388824, "learning_rate": 8.308119665104081e-05, "loss": 0.0076, "step": 115100 }, { "epoch": 13.585417649834827, "grad_norm": 0.18314982950687408, "learning_rate": 8.306726533130541e-05, "loss": 0.0061, "step": 115150 }, { "epoch": 13.591316658801322, "grad_norm": 0.08104858547449112, "learning_rate": 8.305332944733419e-05, "loss": 0.0081, "step": 115200 }, { "epoch": 13.597215667767815, "grad_norm": 0.3119395077228546, "learning_rate": 8.303938900105077e-05, "loss": 0.006, "step": 115250 }, { "epoch": 13.60311467673431, "grad_norm": 0.2528354823589325, "learning_rate": 8.302544399437928e-05, "loss": 0.0072, "step": 115300 }, { "epoch": 13.609013685700802, "grad_norm": 0.11670029163360596, "learning_rate": 8.301149442924454e-05, "loss": 0.0071, "step": 115350 }, { "epoch": 13.614912694667296, "grad_norm": 0.14290179312229156, "learning_rate": 8.299754030757202e-05, "loss": 0.0065, "step": 115400 }, { "epoch": 13.62081170363379, "grad_norm": 0.12578532099723816, "learning_rate": 8.298358163128777e-05, "loss": 0.0071, "step": 115450 }, { "epoch": 13.626710712600284, "grad_norm": 0.02761898562312126, "learning_rate": 8.29696184023185e-05, "loss": 0.0069, "step": 115500 }, { "epoch": 13.632609721566777, "grad_norm": 0.09234311431646347, "learning_rate": 8.295565062259152e-05, "loss": 0.0051, "step": 115550 }, { "epoch": 13.638508730533271, "grad_norm": 0.20395894348621368, "learning_rate": 8.294167829403481e-05, "loss": 0.0083, "step": 115600 }, { "epoch": 13.644407739499764, "grad_norm": 0.08554045110940933, "learning_rate": 8.292770141857695e-05, "loss": 0.0065, "step": 115650 }, { "epoch": 13.650306748466258, "grad_norm": 0.5442200303077698, "learning_rate": 8.291371999814712e-05, "loss": 0.0065, "step": 115700 }, { "epoch": 13.656205757432751, "grad_norm": 0.143574059009552, "learning_rate": 8.28997340346752e-05, "loss": 0.0071, "step": 115750 }, { "epoch": 13.662104766399246, "grad_norm": 0.2990308403968811, "learning_rate": 8.288574353009164e-05, "loss": 0.0064, "step": 115800 }, { "epoch": 13.668003775365738, "grad_norm": 0.16593655943870544, "learning_rate": 8.287174848632752e-05, "loss": 0.0068, "step": 115850 }, { "epoch": 13.673902784332233, "grad_norm": 0.0657423585653305, "learning_rate": 8.285774890531459e-05, "loss": 0.0061, "step": 115900 }, { "epoch": 13.679801793298726, "grad_norm": 0.09734640270471573, "learning_rate": 8.284374478898518e-05, "loss": 0.0067, "step": 115950 }, { "epoch": 13.68570080226522, "grad_norm": 0.0737544447183609, "learning_rate": 8.282973613927225e-05, "loss": 0.0059, "step": 116000 }, { "epoch": 13.68570080226522, "eval_cer": 0.08519793459552495, "eval_loss": 0.001486246008425951, "eval_runtime": 2.0871, "eval_samples_per_second": 47.913, "eval_steps_per_second": 1.917, "eval_wer": 0.26, "step": 116000 }, { "epoch": 13.691599811231713, "grad_norm": 0.22785525023937225, "learning_rate": 8.281572295810939e-05, "loss": 0.0068, "step": 116050 }, { "epoch": 13.697498820198208, "grad_norm": 0.08075135946273804, "learning_rate": 8.280170524743087e-05, "loss": 0.0068, "step": 116100 }, { "epoch": 13.7033978291647, "grad_norm": 0.1295725554227829, "learning_rate": 8.27876830091715e-05, "loss": 0.0067, "step": 116150 }, { "epoch": 13.709296838131195, "grad_norm": 0.2512216866016388, "learning_rate": 8.277365624526675e-05, "loss": 0.0061, "step": 116200 }, { "epoch": 13.715195847097688, "grad_norm": 0.045879341661930084, "learning_rate": 8.275962495765274e-05, "loss": 0.0064, "step": 116250 }, { "epoch": 13.72109485606418, "grad_norm": 0.22053220868110657, "learning_rate": 8.274558914826618e-05, "loss": 0.0066, "step": 116300 }, { "epoch": 13.726993865030675, "grad_norm": 0.04272620752453804, "learning_rate": 8.273154881904441e-05, "loss": 0.0057, "step": 116350 }, { "epoch": 13.73289287399717, "grad_norm": 0.20550371706485748, "learning_rate": 8.271750397192542e-05, "loss": 0.0073, "step": 116400 }, { "epoch": 13.738791882963662, "grad_norm": 0.14313508570194244, "learning_rate": 8.270345460884777e-05, "loss": 0.0054, "step": 116450 }, { "epoch": 13.744690891930155, "grad_norm": 0.12582620978355408, "learning_rate": 8.268940073175071e-05, "loss": 0.0068, "step": 116500 }, { "epoch": 13.75058990089665, "grad_norm": 0.1423470824956894, "learning_rate": 8.267534234257408e-05, "loss": 0.0067, "step": 116550 }, { "epoch": 13.756488909863142, "grad_norm": 0.0397995226085186, "learning_rate": 8.266127944325833e-05, "loss": 0.0057, "step": 116600 }, { "epoch": 13.762387918829637, "grad_norm": 0.26475539803504944, "learning_rate": 8.264721203574452e-05, "loss": 0.0062, "step": 116650 }, { "epoch": 13.76828692779613, "grad_norm": 0.028605498373508453, "learning_rate": 8.26331401219744e-05, "loss": 0.0052, "step": 116700 }, { "epoch": 13.774185936762624, "grad_norm": 0.15994051098823547, "learning_rate": 8.26190637038903e-05, "loss": 0.0064, "step": 116750 }, { "epoch": 13.780084945729117, "grad_norm": 0.20785708725452423, "learning_rate": 8.260498278343514e-05, "loss": 0.0086, "step": 116800 }, { "epoch": 13.785983954695611, "grad_norm": 0.12886598706245422, "learning_rate": 8.259089736255252e-05, "loss": 0.0045, "step": 116850 }, { "epoch": 13.791882963662104, "grad_norm": 0.13774077594280243, "learning_rate": 8.257680744318662e-05, "loss": 0.0071, "step": 116900 }, { "epoch": 13.797781972628599, "grad_norm": 0.180731862783432, "learning_rate": 8.256271302728225e-05, "loss": 0.0063, "step": 116950 }, { "epoch": 13.803680981595091, "grad_norm": 0.038787078112363815, "learning_rate": 8.254861411678485e-05, "loss": 0.0069, "step": 117000 }, { "epoch": 13.803680981595091, "eval_cer": 0.08519793459552495, "eval_loss": 0.0012168195098638535, "eval_runtime": 2.0877, "eval_samples_per_second": 47.899, "eval_steps_per_second": 1.916, "eval_wer": 0.26, "step": 117000 }, { "epoch": 13.809579990561586, "grad_norm": 0.037867221981287, "learning_rate": 8.253451071364049e-05, "loss": 0.0064, "step": 117050 }, { "epoch": 13.815478999528079, "grad_norm": 0.21917757391929626, "learning_rate": 8.252040281979583e-05, "loss": 0.006, "step": 117100 }, { "epoch": 13.821378008494573, "grad_norm": 0.2018294334411621, "learning_rate": 8.250629043719818e-05, "loss": 0.0074, "step": 117150 }, { "epoch": 13.827277017461066, "grad_norm": 0.11660819500684738, "learning_rate": 8.249217356779544e-05, "loss": 0.0065, "step": 117200 }, { "epoch": 13.83317602642756, "grad_norm": 0.05005534365773201, "learning_rate": 8.247805221353615e-05, "loss": 0.0059, "step": 117250 }, { "epoch": 13.839075035394053, "grad_norm": 0.09540936350822449, "learning_rate": 8.246392637636945e-05, "loss": 0.0083, "step": 117300 }, { "epoch": 13.844974044360548, "grad_norm": 0.06600872427225113, "learning_rate": 8.244979605824517e-05, "loss": 0.0067, "step": 117350 }, { "epoch": 13.85087305332704, "grad_norm": 0.2546648681163788, "learning_rate": 8.243566126111363e-05, "loss": 0.0085, "step": 117400 }, { "epoch": 13.856772062293535, "grad_norm": 0.23936015367507935, "learning_rate": 8.242152198692588e-05, "loss": 0.0068, "step": 117450 }, { "epoch": 13.862671071260028, "grad_norm": 0.09030961245298386, "learning_rate": 8.240737823763354e-05, "loss": 0.0077, "step": 117500 }, { "epoch": 13.868570080226522, "grad_norm": 0.0963592529296875, "learning_rate": 8.239323001518884e-05, "loss": 0.0059, "step": 117550 }, { "epoch": 13.874469089193015, "grad_norm": 0.08083092421293259, "learning_rate": 8.237907732154467e-05, "loss": 0.0064, "step": 117600 }, { "epoch": 13.88036809815951, "grad_norm": 0.033592235296964645, "learning_rate": 8.236492015865448e-05, "loss": 0.0054, "step": 117650 }, { "epoch": 13.886267107126002, "grad_norm": 0.31403952836990356, "learning_rate": 8.23507585284724e-05, "loss": 0.0063, "step": 117700 }, { "epoch": 13.892166116092497, "grad_norm": 0.3039640486240387, "learning_rate": 8.233659243295311e-05, "loss": 0.0067, "step": 117750 }, { "epoch": 13.89806512505899, "grad_norm": 0.22703981399536133, "learning_rate": 8.232242187405194e-05, "loss": 0.0074, "step": 117800 }, { "epoch": 13.903964134025484, "grad_norm": 0.15898747742176056, "learning_rate": 8.230824685372486e-05, "loss": 0.0072, "step": 117850 }, { "epoch": 13.909863142991977, "grad_norm": 0.0565611906349659, "learning_rate": 8.229406737392843e-05, "loss": 0.0067, "step": 117900 }, { "epoch": 13.915762151958472, "grad_norm": 0.04561234265565872, "learning_rate": 8.227988343661981e-05, "loss": 0.0063, "step": 117950 }, { "epoch": 13.921661160924964, "grad_norm": 0.2497343271970749, "learning_rate": 8.22656950437568e-05, "loss": 0.0056, "step": 118000 }, { "epoch": 13.921661160924964, "eval_cer": 0.08519793459552495, "eval_loss": 0.0009529637172818184, "eval_runtime": 2.0192, "eval_samples_per_second": 49.523, "eval_steps_per_second": 1.981, "eval_wer": 0.26, "step": 118000 }, { "epoch": 13.927560169891459, "grad_norm": 0.04625144973397255, "learning_rate": 8.225150219729781e-05, "loss": 0.0063, "step": 118050 }, { "epoch": 13.933459178857952, "grad_norm": 0.1198304146528244, "learning_rate": 8.223730489920187e-05, "loss": 0.0063, "step": 118100 }, { "epoch": 13.939358187824446, "grad_norm": 0.053552769124507904, "learning_rate": 8.222310315142858e-05, "loss": 0.0075, "step": 118150 }, { "epoch": 13.945257196790939, "grad_norm": 0.1730073243379593, "learning_rate": 8.220889695593823e-05, "loss": 0.0064, "step": 118200 }, { "epoch": 13.951156205757433, "grad_norm": 0.17591719329357147, "learning_rate": 8.219468631469168e-05, "loss": 0.0094, "step": 118250 }, { "epoch": 13.957055214723926, "grad_norm": 0.13458940386772156, "learning_rate": 8.218047122965038e-05, "loss": 0.008, "step": 118300 }, { "epoch": 13.96295422369042, "grad_norm": 0.07871360331773758, "learning_rate": 8.216625170277646e-05, "loss": 0.0051, "step": 118350 }, { "epoch": 13.968853232656913, "grad_norm": 0.12610037624835968, "learning_rate": 8.215202773603258e-05, "loss": 0.0065, "step": 118400 }, { "epoch": 13.974752241623408, "grad_norm": 0.16145330667495728, "learning_rate": 8.213779933138211e-05, "loss": 0.0061, "step": 118450 }, { "epoch": 13.9806512505899, "grad_norm": 0.13524040579795837, "learning_rate": 8.212356649078892e-05, "loss": 0.0075, "step": 118500 }, { "epoch": 13.986550259556395, "grad_norm": 0.20117215812206268, "learning_rate": 8.21093292162176e-05, "loss": 0.0075, "step": 118550 }, { "epoch": 13.992449268522888, "grad_norm": 0.1817513406276703, "learning_rate": 8.209508750963328e-05, "loss": 0.0055, "step": 118600 }, { "epoch": 13.998348277489383, "grad_norm": 0.18174846470355988, "learning_rate": 8.208084137300175e-05, "loss": 0.0078, "step": 118650 }, { "epoch": 14.004247286455875, "grad_norm": 0.18918964266777039, "learning_rate": 8.206659080828936e-05, "loss": 0.0056, "step": 118700 }, { "epoch": 14.01014629542237, "grad_norm": 0.17061558365821838, "learning_rate": 8.20523358174631e-05, "loss": 0.0047, "step": 118750 }, { "epoch": 14.016045304388863, "grad_norm": 0.1890677660703659, "learning_rate": 8.203807640249062e-05, "loss": 0.0061, "step": 118800 }, { "epoch": 14.021944313355357, "grad_norm": 0.1347930133342743, "learning_rate": 8.202381256534006e-05, "loss": 0.0061, "step": 118850 }, { "epoch": 14.02784332232185, "grad_norm": 0.12139833718538284, "learning_rate": 8.200954430798027e-05, "loss": 0.0055, "step": 118900 }, { "epoch": 14.033742331288343, "grad_norm": 0.04915774613618851, "learning_rate": 8.19952716323807e-05, "loss": 0.0057, "step": 118950 }, { "epoch": 14.039641340254837, "grad_norm": 0.04792744293808937, "learning_rate": 8.198099454051136e-05, "loss": 0.0058, "step": 119000 }, { "epoch": 14.039641340254837, "eval_cer": 0.08950086058519793, "eval_loss": 0.0013252744683995843, "eval_runtime": 2.0699, "eval_samples_per_second": 48.311, "eval_steps_per_second": 1.932, "eval_wer": 0.27, "step": 119000 }, { "epoch": 14.04554034922133, "grad_norm": 0.12971197068691254, "learning_rate": 8.196671303434291e-05, "loss": 0.0051, "step": 119050 }, { "epoch": 14.051439358187825, "grad_norm": 0.0662168338894844, "learning_rate": 8.195242711584661e-05, "loss": 0.0066, "step": 119100 }, { "epoch": 14.057338367154317, "grad_norm": 0.13742464780807495, "learning_rate": 8.193813678699434e-05, "loss": 0.0058, "step": 119150 }, { "epoch": 14.063237376120812, "grad_norm": 0.21038055419921875, "learning_rate": 8.192384204975857e-05, "loss": 0.0065, "step": 119200 }, { "epoch": 14.069136385087305, "grad_norm": 0.07625085860490799, "learning_rate": 8.190954290611238e-05, "loss": 0.0065, "step": 119250 }, { "epoch": 14.075035394053799, "grad_norm": 0.18297863006591797, "learning_rate": 8.189523935802948e-05, "loss": 0.0063, "step": 119300 }, { "epoch": 14.080934403020292, "grad_norm": 0.04452533274888992, "learning_rate": 8.188093140748413e-05, "loss": 0.0056, "step": 119350 }, { "epoch": 14.086833411986786, "grad_norm": 0.1060747504234314, "learning_rate": 8.18666190564513e-05, "loss": 0.0049, "step": 119400 }, { "epoch": 14.09273242095328, "grad_norm": 0.07499154657125473, "learning_rate": 8.185230230690647e-05, "loss": 0.0055, "step": 119450 }, { "epoch": 14.098631429919774, "grad_norm": 0.14661350846290588, "learning_rate": 8.183798116082575e-05, "loss": 0.0066, "step": 119500 }, { "epoch": 14.104530438886266, "grad_norm": 0.11344227939844131, "learning_rate": 8.18236556201859e-05, "loss": 0.0063, "step": 119550 }, { "epoch": 14.110429447852761, "grad_norm": 0.12267418205738068, "learning_rate": 8.180932568696427e-05, "loss": 0.0054, "step": 119600 }, { "epoch": 14.116328456819254, "grad_norm": 0.061619311571121216, "learning_rate": 8.179499136313877e-05, "loss": 0.006, "step": 119650 }, { "epoch": 14.122227465785748, "grad_norm": 0.07647037506103516, "learning_rate": 8.178065265068795e-05, "loss": 0.0054, "step": 119700 }, { "epoch": 14.128126474752241, "grad_norm": 0.07726386189460754, "learning_rate": 8.176630955159099e-05, "loss": 0.0055, "step": 119750 }, { "epoch": 14.134025483718736, "grad_norm": 0.0860302597284317, "learning_rate": 8.175196206782765e-05, "loss": 0.0049, "step": 119800 }, { "epoch": 14.139924492685228, "grad_norm": 0.12388230115175247, "learning_rate": 8.173761020137827e-05, "loss": 0.0067, "step": 119850 }, { "epoch": 14.145823501651723, "grad_norm": 0.16080453991889954, "learning_rate": 8.172325395422384e-05, "loss": 0.0054, "step": 119900 }, { "epoch": 14.151722510618216, "grad_norm": 0.07633031904697418, "learning_rate": 8.170889332834595e-05, "loss": 0.0055, "step": 119950 }, { "epoch": 14.15762151958471, "grad_norm": 0.10558518767356873, "learning_rate": 8.169452832572675e-05, "loss": 0.0058, "step": 120000 }, { "epoch": 14.15762151958471, "eval_cer": 0.08519793459552495, "eval_loss": 0.0014750545378774405, "eval_runtime": 2.0782, "eval_samples_per_second": 48.118, "eval_steps_per_second": 1.925, "eval_wer": 0.26, "step": 120000 }, { "epoch": 14.163520528551203, "grad_norm": 0.3351081311702728, "learning_rate": 8.168015894834903e-05, "loss": 0.0064, "step": 120050 }, { "epoch": 14.169419537517697, "grad_norm": 0.16603878140449524, "learning_rate": 8.166578519819622e-05, "loss": 0.0071, "step": 120100 }, { "epoch": 14.17531854648419, "grad_norm": 0.16547593474388123, "learning_rate": 8.165140707725223e-05, "loss": 0.0072, "step": 120150 }, { "epoch": 14.181217555450685, "grad_norm": 0.1274362951517105, "learning_rate": 8.163702458750173e-05, "loss": 0.0057, "step": 120200 }, { "epoch": 14.187116564417177, "grad_norm": 0.22634366154670715, "learning_rate": 8.162263773092989e-05, "loss": 0.0053, "step": 120250 }, { "epoch": 14.193015573383672, "grad_norm": 0.1349925398826599, "learning_rate": 8.16082465095225e-05, "loss": 0.0056, "step": 120300 }, { "epoch": 14.198914582350165, "grad_norm": 0.18948504328727722, "learning_rate": 8.159385092526599e-05, "loss": 0.0049, "step": 120350 }, { "epoch": 14.20481359131666, "grad_norm": 0.13234055042266846, "learning_rate": 8.157945098014734e-05, "loss": 0.0061, "step": 120400 }, { "epoch": 14.210712600283152, "grad_norm": 0.09060535579919815, "learning_rate": 8.156504667615419e-05, "loss": 0.0063, "step": 120450 }, { "epoch": 14.216611609249647, "grad_norm": 0.16783268749713898, "learning_rate": 8.155063801527472e-05, "loss": 0.0053, "step": 120500 }, { "epoch": 14.22251061821614, "grad_norm": 0.08817880600690842, "learning_rate": 8.153622499949774e-05, "loss": 0.0044, "step": 120550 }, { "epoch": 14.228409627182634, "grad_norm": 0.07502227276563644, "learning_rate": 8.152180763081267e-05, "loss": 0.0048, "step": 120600 }, { "epoch": 14.234308636149127, "grad_norm": 0.28077152371406555, "learning_rate": 8.150738591120953e-05, "loss": 0.0063, "step": 120650 }, { "epoch": 14.240207645115621, "grad_norm": 0.12427005171775818, "learning_rate": 8.149295984267892e-05, "loss": 0.0055, "step": 120700 }, { "epoch": 14.246106654082114, "grad_norm": 0.05770284682512283, "learning_rate": 8.147852942721205e-05, "loss": 0.0052, "step": 120750 }, { "epoch": 14.252005663048608, "grad_norm": 0.04075107350945473, "learning_rate": 8.146409466680076e-05, "loss": 0.0054, "step": 120800 }, { "epoch": 14.257904672015101, "grad_norm": 0.15443956851959229, "learning_rate": 8.144965556343744e-05, "loss": 0.0058, "step": 120850 }, { "epoch": 14.263803680981596, "grad_norm": 0.018966052681207657, "learning_rate": 8.143521211911511e-05, "loss": 0.0064, "step": 120900 }, { "epoch": 14.269702689948089, "grad_norm": 0.07457196712493896, "learning_rate": 8.142076433582737e-05, "loss": 0.007, "step": 120950 }, { "epoch": 14.275601698914583, "grad_norm": 0.2132231593132019, "learning_rate": 8.140631221556845e-05, "loss": 0.0078, "step": 121000 }, { "epoch": 14.275601698914583, "eval_cer": 0.08519793459552495, "eval_loss": 0.001187554094940424, "eval_runtime": 2.0713, "eval_samples_per_second": 48.279, "eval_steps_per_second": 1.931, "eval_wer": 0.26, "step": 121000 }, { "epoch": 14.281500707881076, "grad_norm": 0.119953952729702, "learning_rate": 8.139185576033314e-05, "loss": 0.0044, "step": 121050 }, { "epoch": 14.28739971684757, "grad_norm": 0.41726553440093994, "learning_rate": 8.137739497211687e-05, "loss": 0.0067, "step": 121100 }, { "epoch": 14.293298725814063, "grad_norm": 0.051890600472688675, "learning_rate": 8.136292985291561e-05, "loss": 0.0073, "step": 121150 }, { "epoch": 14.299197734780558, "grad_norm": 0.04822566360235214, "learning_rate": 8.134846040472599e-05, "loss": 0.0057, "step": 121200 }, { "epoch": 14.30509674374705, "grad_norm": 0.10471218079328537, "learning_rate": 8.133398662954522e-05, "loss": 0.006, "step": 121250 }, { "epoch": 14.310995752713545, "grad_norm": 0.1637539118528366, "learning_rate": 8.131950852937109e-05, "loss": 0.0062, "step": 121300 }, { "epoch": 14.316894761680038, "grad_norm": 0.09060635417699814, "learning_rate": 8.130502610620197e-05, "loss": 0.005, "step": 121350 }, { "epoch": 14.32279377064653, "grad_norm": 0.20212818682193756, "learning_rate": 8.129053936203687e-05, "loss": 0.0066, "step": 121400 }, { "epoch": 14.328692779613025, "grad_norm": 0.01962762512266636, "learning_rate": 8.127604829887541e-05, "loss": 0.0063, "step": 121450 }, { "epoch": 14.33459178857952, "grad_norm": 0.17412328720092773, "learning_rate": 8.126155291871772e-05, "loss": 0.0055, "step": 121500 }, { "epoch": 14.340490797546012, "grad_norm": 0.3777942359447479, "learning_rate": 8.124705322356462e-05, "loss": 0.0063, "step": 121550 }, { "epoch": 14.346389806512505, "grad_norm": 0.14834320545196533, "learning_rate": 8.123254921541746e-05, "loss": 0.006, "step": 121600 }, { "epoch": 14.352288815479, "grad_norm": 0.06152249127626419, "learning_rate": 8.121804089627822e-05, "loss": 0.0054, "step": 121650 }, { "epoch": 14.358187824445492, "grad_norm": 0.10143056511878967, "learning_rate": 8.120352826814948e-05, "loss": 0.0065, "step": 121700 }, { "epoch": 14.364086833411987, "grad_norm": 0.10092070698738098, "learning_rate": 8.118901133303438e-05, "loss": 0.0056, "step": 121750 }, { "epoch": 14.36998584237848, "grad_norm": 0.11834419518709183, "learning_rate": 8.117449009293668e-05, "loss": 0.0047, "step": 121800 }, { "epoch": 14.375884851344974, "grad_norm": 0.23055487871170044, "learning_rate": 8.115996454986075e-05, "loss": 0.0062, "step": 121850 }, { "epoch": 14.381783860311467, "grad_norm": 0.17434817552566528, "learning_rate": 8.11454347058115e-05, "loss": 0.0059, "step": 121900 }, { "epoch": 14.387682869277961, "grad_norm": 0.29464730620384216, "learning_rate": 8.11309005627945e-05, "loss": 0.0062, "step": 121950 }, { "epoch": 14.393581878244454, "grad_norm": 0.18411412835121155, "learning_rate": 8.111636212281586e-05, "loss": 0.0071, "step": 122000 }, { "epoch": 14.393581878244454, "eval_cer": 0.08347676419965576, "eval_loss": 0.00041878654155880213, "eval_runtime": 2.0323, "eval_samples_per_second": 49.206, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 122000 }, { "epoch": 14.399480887210949, "grad_norm": 0.13188758492469788, "learning_rate": 8.11018193878823e-05, "loss": 0.0066, "step": 122050 }, { "epoch": 14.405379896177442, "grad_norm": 0.24230103194713593, "learning_rate": 8.108727236000115e-05, "loss": 0.0061, "step": 122100 }, { "epoch": 14.411278905143936, "grad_norm": 0.24930910766124725, "learning_rate": 8.107272104118033e-05, "loss": 0.0058, "step": 122150 }, { "epoch": 14.417177914110429, "grad_norm": 0.12995252013206482, "learning_rate": 8.105816543342832e-05, "loss": 0.0064, "step": 122200 }, { "epoch": 14.423076923076923, "grad_norm": 0.12071365118026733, "learning_rate": 8.104360553875422e-05, "loss": 0.0053, "step": 122250 }, { "epoch": 14.428975932043416, "grad_norm": 0.04114158824086189, "learning_rate": 8.102904135916772e-05, "loss": 0.0053, "step": 122300 }, { "epoch": 14.43487494100991, "grad_norm": 0.1823873370885849, "learning_rate": 8.101447289667907e-05, "loss": 0.0058, "step": 122350 }, { "epoch": 14.440773949976403, "grad_norm": 0.1243644654750824, "learning_rate": 8.099990015329919e-05, "loss": 0.0053, "step": 122400 }, { "epoch": 14.446672958942898, "grad_norm": 0.15086406469345093, "learning_rate": 8.09853231310395e-05, "loss": 0.0057, "step": 122450 }, { "epoch": 14.45257196790939, "grad_norm": 0.15240341424942017, "learning_rate": 8.097074183191204e-05, "loss": 0.006, "step": 122500 }, { "epoch": 14.458470976875885, "grad_norm": 0.21829403936862946, "learning_rate": 8.095615625792949e-05, "loss": 0.0067, "step": 122550 }, { "epoch": 14.464369985842378, "grad_norm": 0.12171335518360138, "learning_rate": 8.094156641110504e-05, "loss": 0.0057, "step": 122600 }, { "epoch": 14.470268994808873, "grad_norm": 0.2141185849905014, "learning_rate": 8.092697229345254e-05, "loss": 0.0073, "step": 122650 }, { "epoch": 14.476168003775365, "grad_norm": 0.04087654873728752, "learning_rate": 8.091237390698639e-05, "loss": 0.0069, "step": 122700 }, { "epoch": 14.48206701274186, "grad_norm": 0.09293626248836517, "learning_rate": 8.089777125372158e-05, "loss": 0.0054, "step": 122750 }, { "epoch": 14.487966021708353, "grad_norm": 0.2137743979692459, "learning_rate": 8.08831643356737e-05, "loss": 0.0052, "step": 122800 }, { "epoch": 14.493865030674847, "grad_norm": 0.09969241917133331, "learning_rate": 8.08685531548589e-05, "loss": 0.0055, "step": 122850 }, { "epoch": 14.49976403964134, "grad_norm": 0.012430801056325436, "learning_rate": 8.0853937713294e-05, "loss": 0.0058, "step": 122900 }, { "epoch": 14.505663048607834, "grad_norm": 0.0960211232304573, "learning_rate": 8.083931801299631e-05, "loss": 0.0061, "step": 122950 }, { "epoch": 14.511562057574327, "grad_norm": 0.18963764607906342, "learning_rate": 8.082469405598378e-05, "loss": 0.0061, "step": 123000 }, { "epoch": 14.511562057574327, "eval_cer": 0.08347676419965576, "eval_loss": 0.0011505885049700737, "eval_runtime": 2.0841, "eval_samples_per_second": 47.983, "eval_steps_per_second": 1.919, "eval_wer": 0.26, "step": 123000 }, { "epoch": 14.517461066540822, "grad_norm": 0.11820968240499496, "learning_rate": 8.081006584427495e-05, "loss": 0.0069, "step": 123050 }, { "epoch": 14.523360075507314, "grad_norm": 0.14490818977355957, "learning_rate": 8.079543337988893e-05, "loss": 0.0061, "step": 123100 }, { "epoch": 14.529259084473809, "grad_norm": 0.15547989308834076, "learning_rate": 8.078079666484539e-05, "loss": 0.0067, "step": 123150 }, { "epoch": 14.535158093440302, "grad_norm": 0.06153358146548271, "learning_rate": 8.076615570116468e-05, "loss": 0.0064, "step": 123200 }, { "epoch": 14.541057102406796, "grad_norm": 0.09497001022100449, "learning_rate": 8.075151049086762e-05, "loss": 0.0052, "step": 123250 }, { "epoch": 14.546956111373289, "grad_norm": 0.195977583527565, "learning_rate": 8.073686103597571e-05, "loss": 0.0048, "step": 123300 }, { "epoch": 14.552855120339784, "grad_norm": 0.12310172617435455, "learning_rate": 8.072220733851097e-05, "loss": 0.0058, "step": 123350 }, { "epoch": 14.558754129306276, "grad_norm": 0.0872839018702507, "learning_rate": 8.070754940049603e-05, "loss": 0.0071, "step": 123400 }, { "epoch": 14.56465313827277, "grad_norm": 0.04841817915439606, "learning_rate": 8.069288722395413e-05, "loss": 0.0044, "step": 123450 }, { "epoch": 14.570552147239264, "grad_norm": 0.03233736380934715, "learning_rate": 8.067822081090907e-05, "loss": 0.0058, "step": 123500 }, { "epoch": 14.576451156205758, "grad_norm": 0.16260044276714325, "learning_rate": 8.066355016338521e-05, "loss": 0.0057, "step": 123550 }, { "epoch": 14.582350165172251, "grad_norm": 0.06879427284002304, "learning_rate": 8.064887528340757e-05, "loss": 0.006, "step": 123600 }, { "epoch": 14.588249174138745, "grad_norm": 0.2753615975379944, "learning_rate": 8.063419617300166e-05, "loss": 0.0058, "step": 123650 }, { "epoch": 14.594148183105238, "grad_norm": 0.15354013442993164, "learning_rate": 8.061951283419363e-05, "loss": 0.0077, "step": 123700 }, { "epoch": 14.600047192071733, "grad_norm": 0.20806312561035156, "learning_rate": 8.060482526901024e-05, "loss": 0.0062, "step": 123750 }, { "epoch": 14.605946201038225, "grad_norm": 0.04637191817164421, "learning_rate": 8.059013347947874e-05, "loss": 0.0057, "step": 123800 }, { "epoch": 14.61184521000472, "grad_norm": 0.19095973670482635, "learning_rate": 8.057543746762709e-05, "loss": 0.0069, "step": 123850 }, { "epoch": 14.617744218971213, "grad_norm": 0.056184183806180954, "learning_rate": 8.05607372354837e-05, "loss": 0.0049, "step": 123900 }, { "epoch": 14.623643227937706, "grad_norm": 0.17906612157821655, "learning_rate": 8.054603278507764e-05, "loss": 0.0068, "step": 123950 }, { "epoch": 14.6295422369042, "grad_norm": 0.043911658227443695, "learning_rate": 8.053132411843858e-05, "loss": 0.0068, "step": 124000 }, { "epoch": 14.6295422369042, "eval_cer": 0.08605851979345955, "eval_loss": 0.0011606276966631413, "eval_runtime": 2.047, "eval_samples_per_second": 48.853, "eval_steps_per_second": 1.954, "eval_wer": 0.27, "step": 124000 }, { "epoch": 14.635441245870695, "grad_norm": 0.28928399085998535, "learning_rate": 8.051661123759671e-05, "loss": 0.0059, "step": 124050 }, { "epoch": 14.641340254837187, "grad_norm": 0.04738841950893402, "learning_rate": 8.050189414458284e-05, "loss": 0.0061, "step": 124100 }, { "epoch": 14.64723926380368, "grad_norm": 0.24887573719024658, "learning_rate": 8.048717284142835e-05, "loss": 0.0073, "step": 124150 }, { "epoch": 14.653138272770175, "grad_norm": 0.19418683648109436, "learning_rate": 8.047244733016522e-05, "loss": 0.0061, "step": 124200 }, { "epoch": 14.659037281736667, "grad_norm": 0.16290748119354248, "learning_rate": 8.045771761282597e-05, "loss": 0.0061, "step": 124250 }, { "epoch": 14.664936290703162, "grad_norm": 0.04284254461526871, "learning_rate": 8.044298369144374e-05, "loss": 0.0057, "step": 124300 }, { "epoch": 14.670835299669655, "grad_norm": 0.2922634184360504, "learning_rate": 8.042824556805223e-05, "loss": 0.0062, "step": 124350 }, { "epoch": 14.67673430863615, "grad_norm": 0.07708395272493362, "learning_rate": 8.041350324468573e-05, "loss": 0.0075, "step": 124400 }, { "epoch": 14.682633317602642, "grad_norm": 0.27028706669807434, "learning_rate": 8.03987567233791e-05, "loss": 0.0068, "step": 124450 }, { "epoch": 14.688532326569137, "grad_norm": 0.10798503458499908, "learning_rate": 8.03840060061678e-05, "loss": 0.009, "step": 124500 }, { "epoch": 14.69443133553563, "grad_norm": 0.19700171053409576, "learning_rate": 8.036925109508785e-05, "loss": 0.0071, "step": 124550 }, { "epoch": 14.700330344502124, "grad_norm": 0.08297441899776459, "learning_rate": 8.035449199217584e-05, "loss": 0.0062, "step": 124600 }, { "epoch": 14.706229353468617, "grad_norm": 0.13065284490585327, "learning_rate": 8.033972869946895e-05, "loss": 0.0059, "step": 124650 }, { "epoch": 14.712128362435111, "grad_norm": 0.4386148154735565, "learning_rate": 8.032496121900497e-05, "loss": 0.0058, "step": 124700 }, { "epoch": 14.718027371401604, "grad_norm": 0.12222136557102203, "learning_rate": 8.03101895528222e-05, "loss": 0.0056, "step": 124750 }, { "epoch": 14.723926380368098, "grad_norm": 0.015893783420324326, "learning_rate": 8.029541370295957e-05, "loss": 0.0062, "step": 124800 }, { "epoch": 14.729825389334591, "grad_norm": 0.07846727967262268, "learning_rate": 8.02806336714566e-05, "loss": 0.007, "step": 124850 }, { "epoch": 14.735724398301086, "grad_norm": 0.06757311522960663, "learning_rate": 8.026584946035331e-05, "loss": 0.0072, "step": 124900 }, { "epoch": 14.741623407267578, "grad_norm": 0.10802058130502701, "learning_rate": 8.025106107169038e-05, "loss": 0.005, "step": 124950 }, { "epoch": 14.747522416234073, "grad_norm": 0.11051464080810547, "learning_rate": 8.023626850750904e-05, "loss": 0.0049, "step": 125000 }, { "epoch": 14.747522416234073, "eval_cer": 0.08950086058519793, "eval_loss": 0.0018833652138710022, "eval_runtime": 2.0884, "eval_samples_per_second": 47.883, "eval_steps_per_second": 1.915, "eval_wer": 0.27, "step": 125000 }, { "epoch": 14.753421425200566, "grad_norm": 0.07337672263383865, "learning_rate": 8.022147176985107e-05, "loss": 0.0064, "step": 125050 }, { "epoch": 14.75932043416706, "grad_norm": 0.08729265630245209, "learning_rate": 8.020667086075884e-05, "loss": 0.0064, "step": 125100 }, { "epoch": 14.765219443133553, "grad_norm": 0.21256400644779205, "learning_rate": 8.019186578227533e-05, "loss": 0.0056, "step": 125150 }, { "epoch": 14.771118452100048, "grad_norm": 0.3200485408306122, "learning_rate": 8.017705653644407e-05, "loss": 0.0058, "step": 125200 }, { "epoch": 14.77701746106654, "grad_norm": 0.16232800483703613, "learning_rate": 8.01622431253091e-05, "loss": 0.0058, "step": 125250 }, { "epoch": 14.782916470033035, "grad_norm": 0.06619448959827423, "learning_rate": 8.014742555091519e-05, "loss": 0.0047, "step": 125300 }, { "epoch": 14.788815478999528, "grad_norm": 0.31563395261764526, "learning_rate": 8.013260381530751e-05, "loss": 0.0063, "step": 125350 }, { "epoch": 14.794714487966022, "grad_norm": 0.16472184658050537, "learning_rate": 8.011777792053195e-05, "loss": 0.0073, "step": 125400 }, { "epoch": 14.800613496932515, "grad_norm": 0.1558137834072113, "learning_rate": 8.010294786863489e-05, "loss": 0.0079, "step": 125450 }, { "epoch": 14.80651250589901, "grad_norm": 0.21233880519866943, "learning_rate": 8.008811366166328e-05, "loss": 0.006, "step": 125500 }, { "epoch": 14.812411514865502, "grad_norm": 0.07764485478401184, "learning_rate": 8.007327530166469e-05, "loss": 0.0047, "step": 125550 }, { "epoch": 14.818310523831997, "grad_norm": 0.09553736448287964, "learning_rate": 8.005843279068724e-05, "loss": 0.0062, "step": 125600 }, { "epoch": 14.82420953279849, "grad_norm": 0.1581951081752777, "learning_rate": 8.004358613077962e-05, "loss": 0.005, "step": 125650 }, { "epoch": 14.830108541764984, "grad_norm": 0.3052612543106079, "learning_rate": 8.002873532399111e-05, "loss": 0.0049, "step": 125700 }, { "epoch": 14.836007550731477, "grad_norm": 0.021580269560217857, "learning_rate": 8.001388037237154e-05, "loss": 0.0075, "step": 125750 }, { "epoch": 14.841906559697971, "grad_norm": 0.15519922971725464, "learning_rate": 7.999902127797129e-05, "loss": 0.0073, "step": 125800 }, { "epoch": 14.847805568664464, "grad_norm": 0.19176159799098969, "learning_rate": 7.998415804284141e-05, "loss": 0.0051, "step": 125850 }, { "epoch": 14.853704577630959, "grad_norm": 0.3388078510761261, "learning_rate": 7.996929066903342e-05, "loss": 0.0056, "step": 125900 }, { "epoch": 14.859603586597451, "grad_norm": 0.17303112149238586, "learning_rate": 7.995441915859943e-05, "loss": 0.0043, "step": 125950 }, { "epoch": 14.865502595563946, "grad_norm": 0.012113003991544247, "learning_rate": 7.993954351359215e-05, "loss": 0.0071, "step": 126000 }, { "epoch": 14.865502595563946, "eval_cer": 0.08433734939759036, "eval_loss": 0.0018241964280605316, "eval_runtime": 2.043, "eval_samples_per_second": 48.947, "eval_steps_per_second": 1.958, "eval_wer": 0.26, "step": 126000 }, { "epoch": 14.871401604530439, "grad_norm": 0.15028069913387299, "learning_rate": 7.992466373606485e-05, "loss": 0.0062, "step": 126050 }, { "epoch": 14.877300613496933, "grad_norm": 0.26828518509864807, "learning_rate": 7.990977982807138e-05, "loss": 0.0045, "step": 126100 }, { "epoch": 14.883199622463426, "grad_norm": 0.019849523901939392, "learning_rate": 7.989489179166611e-05, "loss": 0.0061, "step": 126150 }, { "epoch": 14.88909863142992, "grad_norm": 0.1321374624967575, "learning_rate": 7.987999962890407e-05, "loss": 0.0053, "step": 126200 }, { "epoch": 14.894997640396413, "grad_norm": 0.22890156507492065, "learning_rate": 7.986510334184078e-05, "loss": 0.0054, "step": 126250 }, { "epoch": 14.900896649362908, "grad_norm": 0.08445360511541367, "learning_rate": 7.985020293253235e-05, "loss": 0.0075, "step": 126300 }, { "epoch": 14.9067956583294, "grad_norm": 0.1994011104106903, "learning_rate": 7.983529840303547e-05, "loss": 0.0075, "step": 126350 }, { "epoch": 14.912694667295895, "grad_norm": 0.21100036799907684, "learning_rate": 7.982038975540742e-05, "loss": 0.0063, "step": 126400 }, { "epoch": 14.918593676262388, "grad_norm": 0.15339136123657227, "learning_rate": 7.980547699170599e-05, "loss": 0.0051, "step": 126450 }, { "epoch": 14.92449268522888, "grad_norm": 0.08811471611261368, "learning_rate": 7.979056011398959e-05, "loss": 0.0063, "step": 126500 }, { "epoch": 14.930391694195375, "grad_norm": 0.116600401699543, "learning_rate": 7.977563912431718e-05, "loss": 0.0076, "step": 126550 }, { "epoch": 14.93629070316187, "grad_norm": 0.04883226752281189, "learning_rate": 7.976071402474826e-05, "loss": 0.0062, "step": 126600 }, { "epoch": 14.942189712128362, "grad_norm": 0.1648404598236084, "learning_rate": 7.974578481734297e-05, "loss": 0.0075, "step": 126650 }, { "epoch": 14.948088721094855, "grad_norm": 0.29986944794654846, "learning_rate": 7.973085150416192e-05, "loss": 0.0065, "step": 126700 }, { "epoch": 14.95398773006135, "grad_norm": 0.19008639454841614, "learning_rate": 7.971591408726637e-05, "loss": 0.0067, "step": 126750 }, { "epoch": 14.959886739027842, "grad_norm": 0.019201505929231644, "learning_rate": 7.970097256871812e-05, "loss": 0.0049, "step": 126800 }, { "epoch": 14.965785747994337, "grad_norm": 0.17558352649211884, "learning_rate": 7.96860269505795e-05, "loss": 0.0051, "step": 126850 }, { "epoch": 14.97168475696083, "grad_norm": 0.13560368120670319, "learning_rate": 7.967107723491347e-05, "loss": 0.0059, "step": 126900 }, { "epoch": 14.977583765927324, "grad_norm": 0.043237388134002686, "learning_rate": 7.965612342378349e-05, "loss": 0.0078, "step": 126950 }, { "epoch": 14.983482774893817, "grad_norm": 0.25277987122535706, "learning_rate": 7.964116551925365e-05, "loss": 0.0071, "step": 127000 }, { "epoch": 14.983482774893817, "eval_cer": 0.09036144578313253, "eval_loss": 0.0018107114592567086, "eval_runtime": 2.0289, "eval_samples_per_second": 49.289, "eval_steps_per_second": 1.972, "eval_wer": 0.28, "step": 127000 }, { "epoch": 14.989381783860312, "grad_norm": 0.06531169265508652, "learning_rate": 7.962620352338853e-05, "loss": 0.0064, "step": 127050 }, { "epoch": 14.995280792826804, "grad_norm": 0.15695051848888397, "learning_rate": 7.961123743825337e-05, "loss": 0.008, "step": 127100 }, { "epoch": 15.001179801793299, "grad_norm": 0.16518352925777435, "learning_rate": 7.959626726591387e-05, "loss": 0.0064, "step": 127150 }, { "epoch": 15.007078810759792, "grad_norm": 0.11610259860754013, "learning_rate": 7.958129300843637e-05, "loss": 0.0045, "step": 127200 }, { "epoch": 15.012977819726286, "grad_norm": 0.1708211898803711, "learning_rate": 7.956631466788775e-05, "loss": 0.0042, "step": 127250 }, { "epoch": 15.018876828692779, "grad_norm": 0.09816591441631317, "learning_rate": 7.955133224633544e-05, "loss": 0.0057, "step": 127300 }, { "epoch": 15.024775837659273, "grad_norm": 0.20315439999103546, "learning_rate": 7.953634574584747e-05, "loss": 0.0041, "step": 127350 }, { "epoch": 15.030674846625766, "grad_norm": 0.1035240963101387, "learning_rate": 7.95213551684924e-05, "loss": 0.0057, "step": 127400 }, { "epoch": 15.03657385559226, "grad_norm": 0.08665107935667038, "learning_rate": 7.950636051633932e-05, "loss": 0.0044, "step": 127450 }, { "epoch": 15.042472864558754, "grad_norm": 0.02012302167713642, "learning_rate": 7.9491361791458e-05, "loss": 0.0052, "step": 127500 }, { "epoch": 15.048371873525248, "grad_norm": 0.18621380627155304, "learning_rate": 7.947635899591863e-05, "loss": 0.0075, "step": 127550 }, { "epoch": 15.05427088249174, "grad_norm": 0.09672108292579651, "learning_rate": 7.946135213179207e-05, "loss": 0.005, "step": 127600 }, { "epoch": 15.060169891458235, "grad_norm": 0.056590285152196884, "learning_rate": 7.944634120114969e-05, "loss": 0.0052, "step": 127650 }, { "epoch": 15.066068900424728, "grad_norm": 0.4509311020374298, "learning_rate": 7.943132620606341e-05, "loss": 0.0058, "step": 127700 }, { "epoch": 15.071967909391223, "grad_norm": 0.03340119495987892, "learning_rate": 7.941630714860576e-05, "loss": 0.0045, "step": 127750 }, { "epoch": 15.077866918357715, "grad_norm": 0.11917571723461151, "learning_rate": 7.940128403084976e-05, "loss": 0.0059, "step": 127800 }, { "epoch": 15.08376592732421, "grad_norm": 0.03311944380402565, "learning_rate": 7.93862568548691e-05, "loss": 0.0051, "step": 127850 }, { "epoch": 15.089664936290703, "grad_norm": 0.13974453508853912, "learning_rate": 7.937122562273792e-05, "loss": 0.0044, "step": 127900 }, { "epoch": 15.095563945257197, "grad_norm": 0.2467479556798935, "learning_rate": 7.935619033653096e-05, "loss": 0.0065, "step": 127950 }, { "epoch": 15.10146295422369, "grad_norm": 0.039143726229667664, "learning_rate": 7.934115099832356e-05, "loss": 0.0036, "step": 128000 }, { "epoch": 15.10146295422369, "eval_cer": 0.09036144578313253, "eval_loss": 0.0015166111988946795, "eval_runtime": 2.0687, "eval_samples_per_second": 48.339, "eval_steps_per_second": 1.934, "eval_wer": 0.28, "step": 128000 }, { "epoch": 15.107361963190185, "grad_norm": 0.0747445598244667, "learning_rate": 7.932610761019153e-05, "loss": 0.0045, "step": 128050 }, { "epoch": 15.113260972156677, "grad_norm": 0.17039452493190765, "learning_rate": 7.931106017421132e-05, "loss": 0.0047, "step": 128100 }, { "epoch": 15.119159981123172, "grad_norm": 0.18245664238929749, "learning_rate": 7.929600869245991e-05, "loss": 0.0043, "step": 128150 }, { "epoch": 15.125058990089665, "grad_norm": 0.0071115028113126755, "learning_rate": 7.928095316701484e-05, "loss": 0.004, "step": 128200 }, { "epoch": 15.13095799905616, "grad_norm": 0.48566001653671265, "learning_rate": 7.92658935999542e-05, "loss": 0.0057, "step": 128250 }, { "epoch": 15.136857008022652, "grad_norm": 0.10825774818658829, "learning_rate": 7.925082999335665e-05, "loss": 0.0054, "step": 128300 }, { "epoch": 15.142756016989146, "grad_norm": 0.20207451283931732, "learning_rate": 7.923576234930139e-05, "loss": 0.0059, "step": 128350 }, { "epoch": 15.14865502595564, "grad_norm": 0.12243761122226715, "learning_rate": 7.92206906698682e-05, "loss": 0.005, "step": 128400 }, { "epoch": 15.154554034922134, "grad_norm": 0.13200464844703674, "learning_rate": 7.92056149571374e-05, "loss": 0.0058, "step": 128450 }, { "epoch": 15.160453043888626, "grad_norm": 0.21660758554935455, "learning_rate": 7.919053521318988e-05, "loss": 0.007, "step": 128500 }, { "epoch": 15.166352052855121, "grad_norm": 0.19622495770454407, "learning_rate": 7.917545144010707e-05, "loss": 0.0075, "step": 128550 }, { "epoch": 15.172251061821614, "grad_norm": 0.05310583859682083, "learning_rate": 7.916036363997097e-05, "loss": 0.0055, "step": 128600 }, { "epoch": 15.178150070788108, "grad_norm": 0.1352926790714264, "learning_rate": 7.914527181486414e-05, "loss": 0.0061, "step": 128650 }, { "epoch": 15.184049079754601, "grad_norm": 0.278522789478302, "learning_rate": 7.913017596686968e-05, "loss": 0.0039, "step": 128700 }, { "epoch": 15.189948088721096, "grad_norm": 0.2257431447505951, "learning_rate": 7.911507609807125e-05, "loss": 0.0054, "step": 128750 }, { "epoch": 15.195847097687588, "grad_norm": 0.16150455176830292, "learning_rate": 7.909997221055308e-05, "loss": 0.0058, "step": 128800 }, { "epoch": 15.201746106654083, "grad_norm": 0.3527577221393585, "learning_rate": 7.908486430639992e-05, "loss": 0.0045, "step": 128850 }, { "epoch": 15.207645115620576, "grad_norm": 0.09376877546310425, "learning_rate": 7.906975238769709e-05, "loss": 0.0068, "step": 128900 }, { "epoch": 15.21354412458707, "grad_norm": 0.15888173878192902, "learning_rate": 7.905463645653051e-05, "loss": 0.0055, "step": 128950 }, { "epoch": 15.219443133553563, "grad_norm": 0.21187442541122437, "learning_rate": 7.903951651498658e-05, "loss": 0.0068, "step": 129000 }, { "epoch": 15.219443133553563, "eval_cer": 0.08519793459552495, "eval_loss": 0.0017047522123903036, "eval_runtime": 2.12, "eval_samples_per_second": 47.17, "eval_steps_per_second": 1.887, "eval_wer": 0.26, "step": 129000 }, { "epoch": 15.225342142520057, "grad_norm": 0.025320470333099365, "learning_rate": 7.902439256515232e-05, "loss": 0.0067, "step": 129050 }, { "epoch": 15.23124115148655, "grad_norm": 0.16183669865131378, "learning_rate": 7.900926460911523e-05, "loss": 0.0051, "step": 129100 }, { "epoch": 15.237140160453045, "grad_norm": 0.1777070313692093, "learning_rate": 7.899413264896342e-05, "loss": 0.0042, "step": 129150 }, { "epoch": 15.243039169419538, "grad_norm": 0.08477108180522919, "learning_rate": 7.897899668678557e-05, "loss": 0.006, "step": 129200 }, { "epoch": 15.24893817838603, "grad_norm": 0.18507994711399078, "learning_rate": 7.896385672467083e-05, "loss": 0.0052, "step": 129250 }, { "epoch": 15.254837187352525, "grad_norm": 0.11445048451423645, "learning_rate": 7.894871276470897e-05, "loss": 0.0045, "step": 129300 }, { "epoch": 15.260736196319018, "grad_norm": 0.04818500950932503, "learning_rate": 7.89335648089903e-05, "loss": 0.0055, "step": 129350 }, { "epoch": 15.266635205285512, "grad_norm": 0.01845003105700016, "learning_rate": 7.891841285960567e-05, "loss": 0.0054, "step": 129400 }, { "epoch": 15.272534214252005, "grad_norm": 0.1406475305557251, "learning_rate": 7.890325691864646e-05, "loss": 0.0056, "step": 129450 }, { "epoch": 15.2784332232185, "grad_norm": 0.1579555720090866, "learning_rate": 7.888809698820468e-05, "loss": 0.0058, "step": 129500 }, { "epoch": 15.284332232184992, "grad_norm": 0.1669968217611313, "learning_rate": 7.88729330703728e-05, "loss": 0.0058, "step": 129550 }, { "epoch": 15.290231241151487, "grad_norm": 0.08564981818199158, "learning_rate": 7.885776516724388e-05, "loss": 0.0058, "step": 129600 }, { "epoch": 15.29613025011798, "grad_norm": 0.2663213312625885, "learning_rate": 7.884259328091154e-05, "loss": 0.0055, "step": 129650 }, { "epoch": 15.302029259084474, "grad_norm": 0.008158339187502861, "learning_rate": 7.882741741346994e-05, "loss": 0.0055, "step": 129700 }, { "epoch": 15.307928268050967, "grad_norm": 0.017492402344942093, "learning_rate": 7.881223756701377e-05, "loss": 0.0052, "step": 129750 }, { "epoch": 15.313827277017461, "grad_norm": 0.10864783823490143, "learning_rate": 7.879705374363831e-05, "loss": 0.0057, "step": 129800 }, { "epoch": 15.319726285983954, "grad_norm": 0.11839068681001663, "learning_rate": 7.878186594543933e-05, "loss": 0.0064, "step": 129850 }, { "epoch": 15.325625294950449, "grad_norm": 0.04475186765193939, "learning_rate": 7.876667417451321e-05, "loss": 0.0061, "step": 129900 }, { "epoch": 15.331524303916941, "grad_norm": 0.18924406170845032, "learning_rate": 7.875147843295685e-05, "loss": 0.0048, "step": 129950 }, { "epoch": 15.337423312883436, "grad_norm": 0.017908960580825806, "learning_rate": 7.87362787228677e-05, "loss": 0.0057, "step": 130000 }, { "epoch": 15.337423312883436, "eval_cer": 0.08777969018932874, "eval_loss": 0.00181764573790133, "eval_runtime": 2.0191, "eval_samples_per_second": 49.526, "eval_steps_per_second": 1.981, "eval_wer": 0.27, "step": 130000 }, { "epoch": 15.343322321849929, "grad_norm": 0.17060644924640656, "learning_rate": 7.872107504634377e-05, "loss": 0.0047, "step": 130050 }, { "epoch": 15.349221330816423, "grad_norm": 0.1351674497127533, "learning_rate": 7.870586740548355e-05, "loss": 0.0054, "step": 130100 }, { "epoch": 15.355120339782916, "grad_norm": 0.16238899528980255, "learning_rate": 7.86906558023862e-05, "loss": 0.0047, "step": 130150 }, { "epoch": 15.36101934874941, "grad_norm": 0.20033274590969086, "learning_rate": 7.867544023915134e-05, "loss": 0.0063, "step": 130200 }, { "epoch": 15.366918357715903, "grad_norm": 0.03524111583828926, "learning_rate": 7.866022071787912e-05, "loss": 0.0044, "step": 130250 }, { "epoch": 15.372817366682398, "grad_norm": 0.1872410774230957, "learning_rate": 7.864499724067033e-05, "loss": 0.0064, "step": 130300 }, { "epoch": 15.37871637564889, "grad_norm": 0.03294975683093071, "learning_rate": 7.862976980962622e-05, "loss": 0.0046, "step": 130350 }, { "epoch": 15.384615384615385, "grad_norm": 0.13108976185321808, "learning_rate": 7.861453842684861e-05, "loss": 0.0059, "step": 130400 }, { "epoch": 15.390514393581878, "grad_norm": 0.05525757372379303, "learning_rate": 7.859930309443988e-05, "loss": 0.0059, "step": 130450 }, { "epoch": 15.396413402548372, "grad_norm": 0.07560838013887405, "learning_rate": 7.858406381450294e-05, "loss": 0.0053, "step": 130500 }, { "epoch": 15.402312411514865, "grad_norm": 0.06500355154275894, "learning_rate": 7.856882058914126e-05, "loss": 0.006, "step": 130550 }, { "epoch": 15.40821142048136, "grad_norm": 0.13608288764953613, "learning_rate": 7.855357342045881e-05, "loss": 0.0048, "step": 130600 }, { "epoch": 15.414110429447852, "grad_norm": 0.1127825453877449, "learning_rate": 7.853832231056017e-05, "loss": 0.0049, "step": 130650 }, { "epoch": 15.420009438414347, "grad_norm": 0.05407571420073509, "learning_rate": 7.852306726155045e-05, "loss": 0.0052, "step": 130700 }, { "epoch": 15.42590844738084, "grad_norm": 0.18996262550354004, "learning_rate": 7.850780827553523e-05, "loss": 0.0052, "step": 130750 }, { "epoch": 15.431807456347334, "grad_norm": 0.11077357083559036, "learning_rate": 7.849254535462075e-05, "loss": 0.0044, "step": 130800 }, { "epoch": 15.437706465313827, "grad_norm": 0.02782106213271618, "learning_rate": 7.84772785009137e-05, "loss": 0.0056, "step": 130850 }, { "epoch": 15.443605474280321, "grad_norm": 0.07504962384700775, "learning_rate": 7.846200771652135e-05, "loss": 0.0065, "step": 130900 }, { "epoch": 15.449504483246814, "grad_norm": 0.04609529301524162, "learning_rate": 7.844673300355149e-05, "loss": 0.0049, "step": 130950 }, { "epoch": 15.455403492213309, "grad_norm": 0.08177732676267624, "learning_rate": 7.843145436411252e-05, "loss": 0.0057, "step": 131000 }, { "epoch": 15.455403492213309, "eval_cer": 0.09208261617900172, "eval_loss": 0.0027305458206683397, "eval_runtime": 2.0422, "eval_samples_per_second": 48.966, "eval_steps_per_second": 1.959, "eval_wer": 0.28, "step": 131000 }, { "epoch": 15.461302501179802, "grad_norm": 0.19670259952545166, "learning_rate": 7.841617180031329e-05, "loss": 0.005, "step": 131050 }, { "epoch": 15.467201510146296, "grad_norm": 0.10413600504398346, "learning_rate": 7.840088531426325e-05, "loss": 0.0053, "step": 131100 }, { "epoch": 15.473100519112789, "grad_norm": 0.12112832814455032, "learning_rate": 7.838559490807238e-05, "loss": 0.0059, "step": 131150 }, { "epoch": 15.478999528079283, "grad_norm": 0.12413996458053589, "learning_rate": 7.837030058385118e-05, "loss": 0.0067, "step": 131200 }, { "epoch": 15.484898537045776, "grad_norm": 0.6709359884262085, "learning_rate": 7.835500234371072e-05, "loss": 0.0061, "step": 131250 }, { "epoch": 15.49079754601227, "grad_norm": 0.02412533387541771, "learning_rate": 7.83397001897626e-05, "loss": 0.0056, "step": 131300 }, { "epoch": 15.496696554978763, "grad_norm": 0.11043529212474823, "learning_rate": 7.832439412411897e-05, "loss": 0.0061, "step": 131350 }, { "epoch": 15.502595563945258, "grad_norm": 0.12295608967542648, "learning_rate": 7.830908414889247e-05, "loss": 0.0066, "step": 131400 }, { "epoch": 15.50849457291175, "grad_norm": 0.06846122443675995, "learning_rate": 7.829377026619633e-05, "loss": 0.0066, "step": 131450 }, { "epoch": 15.514393581878245, "grad_norm": 0.3007380962371826, "learning_rate": 7.827845247814433e-05, "loss": 0.005, "step": 131500 }, { "epoch": 15.520292590844738, "grad_norm": 0.3468702733516693, "learning_rate": 7.826313078685077e-05, "loss": 0.0049, "step": 131550 }, { "epoch": 15.526191599811233, "grad_norm": 0.04107768088579178, "learning_rate": 7.824780519443047e-05, "loss": 0.0057, "step": 131600 }, { "epoch": 15.532090608777725, "grad_norm": 0.13832519948482513, "learning_rate": 7.823247570299878e-05, "loss": 0.0057, "step": 131650 }, { "epoch": 15.53798961774422, "grad_norm": 0.07980325073003769, "learning_rate": 7.821714231467166e-05, "loss": 0.0046, "step": 131700 }, { "epoch": 15.543888626710713, "grad_norm": 0.30389857292175293, "learning_rate": 7.820180503156552e-05, "loss": 0.0054, "step": 131750 }, { "epoch": 15.549787635677205, "grad_norm": 0.13874422013759613, "learning_rate": 7.818646385579736e-05, "loss": 0.005, "step": 131800 }, { "epoch": 15.5556866446437, "grad_norm": 0.16242143511772156, "learning_rate": 7.817111878948471e-05, "loss": 0.0051, "step": 131850 }, { "epoch": 15.561585653610194, "grad_norm": 0.1858757883310318, "learning_rate": 7.815576983474562e-05, "loss": 0.0077, "step": 131900 }, { "epoch": 15.567484662576687, "grad_norm": 0.2115330547094345, "learning_rate": 7.814041699369869e-05, "loss": 0.0049, "step": 131950 }, { "epoch": 15.57338367154318, "grad_norm": 0.05267985910177231, "learning_rate": 7.812506026846307e-05, "loss": 0.0066, "step": 132000 }, { "epoch": 15.57338367154318, "eval_cer": 0.08950086058519793, "eval_loss": 0.002629079855978489, "eval_runtime": 2.0653, "eval_samples_per_second": 48.419, "eval_steps_per_second": 1.937, "eval_wer": 0.27, "step": 132000 }, { "epoch": 15.579282680509674, "grad_norm": 0.22963757812976837, "learning_rate": 7.810969966115842e-05, "loss": 0.0066, "step": 132050 }, { "epoch": 15.585181689476167, "grad_norm": 0.03684708848595619, "learning_rate": 7.809433517390495e-05, "loss": 0.0068, "step": 132100 }, { "epoch": 15.591080698442662, "grad_norm": 0.2941707968711853, "learning_rate": 7.80789668088234e-05, "loss": 0.0059, "step": 132150 }, { "epoch": 15.596979707409155, "grad_norm": 0.10058711469173431, "learning_rate": 7.806359456803505e-05, "loss": 0.0064, "step": 132200 }, { "epoch": 15.602878716375649, "grad_norm": 0.11719148606061935, "learning_rate": 7.804821845366169e-05, "loss": 0.0065, "step": 132250 }, { "epoch": 15.608777725342142, "grad_norm": 0.11717604100704193, "learning_rate": 7.80328384678257e-05, "loss": 0.0051, "step": 132300 }, { "epoch": 15.614676734308636, "grad_norm": 0.36209622025489807, "learning_rate": 7.801745461264996e-05, "loss": 0.0067, "step": 132350 }, { "epoch": 15.620575743275129, "grad_norm": 0.04605960100889206, "learning_rate": 7.800206689025785e-05, "loss": 0.0049, "step": 132400 }, { "epoch": 15.626474752241624, "grad_norm": 0.23671984672546387, "learning_rate": 7.798667530277336e-05, "loss": 0.0069, "step": 132450 }, { "epoch": 15.632373761208116, "grad_norm": 0.1568332314491272, "learning_rate": 7.797127985232095e-05, "loss": 0.005, "step": 132500 }, { "epoch": 15.638272770174611, "grad_norm": 0.21441851556301117, "learning_rate": 7.795588054102563e-05, "loss": 0.005, "step": 132550 }, { "epoch": 15.644171779141104, "grad_norm": 0.26561856269836426, "learning_rate": 7.794047737101297e-05, "loss": 0.0049, "step": 132600 }, { "epoch": 15.650070788107598, "grad_norm": 0.01973646506667137, "learning_rate": 7.792507034440905e-05, "loss": 0.005, "step": 132650 }, { "epoch": 15.655969797074091, "grad_norm": 0.15419742465019226, "learning_rate": 7.790965946334045e-05, "loss": 0.0051, "step": 132700 }, { "epoch": 15.661868806040586, "grad_norm": 0.1852724403142929, "learning_rate": 7.789424472993437e-05, "loss": 0.0049, "step": 132750 }, { "epoch": 15.667767815007078, "grad_norm": 0.09815014153718948, "learning_rate": 7.787882614631843e-05, "loss": 0.0059, "step": 132800 }, { "epoch": 15.673666823973573, "grad_norm": 0.02910785935819149, "learning_rate": 7.786340371462088e-05, "loss": 0.0046, "step": 132850 }, { "epoch": 15.679565832940066, "grad_norm": 0.278135746717453, "learning_rate": 7.784797743697045e-05, "loss": 0.0058, "step": 132900 }, { "epoch": 15.68546484190656, "grad_norm": 0.10363371670246124, "learning_rate": 7.78325473154964e-05, "loss": 0.0056, "step": 132950 }, { "epoch": 15.691363850873053, "grad_norm": 0.06950396299362183, "learning_rate": 7.781711335232856e-05, "loss": 0.0063, "step": 133000 }, { "epoch": 15.691363850873053, "eval_cer": 0.08777969018932874, "eval_loss": 0.0027465159073472023, "eval_runtime": 2.0772, "eval_samples_per_second": 48.142, "eval_steps_per_second": 1.926, "eval_wer": 0.27, "step": 133000 }, { "epoch": 15.697262859839547, "grad_norm": 0.005815328564494848, "learning_rate": 7.780167554959723e-05, "loss": 0.0049, "step": 133050 }, { "epoch": 15.70316186880604, "grad_norm": 0.2308121770620346, "learning_rate": 7.778623390943328e-05, "loss": 0.0056, "step": 133100 }, { "epoch": 15.709060877772535, "grad_norm": 0.05250941589474678, "learning_rate": 7.77707884339681e-05, "loss": 0.0058, "step": 133150 }, { "epoch": 15.714959886739027, "grad_norm": 0.15108393132686615, "learning_rate": 7.775533912533364e-05, "loss": 0.0066, "step": 133200 }, { "epoch": 15.720858895705522, "grad_norm": 0.15256808698177338, "learning_rate": 7.773988598566231e-05, "loss": 0.0046, "step": 133250 }, { "epoch": 15.726757904672015, "grad_norm": 0.05801746994256973, "learning_rate": 7.772442901708709e-05, "loss": 0.0055, "step": 133300 }, { "epoch": 15.73265691363851, "grad_norm": 0.04823807254433632, "learning_rate": 7.770896822174153e-05, "loss": 0.0054, "step": 133350 }, { "epoch": 15.738555922605002, "grad_norm": 0.04109187424182892, "learning_rate": 7.769350360175962e-05, "loss": 0.0071, "step": 133400 }, { "epoch": 15.744454931571497, "grad_norm": 0.1053256019949913, "learning_rate": 7.767803515927595e-05, "loss": 0.0061, "step": 133450 }, { "epoch": 15.75035394053799, "grad_norm": 0.15400193631649017, "learning_rate": 7.76625628964256e-05, "loss": 0.0053, "step": 133500 }, { "epoch": 15.756252949504484, "grad_norm": 0.0652630552649498, "learning_rate": 7.764708681534416e-05, "loss": 0.0053, "step": 133550 }, { "epoch": 15.762151958470977, "grad_norm": 0.13905102014541626, "learning_rate": 7.763160691816784e-05, "loss": 0.0063, "step": 133600 }, { "epoch": 15.768050967437471, "grad_norm": 0.12897613644599915, "learning_rate": 7.761612320703326e-05, "loss": 0.0047, "step": 133650 }, { "epoch": 15.773949976403964, "grad_norm": 0.08955135941505432, "learning_rate": 7.760063568407764e-05, "loss": 0.0044, "step": 133700 }, { "epoch": 15.779848985370458, "grad_norm": 0.17456327378749847, "learning_rate": 7.75851443514387e-05, "loss": 0.0049, "step": 133750 }, { "epoch": 15.785747994336951, "grad_norm": 0.1677796095609665, "learning_rate": 7.75696492112547e-05, "loss": 0.0045, "step": 133800 }, { "epoch": 15.791647003303446, "grad_norm": 0.28520774841308594, "learning_rate": 7.755415026566441e-05, "loss": 0.0048, "step": 133850 }, { "epoch": 15.797546012269938, "grad_norm": 0.10987276583909988, "learning_rate": 7.75386475168071e-05, "loss": 0.0057, "step": 133900 }, { "epoch": 15.803445021236433, "grad_norm": 0.12509074807167053, "learning_rate": 7.752314096682264e-05, "loss": 0.0066, "step": 133950 }, { "epoch": 15.809344030202926, "grad_norm": 0.2473394274711609, "learning_rate": 7.750763061785138e-05, "loss": 0.0075, "step": 134000 }, { "epoch": 15.809344030202926, "eval_cer": 0.08777969018932874, "eval_loss": 0.006387415342032909, "eval_runtime": 2.0645, "eval_samples_per_second": 48.438, "eval_steps_per_second": 1.938, "eval_wer": 0.27, "step": 134000 }, { "epoch": 15.81524303916942, "grad_norm": 0.17896945774555206, "learning_rate": 7.749211647203416e-05, "loss": 0.0058, "step": 134050 }, { "epoch": 15.821142048135913, "grad_norm": 0.1265399158000946, "learning_rate": 7.747659853151241e-05, "loss": 0.0053, "step": 134100 }, { "epoch": 15.827041057102408, "grad_norm": 0.16502363979816437, "learning_rate": 7.746107679842805e-05, "loss": 0.0072, "step": 134150 }, { "epoch": 15.8329400660689, "grad_norm": 0.20931698381900787, "learning_rate": 7.74455512749235e-05, "loss": 0.0072, "step": 134200 }, { "epoch": 15.838839075035395, "grad_norm": 0.022303439676761627, "learning_rate": 7.743002196314175e-05, "loss": 0.0053, "step": 134250 }, { "epoch": 15.844738084001888, "grad_norm": 0.3809831142425537, "learning_rate": 7.741448886522631e-05, "loss": 0.006, "step": 134300 }, { "epoch": 15.85063709296838, "grad_norm": 0.19763869047164917, "learning_rate": 7.739895198332116e-05, "loss": 0.0058, "step": 134350 }, { "epoch": 15.856536101934875, "grad_norm": 0.1337326467037201, "learning_rate": 7.738341131957085e-05, "loss": 0.0066, "step": 134400 }, { "epoch": 15.86243511090137, "grad_norm": 0.15148858726024628, "learning_rate": 7.736786687612045e-05, "loss": 0.0052, "step": 134450 }, { "epoch": 15.868334119867862, "grad_norm": 0.1473091095685959, "learning_rate": 7.735231865511555e-05, "loss": 0.0062, "step": 134500 }, { "epoch": 15.874233128834355, "grad_norm": 0.04415149241685867, "learning_rate": 7.733676665870221e-05, "loss": 0.0066, "step": 134550 }, { "epoch": 15.88013213780085, "grad_norm": 0.11641528457403183, "learning_rate": 7.732121088902711e-05, "loss": 0.0049, "step": 134600 }, { "epoch": 15.886031146767342, "grad_norm": 0.25152844190597534, "learning_rate": 7.730565134823736e-05, "loss": 0.0065, "step": 134650 }, { "epoch": 15.891930155733837, "grad_norm": 0.04202844947576523, "learning_rate": 7.729008803848063e-05, "loss": 0.0051, "step": 134700 }, { "epoch": 15.89782916470033, "grad_norm": 0.10169374942779541, "learning_rate": 7.727452096190509e-05, "loss": 0.0057, "step": 134750 }, { "epoch": 15.903728173666824, "grad_norm": 0.040623437613248825, "learning_rate": 7.725895012065947e-05, "loss": 0.0064, "step": 134800 }, { "epoch": 15.909627182633317, "grad_norm": 0.24822649359703064, "learning_rate": 7.7243375516893e-05, "loss": 0.0051, "step": 134850 }, { "epoch": 15.915526191599811, "grad_norm": 0.18153581023216248, "learning_rate": 7.722779715275541e-05, "loss": 0.0047, "step": 134900 }, { "epoch": 15.921425200566304, "grad_norm": 0.15534761548042297, "learning_rate": 7.721221503039695e-05, "loss": 0.0053, "step": 134950 }, { "epoch": 15.927324209532799, "grad_norm": 0.05554597079753876, "learning_rate": 7.719662915196844e-05, "loss": 0.0057, "step": 135000 }, { "epoch": 15.927324209532799, "eval_cer": 0.08777969018932874, "eval_loss": 0.0046640727669000626, "eval_runtime": 2.0668, "eval_samples_per_second": 48.384, "eval_steps_per_second": 1.935, "eval_wer": 0.27, "step": 135000 }, { "epoch": 15.933223218499291, "grad_norm": 0.025832099840044975, "learning_rate": 7.718103951962113e-05, "loss": 0.0054, "step": 135050 }, { "epoch": 15.939122227465786, "grad_norm": 0.03415431082248688, "learning_rate": 7.716544613550687e-05, "loss": 0.0072, "step": 135100 }, { "epoch": 15.945021236432279, "grad_norm": 0.14315195381641388, "learning_rate": 7.714984900177801e-05, "loss": 0.0063, "step": 135150 }, { "epoch": 15.950920245398773, "grad_norm": 0.12006157636642456, "learning_rate": 7.713424812058736e-05, "loss": 0.0069, "step": 135200 }, { "epoch": 15.956819254365266, "grad_norm": 0.19186362624168396, "learning_rate": 7.711864349408833e-05, "loss": 0.0058, "step": 135250 }, { "epoch": 15.96271826333176, "grad_norm": 0.1575363129377365, "learning_rate": 7.710303512443481e-05, "loss": 0.0062, "step": 135300 }, { "epoch": 15.968617272298253, "grad_norm": 0.07009279727935791, "learning_rate": 7.708742301378118e-05, "loss": 0.0059, "step": 135350 }, { "epoch": 15.974516281264748, "grad_norm": 0.6372588276863098, "learning_rate": 7.707180716428237e-05, "loss": 0.0053, "step": 135400 }, { "epoch": 15.98041529023124, "grad_norm": 0.027452217414975166, "learning_rate": 7.705618757809382e-05, "loss": 0.0056, "step": 135450 }, { "epoch": 15.986314299197735, "grad_norm": 0.11482611298561096, "learning_rate": 7.704056425737148e-05, "loss": 0.0057, "step": 135500 }, { "epoch": 15.992213308164228, "grad_norm": 0.035684194415807724, "learning_rate": 7.702493720427183e-05, "loss": 0.005, "step": 135550 }, { "epoch": 15.998112317130722, "grad_norm": 0.06527376919984818, "learning_rate": 7.700930642095184e-05, "loss": 0.0055, "step": 135600 }, { "epoch": 16.004011326097217, "grad_norm": 0.09537890553474426, "learning_rate": 7.699367190956901e-05, "loss": 0.0046, "step": 135650 }, { "epoch": 16.009910335063708, "grad_norm": 0.033865392208099365, "learning_rate": 7.697803367228136e-05, "loss": 0.004, "step": 135700 }, { "epoch": 16.015809344030203, "grad_norm": 0.08170061558485031, "learning_rate": 7.696239171124742e-05, "loss": 0.0042, "step": 135750 }, { "epoch": 16.021708352996697, "grad_norm": 0.02097596414387226, "learning_rate": 7.694674602862621e-05, "loss": 0.0044, "step": 135800 }, { "epoch": 16.02760736196319, "grad_norm": 0.07875951379537582, "learning_rate": 7.693109662657733e-05, "loss": 0.0044, "step": 135850 }, { "epoch": 16.033506370929683, "grad_norm": 0.33272871375083923, "learning_rate": 7.69154435072608e-05, "loss": 0.0059, "step": 135900 }, { "epoch": 16.039405379896177, "grad_norm": 0.266120970249176, "learning_rate": 7.689978667283722e-05, "loss": 0.0053, "step": 135950 }, { "epoch": 16.04530438886267, "grad_norm": 0.12941700220108032, "learning_rate": 7.688412612546769e-05, "loss": 0.0041, "step": 136000 }, { "epoch": 16.04530438886267, "eval_cer": 0.08777969018932874, "eval_loss": 0.0019407926592975855, "eval_runtime": 2.0919, "eval_samples_per_second": 47.804, "eval_steps_per_second": 1.912, "eval_wer": 0.27, "step": 136000 }, { "epoch": 16.051203397829166, "grad_norm": 0.09747696667909622, "learning_rate": 7.68684618673138e-05, "loss": 0.0046, "step": 136050 }, { "epoch": 16.057102406795657, "grad_norm": 0.26529404520988464, "learning_rate": 7.68527939005377e-05, "loss": 0.0047, "step": 136100 }, { "epoch": 16.06300141576215, "grad_norm": 0.030962901189923286, "learning_rate": 7.683712222730199e-05, "loss": 0.0049, "step": 136150 }, { "epoch": 16.068900424728646, "grad_norm": 0.1482895165681839, "learning_rate": 7.682144684976983e-05, "loss": 0.0052, "step": 136200 }, { "epoch": 16.07479943369514, "grad_norm": 0.2355547547340393, "learning_rate": 7.680576777010489e-05, "loss": 0.0056, "step": 136250 }, { "epoch": 16.08069844266163, "grad_norm": 0.023889323696494102, "learning_rate": 7.679008499047127e-05, "loss": 0.0059, "step": 136300 }, { "epoch": 16.086597451628126, "grad_norm": 0.799112856388092, "learning_rate": 7.677439851303372e-05, "loss": 0.0055, "step": 136350 }, { "epoch": 16.09249646059462, "grad_norm": 0.1393759548664093, "learning_rate": 7.67587083399574e-05, "loss": 0.0063, "step": 136400 }, { "epoch": 16.098395469561115, "grad_norm": 0.2997455894947052, "learning_rate": 7.6743014473408e-05, "loss": 0.0048, "step": 136450 }, { "epoch": 16.104294478527606, "grad_norm": 0.21944718062877655, "learning_rate": 7.672731691555173e-05, "loss": 0.005, "step": 136500 }, { "epoch": 16.1101934874941, "grad_norm": 0.009523395448923111, "learning_rate": 7.671161566855529e-05, "loss": 0.0047, "step": 136550 }, { "epoch": 16.116092496460595, "grad_norm": 0.11905284225940704, "learning_rate": 7.669591073458591e-05, "loss": 0.0042, "step": 136600 }, { "epoch": 16.12199150542709, "grad_norm": 0.09750983119010925, "learning_rate": 7.668020211581136e-05, "loss": 0.0047, "step": 136650 }, { "epoch": 16.12789051439358, "grad_norm": 0.03560517355799675, "learning_rate": 7.666448981439984e-05, "loss": 0.0056, "step": 136700 }, { "epoch": 16.133789523360075, "grad_norm": 0.29661014676094055, "learning_rate": 7.664877383252012e-05, "loss": 0.0051, "step": 136750 }, { "epoch": 16.13968853232657, "grad_norm": 0.30590546131134033, "learning_rate": 7.663305417234145e-05, "loss": 0.0057, "step": 136800 }, { "epoch": 16.145587541293065, "grad_norm": 0.10028564929962158, "learning_rate": 7.661733083603359e-05, "loss": 0.0031, "step": 136850 }, { "epoch": 16.151486550259555, "grad_norm": 0.12567450106143951, "learning_rate": 7.660160382576683e-05, "loss": 0.0041, "step": 136900 }, { "epoch": 16.15738555922605, "grad_norm": 0.10282541066408157, "learning_rate": 7.658587314371194e-05, "loss": 0.006, "step": 136950 }, { "epoch": 16.163284568192545, "grad_norm": 0.07710227370262146, "learning_rate": 7.657013879204022e-05, "loss": 0.0063, "step": 137000 }, { "epoch": 16.163284568192545, "eval_cer": 0.08777969018932874, "eval_loss": 0.002595743630081415, "eval_runtime": 2.0536, "eval_samples_per_second": 48.695, "eval_steps_per_second": 1.948, "eval_wer": 0.27, "step": 137000 }, { "epoch": 16.169183577159036, "grad_norm": 0.08321580290794373, "learning_rate": 7.655440077292346e-05, "loss": 0.0045, "step": 137050 }, { "epoch": 16.17508258612553, "grad_norm": 0.20330968499183655, "learning_rate": 7.653865908853394e-05, "loss": 0.0052, "step": 137100 }, { "epoch": 16.180981595092025, "grad_norm": 0.0797782838344574, "learning_rate": 7.65229137410445e-05, "loss": 0.0043, "step": 137150 }, { "epoch": 16.18688060405852, "grad_norm": 0.04670504108071327, "learning_rate": 7.650716473262842e-05, "loss": 0.0049, "step": 137200 }, { "epoch": 16.19277961302501, "grad_norm": 0.03313421830534935, "learning_rate": 7.649141206545955e-05, "loss": 0.006, "step": 137250 }, { "epoch": 16.198678621991505, "grad_norm": 0.03927946835756302, "learning_rate": 7.647565574171217e-05, "loss": 0.006, "step": 137300 }, { "epoch": 16.204577630958, "grad_norm": 0.08766765147447586, "learning_rate": 7.645989576356114e-05, "loss": 0.0044, "step": 137350 }, { "epoch": 16.210476639924494, "grad_norm": 0.12594027817249298, "learning_rate": 7.644413213318177e-05, "loss": 0.0042, "step": 137400 }, { "epoch": 16.216375648890985, "grad_norm": 0.12511883676052094, "learning_rate": 7.642836485274993e-05, "loss": 0.0048, "step": 137450 }, { "epoch": 16.22227465785748, "grad_norm": 0.16086268424987793, "learning_rate": 7.641259392444192e-05, "loss": 0.0057, "step": 137500 }, { "epoch": 16.228173666823974, "grad_norm": 0.029789498075842857, "learning_rate": 7.63968193504346e-05, "loss": 0.0051, "step": 137550 }, { "epoch": 16.23407267579047, "grad_norm": 0.269567608833313, "learning_rate": 7.638104113290532e-05, "loss": 0.0053, "step": 137600 }, { "epoch": 16.23997168475696, "grad_norm": 0.05261160060763359, "learning_rate": 7.636525927403191e-05, "loss": 0.0051, "step": 137650 }, { "epoch": 16.245870693723454, "grad_norm": 0.10164504498243332, "learning_rate": 7.634947377599273e-05, "loss": 0.0044, "step": 137700 }, { "epoch": 16.25176970268995, "grad_norm": 0.18795330822467804, "learning_rate": 7.633368464096666e-05, "loss": 0.0054, "step": 137750 }, { "epoch": 16.257668711656443, "grad_norm": 0.11335994303226471, "learning_rate": 7.631789187113303e-05, "loss": 0.004, "step": 137800 }, { "epoch": 16.263567720622934, "grad_norm": 0.049328260123729706, "learning_rate": 7.63020954686717e-05, "loss": 0.0055, "step": 137850 }, { "epoch": 16.26946672958943, "grad_norm": 0.5382207036018372, "learning_rate": 7.628629543576302e-05, "loss": 0.0057, "step": 137900 }, { "epoch": 16.275365738555923, "grad_norm": 0.056693803519010544, "learning_rate": 7.627049177458788e-05, "loss": 0.0051, "step": 137950 }, { "epoch": 16.281264747522417, "grad_norm": 0.1848532259464264, "learning_rate": 7.62546844873276e-05, "loss": 0.0061, "step": 138000 }, { "epoch": 16.281264747522417, "eval_cer": 0.08519793459552495, "eval_loss": 0.0009496811544522643, "eval_runtime": 2.0464, "eval_samples_per_second": 48.867, "eval_steps_per_second": 1.955, "eval_wer": 0.26, "step": 138000 }, { "epoch": 16.28716375648891, "grad_norm": 0.11494393646717072, "learning_rate": 7.623887357616408e-05, "loss": 0.0047, "step": 138050 }, { "epoch": 16.293062765455403, "grad_norm": 0.07967124134302139, "learning_rate": 7.622305904327966e-05, "loss": 0.0059, "step": 138100 }, { "epoch": 16.298961774421898, "grad_norm": 0.010768569074571133, "learning_rate": 7.62072408908572e-05, "loss": 0.0049, "step": 138150 }, { "epoch": 16.304860783388392, "grad_norm": 0.008480495773255825, "learning_rate": 7.619141912108008e-05, "loss": 0.0043, "step": 138200 }, { "epoch": 16.310759792354883, "grad_norm": 0.1857920140028, "learning_rate": 7.617559373613213e-05, "loss": 0.0054, "step": 138250 }, { "epoch": 16.316658801321378, "grad_norm": 0.03549302741885185, "learning_rate": 7.615976473819772e-05, "loss": 0.0041, "step": 138300 }, { "epoch": 16.322557810287872, "grad_norm": 0.024871479719877243, "learning_rate": 7.614393212946173e-05, "loss": 0.0061, "step": 138350 }, { "epoch": 16.328456819254367, "grad_norm": 0.01167189609259367, "learning_rate": 7.61280959121095e-05, "loss": 0.0051, "step": 138400 }, { "epoch": 16.334355828220858, "grad_norm": 0.06588822603225708, "learning_rate": 7.611225608832689e-05, "loss": 0.0055, "step": 138450 }, { "epoch": 16.340254837187352, "grad_norm": 0.33566245436668396, "learning_rate": 7.609641266030023e-05, "loss": 0.0057, "step": 138500 }, { "epoch": 16.346153846153847, "grad_norm": 0.16991665959358215, "learning_rate": 7.608056563021639e-05, "loss": 0.0055, "step": 138550 }, { "epoch": 16.35205285512034, "grad_norm": 0.06379478424787521, "learning_rate": 7.606471500026273e-05, "loss": 0.0041, "step": 138600 }, { "epoch": 16.357951864086832, "grad_norm": 0.07027692347764969, "learning_rate": 7.604886077262705e-05, "loss": 0.0056, "step": 138650 }, { "epoch": 16.363850873053327, "grad_norm": 0.05543661490082741, "learning_rate": 7.603300294949775e-05, "loss": 0.0044, "step": 138700 }, { "epoch": 16.36974988201982, "grad_norm": 0.14679980278015137, "learning_rate": 7.60171415330636e-05, "loss": 0.0063, "step": 138750 }, { "epoch": 16.375648890986316, "grad_norm": 0.05143392086029053, "learning_rate": 7.600127652551401e-05, "loss": 0.0046, "step": 138800 }, { "epoch": 16.381547899952807, "grad_norm": 0.09074300527572632, "learning_rate": 7.598540792903875e-05, "loss": 0.0054, "step": 138850 }, { "epoch": 16.3874469089193, "grad_norm": 0.15760105848312378, "learning_rate": 7.596953574582814e-05, "loss": 0.0053, "step": 138900 }, { "epoch": 16.393345917885796, "grad_norm": 0.09200216829776764, "learning_rate": 7.595365997807303e-05, "loss": 0.0046, "step": 138950 }, { "epoch": 16.39924492685229, "grad_norm": 0.3897489011287689, "learning_rate": 7.593778062796472e-05, "loss": 0.0055, "step": 139000 }, { "epoch": 16.39924492685229, "eval_cer": 0.08777969018932874, "eval_loss": 0.0022701388224959373, "eval_runtime": 2.0855, "eval_samples_per_second": 47.951, "eval_steps_per_second": 1.918, "eval_wer": 0.27, "step": 139000 }, { "epoch": 16.40514393581878, "grad_norm": 0.045040253549814224, "learning_rate": 7.592189769769504e-05, "loss": 0.006, "step": 139050 }, { "epoch": 16.411042944785276, "grad_norm": 0.05437321588397026, "learning_rate": 7.590601118945625e-05, "loss": 0.0049, "step": 139100 }, { "epoch": 16.41694195375177, "grad_norm": 0.12452913075685501, "learning_rate": 7.589012110544118e-05, "loss": 0.005, "step": 139150 }, { "epoch": 16.422840962718265, "grad_norm": 0.1721559762954712, "learning_rate": 7.587422744784311e-05, "loss": 0.005, "step": 139200 }, { "epoch": 16.428739971684756, "grad_norm": 0.05035824328660965, "learning_rate": 7.58583302188558e-05, "loss": 0.006, "step": 139250 }, { "epoch": 16.43463898065125, "grad_norm": 0.07729125022888184, "learning_rate": 7.584242942067356e-05, "loss": 0.006, "step": 139300 }, { "epoch": 16.440537989617745, "grad_norm": 0.006389150861650705, "learning_rate": 7.582652505549114e-05, "loss": 0.0059, "step": 139350 }, { "epoch": 16.44643699858424, "grad_norm": 0.20569747686386108, "learning_rate": 7.581061712550381e-05, "loss": 0.0043, "step": 139400 }, { "epoch": 16.45233600755073, "grad_norm": 0.2568388879299164, "learning_rate": 7.579470563290733e-05, "loss": 0.0053, "step": 139450 }, { "epoch": 16.458235016517225, "grad_norm": 0.2124088704586029, "learning_rate": 7.577879057989791e-05, "loss": 0.0044, "step": 139500 }, { "epoch": 16.46413402548372, "grad_norm": 0.030752032995224, "learning_rate": 7.576287196867233e-05, "loss": 0.004, "step": 139550 }, { "epoch": 16.47003303445021, "grad_norm": 0.1577909141778946, "learning_rate": 7.574694980142779e-05, "loss": 0.0047, "step": 139600 }, { "epoch": 16.475932043416705, "grad_norm": 0.1015341505408287, "learning_rate": 7.573102408036201e-05, "loss": 0.0047, "step": 139650 }, { "epoch": 16.4818310523832, "grad_norm": 0.01027072872966528, "learning_rate": 7.571509480767323e-05, "loss": 0.0052, "step": 139700 }, { "epoch": 16.487730061349694, "grad_norm": 0.08876933157444, "learning_rate": 7.569916198556009e-05, "loss": 0.0045, "step": 139750 }, { "epoch": 16.493629070316185, "grad_norm": 0.06540989130735397, "learning_rate": 7.568322561622183e-05, "loss": 0.0056, "step": 139800 }, { "epoch": 16.49952807928268, "grad_norm": 0.14778198301792145, "learning_rate": 7.56672857018581e-05, "loss": 0.0044, "step": 139850 }, { "epoch": 16.505427088249174, "grad_norm": 0.04574650898575783, "learning_rate": 7.565134224466907e-05, "loss": 0.0053, "step": 139900 }, { "epoch": 16.51132609721567, "grad_norm": 0.051859062165021896, "learning_rate": 7.563539524685542e-05, "loss": 0.0056, "step": 139950 }, { "epoch": 16.51722510618216, "grad_norm": 0.04601522535085678, "learning_rate": 7.561944471061826e-05, "loss": 0.0058, "step": 140000 }, { "epoch": 16.51722510618216, "eval_cer": 0.08691910499139414, "eval_loss": 0.0016625311691313982, "eval_runtime": 2.1002, "eval_samples_per_second": 47.614, "eval_steps_per_second": 1.905, "eval_wer": 0.27, "step": 140000 }, { "epoch": 16.523124115148654, "grad_norm": 0.14548765122890472, "learning_rate": 7.560349063815925e-05, "loss": 0.0061, "step": 140050 }, { "epoch": 16.52902312411515, "grad_norm": 0.049647845327854156, "learning_rate": 7.558753303168053e-05, "loss": 0.0045, "step": 140100 }, { "epoch": 16.534922133081643, "grad_norm": 0.1012166440486908, "learning_rate": 7.557157189338464e-05, "loss": 0.0048, "step": 140150 }, { "epoch": 16.540821142048134, "grad_norm": 0.029301578179001808, "learning_rate": 7.555560722547475e-05, "loss": 0.0058, "step": 140200 }, { "epoch": 16.54672015101463, "grad_norm": 0.07127171009778976, "learning_rate": 7.553963903015442e-05, "loss": 0.0053, "step": 140250 }, { "epoch": 16.552619159981123, "grad_norm": 0.0930304005742073, "learning_rate": 7.55236673096277e-05, "loss": 0.0063, "step": 140300 }, { "epoch": 16.558518168947618, "grad_norm": 0.026038290932774544, "learning_rate": 7.550769206609916e-05, "loss": 0.0045, "step": 140350 }, { "epoch": 16.56441717791411, "grad_norm": 0.047849562019109726, "learning_rate": 7.549171330177387e-05, "loss": 0.0042, "step": 140400 }, { "epoch": 16.570316186880603, "grad_norm": 0.2016211301088333, "learning_rate": 7.547573101885734e-05, "loss": 0.0053, "step": 140450 }, { "epoch": 16.576215195847098, "grad_norm": 0.19002553820610046, "learning_rate": 7.545974521955556e-05, "loss": 0.0061, "step": 140500 }, { "epoch": 16.582114204813593, "grad_norm": 0.04479103162884712, "learning_rate": 7.544375590607508e-05, "loss": 0.0046, "step": 140550 }, { "epoch": 16.588013213780084, "grad_norm": 0.03881995379924774, "learning_rate": 7.542776308062285e-05, "loss": 0.004, "step": 140600 }, { "epoch": 16.593912222746578, "grad_norm": 0.028243685141205788, "learning_rate": 7.541176674540636e-05, "loss": 0.0053, "step": 140650 }, { "epoch": 16.599811231713073, "grad_norm": 0.3614140748977661, "learning_rate": 7.539576690263355e-05, "loss": 0.0065, "step": 140700 }, { "epoch": 16.605710240679567, "grad_norm": 0.17021888494491577, "learning_rate": 7.537976355451288e-05, "loss": 0.0045, "step": 140750 }, { "epoch": 16.611609249646058, "grad_norm": 0.19219453632831573, "learning_rate": 7.536375670325326e-05, "loss": 0.0057, "step": 140800 }, { "epoch": 16.617508258612553, "grad_norm": 0.12435362488031387, "learning_rate": 7.534774635106409e-05, "loss": 0.0053, "step": 140850 }, { "epoch": 16.623407267579047, "grad_norm": 0.14551599323749542, "learning_rate": 7.533173250015526e-05, "loss": 0.0062, "step": 140900 }, { "epoch": 16.62930627654554, "grad_norm": 0.26380863785743713, "learning_rate": 7.531571515273717e-05, "loss": 0.0051, "step": 140950 }, { "epoch": 16.635205285512033, "grad_norm": 0.05585821345448494, "learning_rate": 7.529969431102064e-05, "loss": 0.004, "step": 141000 }, { "epoch": 16.635205285512033, "eval_cer": 0.08433734939759036, "eval_loss": 0.002098551718518138, "eval_runtime": 2.0519, "eval_samples_per_second": 48.736, "eval_steps_per_second": 1.949, "eval_wer": 0.26, "step": 141000 }, { "epoch": 16.641104294478527, "grad_norm": 0.28994983434677124, "learning_rate": 7.528366997721703e-05, "loss": 0.0047, "step": 141050 }, { "epoch": 16.64700330344502, "grad_norm": 0.0995970293879509, "learning_rate": 7.526764215353816e-05, "loss": 0.005, "step": 141100 }, { "epoch": 16.652902312411516, "grad_norm": 0.15577134490013123, "learning_rate": 7.525161084219633e-05, "loss": 0.0053, "step": 141150 }, { "epoch": 16.658801321378007, "grad_norm": 0.27184703946113586, "learning_rate": 7.52355760454043e-05, "loss": 0.0056, "step": 141200 }, { "epoch": 16.664700330344502, "grad_norm": 0.3258792459964752, "learning_rate": 7.521953776537536e-05, "loss": 0.0061, "step": 141250 }, { "epoch": 16.670599339310996, "grad_norm": 0.12339580804109573, "learning_rate": 7.520349600432326e-05, "loss": 0.0048, "step": 141300 }, { "epoch": 16.67649834827749, "grad_norm": 0.21181714534759521, "learning_rate": 7.518745076446221e-05, "loss": 0.005, "step": 141350 }, { "epoch": 16.682397357243982, "grad_norm": 0.2594021260738373, "learning_rate": 7.517140204800692e-05, "loss": 0.0046, "step": 141400 }, { "epoch": 16.688296366210476, "grad_norm": 0.028521409258246422, "learning_rate": 7.515534985717259e-05, "loss": 0.0055, "step": 141450 }, { "epoch": 16.69419537517697, "grad_norm": 0.03548736497759819, "learning_rate": 7.513929419417485e-05, "loss": 0.0048, "step": 141500 }, { "epoch": 16.700094384143465, "grad_norm": 0.03269116207957268, "learning_rate": 7.512323506122986e-05, "loss": 0.0044, "step": 141550 }, { "epoch": 16.705993393109956, "grad_norm": 0.22981248795986176, "learning_rate": 7.510717246055426e-05, "loss": 0.0059, "step": 141600 }, { "epoch": 16.71189240207645, "grad_norm": 0.06508989632129669, "learning_rate": 7.509110639436514e-05, "loss": 0.0055, "step": 141650 }, { "epoch": 16.717791411042946, "grad_norm": 0.042171601206064224, "learning_rate": 7.507503686488007e-05, "loss": 0.0061, "step": 141700 }, { "epoch": 16.72369042000944, "grad_norm": 0.29209044575691223, "learning_rate": 7.505896387431712e-05, "loss": 0.0052, "step": 141750 }, { "epoch": 16.72958942897593, "grad_norm": 0.139998197555542, "learning_rate": 7.504288742489483e-05, "loss": 0.0062, "step": 141800 }, { "epoch": 16.735488437942426, "grad_norm": 0.23842453956604004, "learning_rate": 7.502680751883221e-05, "loss": 0.0055, "step": 141850 }, { "epoch": 16.74138744690892, "grad_norm": 0.09783776849508286, "learning_rate": 7.501072415834875e-05, "loss": 0.005, "step": 141900 }, { "epoch": 16.747286455875415, "grad_norm": 0.0786634162068367, "learning_rate": 7.49946373456644e-05, "loss": 0.0054, "step": 141950 }, { "epoch": 16.753185464841906, "grad_norm": 0.1977573186159134, "learning_rate": 7.497854708299963e-05, "loss": 0.0057, "step": 142000 }, { "epoch": 16.753185464841906, "eval_cer": 0.08519793459552495, "eval_loss": 0.001011815038509667, "eval_runtime": 2.0302, "eval_samples_per_second": 49.257, "eval_steps_per_second": 1.97, "eval_wer": 0.26, "step": 142000 }, { "epoch": 16.7590844738084, "grad_norm": 0.11322975903749466, "learning_rate": 7.496245337257535e-05, "loss": 0.0071, "step": 142050 }, { "epoch": 16.764983482774895, "grad_norm": 0.0917373076081276, "learning_rate": 7.494635621661295e-05, "loss": 0.0055, "step": 142100 }, { "epoch": 16.77088249174139, "grad_norm": 0.049931854009628296, "learning_rate": 7.493025561733432e-05, "loss": 0.0062, "step": 142150 }, { "epoch": 16.77678150070788, "grad_norm": 0.0421471893787384, "learning_rate": 7.491415157696179e-05, "loss": 0.0048, "step": 142200 }, { "epoch": 16.782680509674375, "grad_norm": 0.07878706604242325, "learning_rate": 7.489804409771817e-05, "loss": 0.0055, "step": 142250 }, { "epoch": 16.78857951864087, "grad_norm": 0.21574586629867554, "learning_rate": 7.488193318182679e-05, "loss": 0.0053, "step": 142300 }, { "epoch": 16.79447852760736, "grad_norm": 0.0074656084179878235, "learning_rate": 7.48658188315114e-05, "loss": 0.0053, "step": 142350 }, { "epoch": 16.800377536573855, "grad_norm": 0.08475372940301895, "learning_rate": 7.484970104899624e-05, "loss": 0.0061, "step": 142400 }, { "epoch": 16.80627654554035, "grad_norm": 0.14544759690761566, "learning_rate": 7.483357983650605e-05, "loss": 0.0059, "step": 142450 }, { "epoch": 16.812175554506844, "grad_norm": 0.13514907658100128, "learning_rate": 7.481745519626599e-05, "loss": 0.0045, "step": 142500 }, { "epoch": 16.818074563473335, "grad_norm": 0.15715019404888153, "learning_rate": 7.480132713050174e-05, "loss": 0.005, "step": 142550 }, { "epoch": 16.82397357243983, "grad_norm": 0.14221549034118652, "learning_rate": 7.478519564143946e-05, "loss": 0.0065, "step": 142600 }, { "epoch": 16.829872581406324, "grad_norm": 0.05880877375602722, "learning_rate": 7.476906073130571e-05, "loss": 0.005, "step": 142650 }, { "epoch": 16.83577159037282, "grad_norm": 0.18038176000118256, "learning_rate": 7.475292240232763e-05, "loss": 0.0061, "step": 142700 }, { "epoch": 16.84167059933931, "grad_norm": 0.07216218113899231, "learning_rate": 7.473678065673275e-05, "loss": 0.0054, "step": 142750 }, { "epoch": 16.847569608305804, "grad_norm": 0.5907226204872131, "learning_rate": 7.47206354967491e-05, "loss": 0.0057, "step": 142800 }, { "epoch": 16.8534686172723, "grad_norm": 0.2881943881511688, "learning_rate": 7.470448692460516e-05, "loss": 0.0057, "step": 142850 }, { "epoch": 16.859367626238793, "grad_norm": 0.04299014061689377, "learning_rate": 7.468833494252992e-05, "loss": 0.0046, "step": 142900 }, { "epoch": 16.865266635205284, "grad_norm": 0.2185756266117096, "learning_rate": 7.467217955275281e-05, "loss": 0.0054, "step": 142950 }, { "epoch": 16.87116564417178, "grad_norm": 0.2034912407398224, "learning_rate": 7.465602075750373e-05, "loss": 0.0045, "step": 143000 }, { "epoch": 16.87116564417178, "eval_cer": 0.08777969018932874, "eval_loss": 0.0018847313476726413, "eval_runtime": 2.0335, "eval_samples_per_second": 49.176, "eval_steps_per_second": 1.967, "eval_wer": 0.27, "step": 143000 }, { "epoch": 16.877064653138273, "grad_norm": 0.1592949628829956, "learning_rate": 7.463985855901307e-05, "loss": 0.0055, "step": 143050 }, { "epoch": 16.882963662104768, "grad_norm": 0.011424368247389793, "learning_rate": 7.462369295951169e-05, "loss": 0.0048, "step": 143100 }, { "epoch": 16.88886267107126, "grad_norm": 0.0009498211438767612, "learning_rate": 7.46075239612309e-05, "loss": 0.0044, "step": 143150 }, { "epoch": 16.894761680037753, "grad_norm": 0.08022716641426086, "learning_rate": 7.459135156640247e-05, "loss": 0.0041, "step": 143200 }, { "epoch": 16.900660689004248, "grad_norm": 0.2781617343425751, "learning_rate": 7.457517577725867e-05, "loss": 0.0053, "step": 143250 }, { "epoch": 16.906559697970742, "grad_norm": 0.2804657220840454, "learning_rate": 7.455899659603223e-05, "loss": 0.0046, "step": 143300 }, { "epoch": 16.912458706937233, "grad_norm": 0.22138501703739166, "learning_rate": 7.454281402495635e-05, "loss": 0.0051, "step": 143350 }, { "epoch": 16.918357715903728, "grad_norm": 0.043777015060186386, "learning_rate": 7.452662806626468e-05, "loss": 0.0049, "step": 143400 }, { "epoch": 16.924256724870222, "grad_norm": 0.07756868749856949, "learning_rate": 7.451043872219135e-05, "loss": 0.0048, "step": 143450 }, { "epoch": 16.930155733836717, "grad_norm": 0.02824394218623638, "learning_rate": 7.449424599497094e-05, "loss": 0.0049, "step": 143500 }, { "epoch": 16.936054742803208, "grad_norm": 0.04840002581477165, "learning_rate": 7.447804988683856e-05, "loss": 0.005, "step": 143550 }, { "epoch": 16.941953751769702, "grad_norm": 0.052434664219617844, "learning_rate": 7.446185040002968e-05, "loss": 0.0047, "step": 143600 }, { "epoch": 16.947852760736197, "grad_norm": 0.044635191559791565, "learning_rate": 7.444564753678034e-05, "loss": 0.0051, "step": 143650 }, { "epoch": 16.95375176970269, "grad_norm": 0.09914981573820114, "learning_rate": 7.442944129932698e-05, "loss": 0.004, "step": 143700 }, { "epoch": 16.959650778669182, "grad_norm": 0.07518811523914337, "learning_rate": 7.441323168990655e-05, "loss": 0.0059, "step": 143750 }, { "epoch": 16.965549787635677, "grad_norm": 0.012245218269526958, "learning_rate": 7.439701871075641e-05, "loss": 0.0042, "step": 143800 }, { "epoch": 16.97144879660217, "grad_norm": 0.16174668073654175, "learning_rate": 7.438080236411447e-05, "loss": 0.0055, "step": 143850 }, { "epoch": 16.977347805568666, "grad_norm": 0.1815369725227356, "learning_rate": 7.436458265221899e-05, "loss": 0.006, "step": 143900 }, { "epoch": 16.983246814535157, "grad_norm": 0.10689488053321838, "learning_rate": 7.434835957730879e-05, "loss": 0.0057, "step": 143950 }, { "epoch": 16.98914582350165, "grad_norm": 0.09496395289897919, "learning_rate": 7.433213314162313e-05, "loss": 0.0044, "step": 144000 }, { "epoch": 16.98914582350165, "eval_cer": 0.08519793459552495, "eval_loss": 0.0012304700212553144, "eval_runtime": 2.1084, "eval_samples_per_second": 47.428, "eval_steps_per_second": 1.897, "eval_wer": 0.26, "step": 144000 }, { "epoch": 16.995044832468146, "grad_norm": 0.19608238339424133, "learning_rate": 7.431590334740172e-05, "loss": 0.0052, "step": 144050 }, { "epoch": 17.00094384143464, "grad_norm": 0.06745205074548721, "learning_rate": 7.429967019688473e-05, "loss": 0.0051, "step": 144100 }, { "epoch": 17.00684285040113, "grad_norm": 0.20380981266498566, "learning_rate": 7.428343369231281e-05, "loss": 0.0043, "step": 144150 }, { "epoch": 17.012741859367626, "grad_norm": 0.16413453221321106, "learning_rate": 7.426719383592706e-05, "loss": 0.0052, "step": 144200 }, { "epoch": 17.01864086833412, "grad_norm": 0.05552702397108078, "learning_rate": 7.425095062996905e-05, "loss": 0.003, "step": 144250 }, { "epoch": 17.024539877300615, "grad_norm": 0.17132887244224548, "learning_rate": 7.423470407668081e-05, "loss": 0.0036, "step": 144300 }, { "epoch": 17.030438886267106, "grad_norm": 0.016759466379880905, "learning_rate": 7.421845417830484e-05, "loss": 0.0042, "step": 144350 }, { "epoch": 17.0363378952336, "grad_norm": 0.39178362488746643, "learning_rate": 7.42022009370841e-05, "loss": 0.004, "step": 144400 }, { "epoch": 17.042236904200095, "grad_norm": 0.0961684063076973, "learning_rate": 7.4185944355262e-05, "loss": 0.0055, "step": 144450 }, { "epoch": 17.04813591316659, "grad_norm": 0.33597254753112793, "learning_rate": 7.41696844350824e-05, "loss": 0.0053, "step": 144500 }, { "epoch": 17.05403492213308, "grad_norm": 0.023109816014766693, "learning_rate": 7.415342117878965e-05, "loss": 0.0031, "step": 144550 }, { "epoch": 17.059933931099575, "grad_norm": 0.018647747114300728, "learning_rate": 7.413715458862856e-05, "loss": 0.0047, "step": 144600 }, { "epoch": 17.06583294006607, "grad_norm": 0.0755615308880806, "learning_rate": 7.412088466684435e-05, "loss": 0.0044, "step": 144650 }, { "epoch": 17.07173194903256, "grad_norm": 0.09746792167425156, "learning_rate": 7.41046114156828e-05, "loss": 0.0058, "step": 144700 }, { "epoch": 17.077630957999055, "grad_norm": 0.08234965801239014, "learning_rate": 7.408833483739003e-05, "loss": 0.0038, "step": 144750 }, { "epoch": 17.08352996696555, "grad_norm": 0.015599250793457031, "learning_rate": 7.407205493421271e-05, "loss": 0.0042, "step": 144800 }, { "epoch": 17.089428975932044, "grad_norm": 0.1170579046010971, "learning_rate": 7.405577170839793e-05, "loss": 0.0044, "step": 144850 }, { "epoch": 17.095327984898535, "grad_norm": 0.08128388971090317, "learning_rate": 7.403948516219322e-05, "loss": 0.0053, "step": 144900 }, { "epoch": 17.10122699386503, "grad_norm": 0.12182468175888062, "learning_rate": 7.402319529784664e-05, "loss": 0.0038, "step": 144950 }, { "epoch": 17.107126002831524, "grad_norm": 0.15995381772518158, "learning_rate": 7.400690211760662e-05, "loss": 0.0058, "step": 145000 }, { "epoch": 17.107126002831524, "eval_cer": 0.08519793459552495, "eval_loss": 0.001610824023373425, "eval_runtime": 2.1258, "eval_samples_per_second": 47.041, "eval_steps_per_second": 1.882, "eval_wer": 0.26, "step": 145000 }, { "epoch": 17.11302501179802, "grad_norm": 0.02119554579257965, "learning_rate": 7.399060562372211e-05, "loss": 0.0043, "step": 145050 }, { "epoch": 17.11892402076451, "grad_norm": 0.00903379824012518, "learning_rate": 7.397430581844246e-05, "loss": 0.004, "step": 145100 }, { "epoch": 17.124823029731004, "grad_norm": 0.019479384645819664, "learning_rate": 7.395800270401756e-05, "loss": 0.0049, "step": 145150 }, { "epoch": 17.1307220386975, "grad_norm": 0.12782873213291168, "learning_rate": 7.394169628269772e-05, "loss": 0.0039, "step": 145200 }, { "epoch": 17.136621047663994, "grad_norm": 0.05778196454048157, "learning_rate": 7.392538655673362e-05, "loss": 0.005, "step": 145250 }, { "epoch": 17.142520056630485, "grad_norm": 0.08247622102499008, "learning_rate": 7.390907352837654e-05, "loss": 0.0044, "step": 145300 }, { "epoch": 17.14841906559698, "grad_norm": 0.5727740526199341, "learning_rate": 7.389275719987813e-05, "loss": 0.0041, "step": 145350 }, { "epoch": 17.154318074563474, "grad_norm": 0.21593047678470612, "learning_rate": 7.387643757349053e-05, "loss": 0.0039, "step": 145400 }, { "epoch": 17.160217083529968, "grad_norm": 0.26206356287002563, "learning_rate": 7.386011465146627e-05, "loss": 0.0049, "step": 145450 }, { "epoch": 17.16611609249646, "grad_norm": 0.3058561682701111, "learning_rate": 7.384378843605843e-05, "loss": 0.0054, "step": 145500 }, { "epoch": 17.172015101462954, "grad_norm": 0.011125176213681698, "learning_rate": 7.382745892952047e-05, "loss": 0.0045, "step": 145550 }, { "epoch": 17.177914110429448, "grad_norm": 0.24427703022956848, "learning_rate": 7.381112613410636e-05, "loss": 0.0058, "step": 145600 }, { "epoch": 17.183813119395943, "grad_norm": 0.13147133588790894, "learning_rate": 7.379479005207047e-05, "loss": 0.0042, "step": 145650 }, { "epoch": 17.189712128362434, "grad_norm": 0.28343290090560913, "learning_rate": 7.377845068566767e-05, "loss": 0.0044, "step": 145700 }, { "epoch": 17.195611137328928, "grad_norm": 0.010785987600684166, "learning_rate": 7.376210803715324e-05, "loss": 0.0063, "step": 145750 }, { "epoch": 17.201510146295423, "grad_norm": 0.13539613783359528, "learning_rate": 7.374576210878297e-05, "loss": 0.0049, "step": 145800 }, { "epoch": 17.207409155261917, "grad_norm": 0.0037464722990989685, "learning_rate": 7.372941290281304e-05, "loss": 0.0048, "step": 145850 }, { "epoch": 17.21330816422841, "grad_norm": 0.22179453074932098, "learning_rate": 7.371306042150012e-05, "loss": 0.006, "step": 145900 }, { "epoch": 17.219207173194903, "grad_norm": 0.04720484837889671, "learning_rate": 7.369670466710133e-05, "loss": 0.0046, "step": 145950 }, { "epoch": 17.225106182161397, "grad_norm": 0.039440929889678955, "learning_rate": 7.368034564187425e-05, "loss": 0.005, "step": 146000 }, { "epoch": 17.225106182161397, "eval_cer": 0.08777969018932874, "eval_loss": 0.002401795005425811, "eval_runtime": 2.0611, "eval_samples_per_second": 48.518, "eval_steps_per_second": 1.941, "eval_wer": 0.27, "step": 146000 }, { "epoch": 17.231005191127892, "grad_norm": 0.025295956060290337, "learning_rate": 7.366398334807686e-05, "loss": 0.0049, "step": 146050 }, { "epoch": 17.236904200094383, "grad_norm": 0.22746501863002777, "learning_rate": 7.364761778796766e-05, "loss": 0.0047, "step": 146100 }, { "epoch": 17.242803209060877, "grad_norm": 0.04726707935333252, "learning_rate": 7.363124896380556e-05, "loss": 0.004, "step": 146150 }, { "epoch": 17.248702218027372, "grad_norm": 0.009208421222865582, "learning_rate": 7.361487687784989e-05, "loss": 0.0043, "step": 146200 }, { "epoch": 17.254601226993866, "grad_norm": 0.024523859843611717, "learning_rate": 7.359850153236055e-05, "loss": 0.0052, "step": 146250 }, { "epoch": 17.260500235960357, "grad_norm": 0.013366125524044037, "learning_rate": 7.358212292959774e-05, "loss": 0.0059, "step": 146300 }, { "epoch": 17.266399244926852, "grad_norm": 0.09548529237508774, "learning_rate": 7.356574107182221e-05, "loss": 0.005, "step": 146350 }, { "epoch": 17.272298253893346, "grad_norm": 0.0022281960118561983, "learning_rate": 7.354935596129513e-05, "loss": 0.0039, "step": 146400 }, { "epoch": 17.27819726285984, "grad_norm": 0.1159558892250061, "learning_rate": 7.353296760027811e-05, "loss": 0.0048, "step": 146450 }, { "epoch": 17.284096271826332, "grad_norm": 0.19034969806671143, "learning_rate": 7.351657599103321e-05, "loss": 0.0044, "step": 146500 }, { "epoch": 17.289995280792827, "grad_norm": 0.05008554458618164, "learning_rate": 7.350018113582296e-05, "loss": 0.0033, "step": 146550 }, { "epoch": 17.29589428975932, "grad_norm": 0.050845302641391754, "learning_rate": 7.34837830369103e-05, "loss": 0.0033, "step": 146600 }, { "epoch": 17.301793298725816, "grad_norm": 0.05537547916173935, "learning_rate": 7.346738169655867e-05, "loss": 0.003, "step": 146650 }, { "epoch": 17.307692307692307, "grad_norm": 0.1138148307800293, "learning_rate": 7.345097711703191e-05, "loss": 0.0043, "step": 146700 }, { "epoch": 17.3135913166588, "grad_norm": 0.061739057302474976, "learning_rate": 7.343456930059433e-05, "loss": 0.0046, "step": 146750 }, { "epoch": 17.319490325625296, "grad_norm": 0.062265608459711075, "learning_rate": 7.341815824951066e-05, "loss": 0.0047, "step": 146800 }, { "epoch": 17.32538933459179, "grad_norm": 0.034400470554828644, "learning_rate": 7.340174396604613e-05, "loss": 0.0053, "step": 146850 }, { "epoch": 17.33128834355828, "grad_norm": 0.13764719665050507, "learning_rate": 7.338532645246637e-05, "loss": 0.0055, "step": 146900 }, { "epoch": 17.337187352524776, "grad_norm": 0.06719473749399185, "learning_rate": 7.336890571103747e-05, "loss": 0.0053, "step": 146950 }, { "epoch": 17.34308636149127, "grad_norm": 0.0894601047039032, "learning_rate": 7.335248174402598e-05, "loss": 0.0045, "step": 147000 }, { "epoch": 17.34308636149127, "eval_cer": 0.08605851979345955, "eval_loss": 0.0010589836165308952, "eval_runtime": 2.08, "eval_samples_per_second": 48.078, "eval_steps_per_second": 1.923, "eval_wer": 0.27, "step": 147000 }, { "epoch": 17.348985370457765, "grad_norm": 0.0233826395124197, "learning_rate": 7.333605455369886e-05, "loss": 0.0048, "step": 147050 }, { "epoch": 17.354884379424256, "grad_norm": 0.09670780599117279, "learning_rate": 7.331962414232354e-05, "loss": 0.0045, "step": 147100 }, { "epoch": 17.36078338839075, "grad_norm": 0.038072265684604645, "learning_rate": 7.330319051216789e-05, "loss": 0.0041, "step": 147150 }, { "epoch": 17.366682397357245, "grad_norm": 0.13644151389598846, "learning_rate": 7.328675366550024e-05, "loss": 0.0048, "step": 147200 }, { "epoch": 17.37258140632374, "grad_norm": 0.21361996233463287, "learning_rate": 7.327031360458934e-05, "loss": 0.0047, "step": 147250 }, { "epoch": 17.37848041529023, "grad_norm": 0.06270342320203781, "learning_rate": 7.325387033170438e-05, "loss": 0.0045, "step": 147300 }, { "epoch": 17.384379424256725, "grad_norm": 0.10752038657665253, "learning_rate": 7.323742384911502e-05, "loss": 0.0039, "step": 147350 }, { "epoch": 17.39027843322322, "grad_norm": 0.13393832743167877, "learning_rate": 7.322097415909133e-05, "loss": 0.0045, "step": 147400 }, { "epoch": 17.39617744218971, "grad_norm": 0.18975362181663513, "learning_rate": 7.320452126390388e-05, "loss": 0.004, "step": 147450 }, { "epoch": 17.402076451156205, "grad_norm": 0.01432402990758419, "learning_rate": 7.318806516582359e-05, "loss": 0.0038, "step": 147500 }, { "epoch": 17.4079754601227, "grad_norm": 0.16007283329963684, "learning_rate": 7.317160586712193e-05, "loss": 0.0046, "step": 147550 }, { "epoch": 17.413874469089194, "grad_norm": 0.10418226569890976, "learning_rate": 7.315514337007071e-05, "loss": 0.004, "step": 147600 }, { "epoch": 17.419773478055685, "grad_norm": 0.2867427468299866, "learning_rate": 7.313867767694226e-05, "loss": 0.0049, "step": 147650 }, { "epoch": 17.42567248702218, "grad_norm": 0.079237200319767, "learning_rate": 7.312220879000933e-05, "loss": 0.004, "step": 147700 }, { "epoch": 17.431571495988674, "grad_norm": 0.10707419365644455, "learning_rate": 7.310573671154508e-05, "loss": 0.0038, "step": 147750 }, { "epoch": 17.43747050495517, "grad_norm": 0.1392238885164261, "learning_rate": 7.308926144382312e-05, "loss": 0.0043, "step": 147800 }, { "epoch": 17.44336951392166, "grad_norm": 0.13999728858470917, "learning_rate": 7.307278298911753e-05, "loss": 0.0048, "step": 147850 }, { "epoch": 17.449268522888154, "grad_norm": 0.23864273726940155, "learning_rate": 7.305630134970281e-05, "loss": 0.0046, "step": 147900 }, { "epoch": 17.45516753185465, "grad_norm": 0.03534487262368202, "learning_rate": 7.30398165278539e-05, "loss": 0.0046, "step": 147950 }, { "epoch": 17.461066540821143, "grad_norm": 0.0926760658621788, "learning_rate": 7.302332852584618e-05, "loss": 0.0049, "step": 148000 }, { "epoch": 17.461066540821143, "eval_cer": 0.08519793459552495, "eval_loss": 0.0016082583460956812, "eval_runtime": 2.0358, "eval_samples_per_second": 49.12, "eval_steps_per_second": 1.965, "eval_wer": 0.26, "step": 148000 }, { "epoch": 17.466965549787634, "grad_norm": 0.19223430752754211, "learning_rate": 7.300683734595548e-05, "loss": 0.0037, "step": 148050 }, { "epoch": 17.47286455875413, "grad_norm": 0.0952589362859726, "learning_rate": 7.299034299045805e-05, "loss": 0.0045, "step": 148100 }, { "epoch": 17.478763567720623, "grad_norm": 0.0613909512758255, "learning_rate": 7.297384546163056e-05, "loss": 0.004, "step": 148150 }, { "epoch": 17.484662576687118, "grad_norm": 0.08443333953619003, "learning_rate": 7.29573447617502e-05, "loss": 0.0059, "step": 148200 }, { "epoch": 17.49056158565361, "grad_norm": 0.03692301735281944, "learning_rate": 7.294084089309448e-05, "loss": 0.0038, "step": 148250 }, { "epoch": 17.496460594620103, "grad_norm": 0.0349004752933979, "learning_rate": 7.292433385794146e-05, "loss": 0.0049, "step": 148300 }, { "epoch": 17.502359603586598, "grad_norm": 0.01969047263264656, "learning_rate": 7.290782365856957e-05, "loss": 0.0054, "step": 148350 }, { "epoch": 17.508258612553092, "grad_norm": 0.14299005270004272, "learning_rate": 7.289131029725769e-05, "loss": 0.0043, "step": 148400 }, { "epoch": 17.514157621519583, "grad_norm": 0.04527019336819649, "learning_rate": 7.287479377628512e-05, "loss": 0.0048, "step": 148450 }, { "epoch": 17.520056630486078, "grad_norm": 0.03060811012983322, "learning_rate": 7.285827409793166e-05, "loss": 0.0056, "step": 148500 }, { "epoch": 17.525955639452572, "grad_norm": 0.04641721397638321, "learning_rate": 7.284175126447747e-05, "loss": 0.0041, "step": 148550 }, { "epoch": 17.531854648419067, "grad_norm": 0.08698023855686188, "learning_rate": 7.282522527820319e-05, "loss": 0.004, "step": 148600 }, { "epoch": 17.537753657385558, "grad_norm": 0.17636629939079285, "learning_rate": 7.280869614138988e-05, "loss": 0.0052, "step": 148650 }, { "epoch": 17.543652666352052, "grad_norm": 0.15357205271720886, "learning_rate": 7.279216385631903e-05, "loss": 0.0059, "step": 148700 }, { "epoch": 17.549551675318547, "grad_norm": 0.0905831828713417, "learning_rate": 7.277562842527259e-05, "loss": 0.0056, "step": 148750 }, { "epoch": 17.55545068428504, "grad_norm": 0.08029526472091675, "learning_rate": 7.27590898505329e-05, "loss": 0.0053, "step": 148800 }, { "epoch": 17.561349693251532, "grad_norm": 0.09220881760120392, "learning_rate": 7.274254813438277e-05, "loss": 0.0047, "step": 148850 }, { "epoch": 17.567248702218027, "grad_norm": 0.08111276477575302, "learning_rate": 7.272600327910544e-05, "loss": 0.0059, "step": 148900 }, { "epoch": 17.57314771118452, "grad_norm": 0.022017614915966988, "learning_rate": 7.270945528698458e-05, "loss": 0.0048, "step": 148950 }, { "epoch": 17.579046720151016, "grad_norm": 0.04284998029470444, "learning_rate": 7.26929041603043e-05, "loss": 0.0042, "step": 149000 }, { "epoch": 17.579046720151016, "eval_cer": 0.08605851979345955, "eval_loss": 0.0011433407198637724, "eval_runtime": 2.062, "eval_samples_per_second": 48.497, "eval_steps_per_second": 1.94, "eval_wer": 0.27, "step": 149000 }, { "epoch": 17.584945729117507, "grad_norm": 0.1816432923078537, "learning_rate": 7.26763499013491e-05, "loss": 0.0038, "step": 149050 }, { "epoch": 17.590844738084, "grad_norm": 0.35153329372406006, "learning_rate": 7.265979251240397e-05, "loss": 0.0051, "step": 149100 }, { "epoch": 17.596743747050496, "grad_norm": 0.03377506509423256, "learning_rate": 7.264323199575427e-05, "loss": 0.0061, "step": 149150 }, { "epoch": 17.60264275601699, "grad_norm": 0.27077504992485046, "learning_rate": 7.26266683536859e-05, "loss": 0.0057, "step": 149200 }, { "epoch": 17.60854176498348, "grad_norm": 0.12268324196338654, "learning_rate": 7.261010158848504e-05, "loss": 0.0052, "step": 149250 }, { "epoch": 17.614440773949976, "grad_norm": 0.22039371728897095, "learning_rate": 7.259353170243844e-05, "loss": 0.0049, "step": 149300 }, { "epoch": 17.62033978291647, "grad_norm": 0.19621260464191437, "learning_rate": 7.257695869783321e-05, "loss": 0.0055, "step": 149350 }, { "epoch": 17.626238791882965, "grad_norm": 0.037676963955163956, "learning_rate": 7.256038257695687e-05, "loss": 0.0041, "step": 149400 }, { "epoch": 17.632137800849456, "grad_norm": 0.11943408846855164, "learning_rate": 7.254380334209743e-05, "loss": 0.0048, "step": 149450 }, { "epoch": 17.63803680981595, "grad_norm": 0.05667112022638321, "learning_rate": 7.252722099554331e-05, "loss": 0.0058, "step": 149500 }, { "epoch": 17.643935818782445, "grad_norm": 0.10420941561460495, "learning_rate": 7.251063553958333e-05, "loss": 0.0059, "step": 149550 }, { "epoch": 17.64983482774894, "grad_norm": 0.17459934949874878, "learning_rate": 7.249404697650678e-05, "loss": 0.0045, "step": 149600 }, { "epoch": 17.65573383671543, "grad_norm": 0.14712458848953247, "learning_rate": 7.247745530860335e-05, "loss": 0.0048, "step": 149650 }, { "epoch": 17.661632845681925, "grad_norm": 0.13290660083293915, "learning_rate": 7.246086053816316e-05, "loss": 0.0045, "step": 149700 }, { "epoch": 17.66753185464842, "grad_norm": 0.14453300833702087, "learning_rate": 7.244426266747679e-05, "loss": 0.0043, "step": 149750 }, { "epoch": 17.67343086361491, "grad_norm": 0.26743727922439575, "learning_rate": 7.242766169883519e-05, "loss": 0.005, "step": 149800 }, { "epoch": 17.679329872581405, "grad_norm": 0.2620093822479248, "learning_rate": 7.24110576345298e-05, "loss": 0.0049, "step": 149850 }, { "epoch": 17.6852288815479, "grad_norm": 0.21990728378295898, "learning_rate": 7.239445047685245e-05, "loss": 0.0049, "step": 149900 }, { "epoch": 17.691127890514394, "grad_norm": 0.05750241130590439, "learning_rate": 7.237784022809542e-05, "loss": 0.0058, "step": 149950 }, { "epoch": 17.69702689948089, "grad_norm": 0.036232780665159225, "learning_rate": 7.236122689055138e-05, "loss": 0.0041, "step": 150000 }, { "epoch": 17.69702689948089, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003997279272880405, "eval_runtime": 2.0173, "eval_samples_per_second": 49.572, "eval_steps_per_second": 1.983, "eval_wer": 0.26, "step": 150000 }, { "epoch": 17.70292590844738, "grad_norm": 0.13674405217170715, "learning_rate": 7.234461046651346e-05, "loss": 0.0039, "step": 150050 }, { "epoch": 17.708824917413875, "grad_norm": 0.08136676996946335, "learning_rate": 7.232799095827521e-05, "loss": 0.0051, "step": 150100 }, { "epoch": 17.71472392638037, "grad_norm": 0.3870909512042999, "learning_rate": 7.231136836813059e-05, "loss": 0.0064, "step": 150150 }, { "epoch": 17.72062293534686, "grad_norm": 0.2580162584781647, "learning_rate": 7.2294742698374e-05, "loss": 0.006, "step": 150200 }, { "epoch": 17.726521944313355, "grad_norm": 0.060439810156822205, "learning_rate": 7.227811395130029e-05, "loss": 0.0055, "step": 150250 }, { "epoch": 17.73242095327985, "grad_norm": 0.0985172837972641, "learning_rate": 7.226148212920467e-05, "loss": 0.0046, "step": 150300 }, { "epoch": 17.738319962246344, "grad_norm": 0.0558052584528923, "learning_rate": 7.224484723438282e-05, "loss": 0.0031, "step": 150350 }, { "epoch": 17.744218971212835, "grad_norm": 0.1879604160785675, "learning_rate": 7.222820926913085e-05, "loss": 0.0046, "step": 150400 }, { "epoch": 17.75011798017933, "grad_norm": 0.04470084607601166, "learning_rate": 7.221156823574526e-05, "loss": 0.0042, "step": 150450 }, { "epoch": 17.756016989145824, "grad_norm": 0.23901961743831635, "learning_rate": 7.2194924136523e-05, "loss": 0.0039, "step": 150500 }, { "epoch": 17.76191599811232, "grad_norm": 0.2842200994491577, "learning_rate": 7.217827697376143e-05, "loss": 0.0047, "step": 150550 }, { "epoch": 17.76781500707881, "grad_norm": 0.028955386951565742, "learning_rate": 7.216162674975834e-05, "loss": 0.0063, "step": 150600 }, { "epoch": 17.773714016045304, "grad_norm": 0.1560254842042923, "learning_rate": 7.214497346681195e-05, "loss": 0.0047, "step": 150650 }, { "epoch": 17.7796130250118, "grad_norm": 0.25443726778030396, "learning_rate": 7.212831712722088e-05, "loss": 0.0073, "step": 150700 }, { "epoch": 17.785512033978293, "grad_norm": 0.11446991562843323, "learning_rate": 7.211165773328421e-05, "loss": 0.0048, "step": 150750 }, { "epoch": 17.791411042944784, "grad_norm": 0.10281478613615036, "learning_rate": 7.209499528730138e-05, "loss": 0.0045, "step": 150800 }, { "epoch": 17.79731005191128, "grad_norm": 0.08101646602153778, "learning_rate": 7.207832979157232e-05, "loss": 0.0047, "step": 150850 }, { "epoch": 17.803209060877773, "grad_norm": 0.0598604790866375, "learning_rate": 7.206166124839732e-05, "loss": 0.0051, "step": 150900 }, { "epoch": 17.809108069844267, "grad_norm": 0.056880563497543335, "learning_rate": 7.204498966007713e-05, "loss": 0.0044, "step": 150950 }, { "epoch": 17.81500707881076, "grad_norm": 0.19191092252731323, "learning_rate": 7.202831502891295e-05, "loss": 0.0045, "step": 151000 }, { "epoch": 17.81500707881076, "eval_cer": 0.08777969018932874, "eval_loss": 0.0030374054331332445, "eval_runtime": 2.044, "eval_samples_per_second": 48.924, "eval_steps_per_second": 1.957, "eval_wer": 0.27, "step": 151000 }, { "epoch": 17.820906087777253, "grad_norm": 0.027910517528653145, "learning_rate": 7.201163735720629e-05, "loss": 0.0053, "step": 151050 }, { "epoch": 17.826805096743747, "grad_norm": 0.006311438977718353, "learning_rate": 7.199495664725919e-05, "loss": 0.0059, "step": 151100 }, { "epoch": 17.832704105710242, "grad_norm": 0.9574313163757324, "learning_rate": 7.197827290137406e-05, "loss": 0.0055, "step": 151150 }, { "epoch": 17.838603114676733, "grad_norm": 0.044750042259693146, "learning_rate": 7.196158612185375e-05, "loss": 0.0046, "step": 151200 }, { "epoch": 17.844502123643228, "grad_norm": 0.1460782289505005, "learning_rate": 7.194489631100151e-05, "loss": 0.0047, "step": 151250 }, { "epoch": 17.850401132609722, "grad_norm": 0.04933222010731697, "learning_rate": 7.192820347112101e-05, "loss": 0.006, "step": 151300 }, { "epoch": 17.856300141576217, "grad_norm": 0.02604830637574196, "learning_rate": 7.191150760451633e-05, "loss": 0.0054, "step": 151350 }, { "epoch": 17.862199150542708, "grad_norm": 0.07324841618537903, "learning_rate": 7.1894808713492e-05, "loss": 0.0062, "step": 151400 }, { "epoch": 17.868098159509202, "grad_norm": 0.072272390127182, "learning_rate": 7.187810680035296e-05, "loss": 0.0057, "step": 151450 }, { "epoch": 17.873997168475697, "grad_norm": 0.18091344833374023, "learning_rate": 7.186140186740453e-05, "loss": 0.005, "step": 151500 }, { "epoch": 17.87989617744219, "grad_norm": 0.11379492282867432, "learning_rate": 7.184469391695249e-05, "loss": 0.0044, "step": 151550 }, { "epoch": 17.885795186408682, "grad_norm": 0.1942724734544754, "learning_rate": 7.1827982951303e-05, "loss": 0.0053, "step": 151600 }, { "epoch": 17.891694195375177, "grad_norm": 0.13000255823135376, "learning_rate": 7.181126897276266e-05, "loss": 0.0047, "step": 151650 }, { "epoch": 17.89759320434167, "grad_norm": 0.12506155669689178, "learning_rate": 7.179455198363851e-05, "loss": 0.005, "step": 151700 }, { "epoch": 17.903492213308166, "grad_norm": 0.029564812779426575, "learning_rate": 7.177783198623792e-05, "loss": 0.0052, "step": 151750 }, { "epoch": 17.909391222274657, "grad_norm": 0.28783175349235535, "learning_rate": 7.176110898286878e-05, "loss": 0.0048, "step": 151800 }, { "epoch": 17.91529023124115, "grad_norm": 0.08869367837905884, "learning_rate": 7.174438297583932e-05, "loss": 0.0045, "step": 151850 }, { "epoch": 17.921189240207646, "grad_norm": 0.5610207319259644, "learning_rate": 7.172765396745824e-05, "loss": 0.0067, "step": 151900 }, { "epoch": 17.92708824917414, "grad_norm": 0.17394211888313293, "learning_rate": 7.17109219600346e-05, "loss": 0.0058, "step": 151950 }, { "epoch": 17.93298725814063, "grad_norm": 0.14585036039352417, "learning_rate": 7.169418695587791e-05, "loss": 0.006, "step": 152000 }, { "epoch": 17.93298725814063, "eval_cer": 0.08777969018932874, "eval_loss": 0.002954871393740177, "eval_runtime": 2.0272, "eval_samples_per_second": 49.329, "eval_steps_per_second": 1.973, "eval_wer": 0.27, "step": 152000 }, { "epoch": 17.938886267107126, "grad_norm": 0.17380553483963013, "learning_rate": 7.167744895729808e-05, "loss": 0.0045, "step": 152050 }, { "epoch": 17.94478527607362, "grad_norm": 0.1838270127773285, "learning_rate": 7.166070796660544e-05, "loss": 0.0048, "step": 152100 }, { "epoch": 17.950684285040115, "grad_norm": 0.019355036318302155, "learning_rate": 7.164396398611071e-05, "loss": 0.0041, "step": 152150 }, { "epoch": 17.956583294006606, "grad_norm": 0.1934259682893753, "learning_rate": 7.162721701812505e-05, "loss": 0.0055, "step": 152200 }, { "epoch": 17.9624823029731, "grad_norm": 0.28172019124031067, "learning_rate": 7.161046706496005e-05, "loss": 0.0049, "step": 152250 }, { "epoch": 17.968381311939595, "grad_norm": 0.05997835472226143, "learning_rate": 7.159371412892767e-05, "loss": 0.0041, "step": 152300 }, { "epoch": 17.97428032090609, "grad_norm": 0.1097855493426323, "learning_rate": 7.157695821234029e-05, "loss": 0.0047, "step": 152350 }, { "epoch": 17.98017932987258, "grad_norm": 0.16779306530952454, "learning_rate": 7.156019931751072e-05, "loss": 0.0053, "step": 152400 }, { "epoch": 17.986078338839075, "grad_norm": 0.2587100565433502, "learning_rate": 7.154343744675215e-05, "loss": 0.0045, "step": 152450 }, { "epoch": 17.99197734780557, "grad_norm": 0.09785783290863037, "learning_rate": 7.152667260237823e-05, "loss": 0.006, "step": 152500 }, { "epoch": 17.99787635677206, "grad_norm": 0.07718683034181595, "learning_rate": 7.150990478670297e-05, "loss": 0.0051, "step": 152550 }, { "epoch": 18.003775365738555, "grad_norm": 0.03192280977964401, "learning_rate": 7.149313400204083e-05, "loss": 0.0042, "step": 152600 }, { "epoch": 18.00967437470505, "grad_norm": 0.24572046101093292, "learning_rate": 7.147636025070664e-05, "loss": 0.0039, "step": 152650 }, { "epoch": 18.015573383671544, "grad_norm": 0.16849708557128906, "learning_rate": 7.14595835350157e-05, "loss": 0.0042, "step": 152700 }, { "epoch": 18.021472392638035, "grad_norm": 0.09928102791309357, "learning_rate": 7.144280385728363e-05, "loss": 0.0042, "step": 152750 }, { "epoch": 18.02737140160453, "grad_norm": 0.014955568127334118, "learning_rate": 7.142602121982653e-05, "loss": 0.0045, "step": 152800 }, { "epoch": 18.033270410571024, "grad_norm": 0.035954058170318604, "learning_rate": 7.14092356249609e-05, "loss": 0.0039, "step": 152850 }, { "epoch": 18.03916941953752, "grad_norm": 0.14927220344543457, "learning_rate": 7.139244707500363e-05, "loss": 0.0033, "step": 152900 }, { "epoch": 18.04506842850401, "grad_norm": 0.07820387929677963, "learning_rate": 7.137565557227203e-05, "loss": 0.0045, "step": 152950 }, { "epoch": 18.050967437470504, "grad_norm": 0.10798590630292892, "learning_rate": 7.135886111908378e-05, "loss": 0.0047, "step": 153000 }, { "epoch": 18.050967437470504, "eval_cer": 0.08519793459552495, "eval_loss": 0.002810741774737835, "eval_runtime": 2.0954, "eval_samples_per_second": 47.724, "eval_steps_per_second": 1.909, "eval_wer": 0.26, "step": 153000 }, { "epoch": 18.056866446437, "grad_norm": 0.08684704452753067, "learning_rate": 7.134206371775705e-05, "loss": 0.0045, "step": 153050 }, { "epoch": 18.062765455403493, "grad_norm": 0.09531024843454361, "learning_rate": 7.132526337061031e-05, "loss": 0.0049, "step": 153100 }, { "epoch": 18.068664464369984, "grad_norm": 0.1539149135351181, "learning_rate": 7.130846007996252e-05, "loss": 0.0042, "step": 153150 }, { "epoch": 18.07456347333648, "grad_norm": 0.02973327971994877, "learning_rate": 7.129165384813303e-05, "loss": 0.0039, "step": 153200 }, { "epoch": 18.080462482302973, "grad_norm": 0.0478024035692215, "learning_rate": 7.127484467744157e-05, "loss": 0.0035, "step": 153250 }, { "epoch": 18.086361491269468, "grad_norm": 0.031201070174574852, "learning_rate": 7.125803257020829e-05, "loss": 0.0036, "step": 153300 }, { "epoch": 18.09226050023596, "grad_norm": 0.20343707501888275, "learning_rate": 7.124121752875375e-05, "loss": 0.0041, "step": 153350 }, { "epoch": 18.098159509202453, "grad_norm": 0.28208673000335693, "learning_rate": 7.122439955539889e-05, "loss": 0.0027, "step": 153400 }, { "epoch": 18.104058518168948, "grad_norm": 0.03931023180484772, "learning_rate": 7.120757865246511e-05, "loss": 0.0053, "step": 153450 }, { "epoch": 18.109957527135442, "grad_norm": 0.15768134593963623, "learning_rate": 7.119075482227414e-05, "loss": 0.0046, "step": 153500 }, { "epoch": 18.115856536101933, "grad_norm": 0.21996209025382996, "learning_rate": 7.11739280671482e-05, "loss": 0.0055, "step": 153550 }, { "epoch": 18.121755545068428, "grad_norm": 0.01570543274283409, "learning_rate": 7.115709838940983e-05, "loss": 0.0048, "step": 153600 }, { "epoch": 18.127654554034923, "grad_norm": 0.13356561958789825, "learning_rate": 7.114026579138203e-05, "loss": 0.0051, "step": 153650 }, { "epoch": 18.133553563001417, "grad_norm": 0.04469175264239311, "learning_rate": 7.112343027538818e-05, "loss": 0.0039, "step": 153700 }, { "epoch": 18.139452571967908, "grad_norm": 0.09785781055688858, "learning_rate": 7.110659184375206e-05, "loss": 0.0057, "step": 153750 }, { "epoch": 18.145351580934403, "grad_norm": 0.0204787440598011, "learning_rate": 7.108975049879784e-05, "loss": 0.0043, "step": 153800 }, { "epoch": 18.151250589900897, "grad_norm": 0.190158411860466, "learning_rate": 7.107290624285017e-05, "loss": 0.0046, "step": 153850 }, { "epoch": 18.15714959886739, "grad_norm": 0.0921434834599495, "learning_rate": 7.1056059078234e-05, "loss": 0.0049, "step": 153900 }, { "epoch": 18.163048607833883, "grad_norm": 0.017425863072276115, "learning_rate": 7.103920900727473e-05, "loss": 0.0048, "step": 153950 }, { "epoch": 18.168947616800377, "grad_norm": 0.17226779460906982, "learning_rate": 7.102235603229814e-05, "loss": 0.0048, "step": 154000 }, { "epoch": 18.168947616800377, "eval_cer": 0.08777969018932874, "eval_loss": 0.002383650979027152, "eval_runtime": 2.0342, "eval_samples_per_second": 49.159, "eval_steps_per_second": 1.966, "eval_wer": 0.27, "step": 154000 }, { "epoch": 18.17484662576687, "grad_norm": 0.017910413444042206, "learning_rate": 7.100550015563046e-05, "loss": 0.0035, "step": 154050 }, { "epoch": 18.180745634733366, "grad_norm": 0.34378668665885925, "learning_rate": 7.098864137959824e-05, "loss": 0.0039, "step": 154100 }, { "epoch": 18.186644643699857, "grad_norm": 0.14149627089500427, "learning_rate": 7.097177970652853e-05, "loss": 0.0052, "step": 154150 }, { "epoch": 18.19254365266635, "grad_norm": 0.059498559683561325, "learning_rate": 7.09549151387487e-05, "loss": 0.0039, "step": 154200 }, { "epoch": 18.198442661632846, "grad_norm": 0.22946512699127197, "learning_rate": 7.093804767858655e-05, "loss": 0.0045, "step": 154250 }, { "epoch": 18.20434167059934, "grad_norm": 0.07697410136461258, "learning_rate": 7.092117732837027e-05, "loss": 0.0043, "step": 154300 }, { "epoch": 18.210240679565832, "grad_norm": 0.18822580575942993, "learning_rate": 7.090430409042848e-05, "loss": 0.0053, "step": 154350 }, { "epoch": 18.216139688532326, "grad_norm": 0.1889175921678543, "learning_rate": 7.088742796709014e-05, "loss": 0.005, "step": 154400 }, { "epoch": 18.22203869749882, "grad_norm": 0.4113497734069824, "learning_rate": 7.087054896068464e-05, "loss": 0.0043, "step": 154450 }, { "epoch": 18.227937706465315, "grad_norm": 0.15382610261440277, "learning_rate": 7.085366707354181e-05, "loss": 0.0044, "step": 154500 }, { "epoch": 18.233836715431806, "grad_norm": 0.1074705570936203, "learning_rate": 7.08367823079918e-05, "loss": 0.0033, "step": 154550 }, { "epoch": 18.2397357243983, "grad_norm": 0.1986875683069229, "learning_rate": 7.08198946663652e-05, "loss": 0.0047, "step": 154600 }, { "epoch": 18.245634733364795, "grad_norm": 0.10342487692832947, "learning_rate": 7.080300415099302e-05, "loss": 0.0049, "step": 154650 }, { "epoch": 18.25153374233129, "grad_norm": 0.06357408314943314, "learning_rate": 7.078611076420659e-05, "loss": 0.0044, "step": 154700 }, { "epoch": 18.25743275129778, "grad_norm": 0.10012180358171463, "learning_rate": 7.076921450833771e-05, "loss": 0.0043, "step": 154750 }, { "epoch": 18.263331760264276, "grad_norm": 0.06721896678209305, "learning_rate": 7.075231538571856e-05, "loss": 0.0054, "step": 154800 }, { "epoch": 18.26923076923077, "grad_norm": 0.26361849904060364, "learning_rate": 7.073541339868171e-05, "loss": 0.0047, "step": 154850 }, { "epoch": 18.275129778197265, "grad_norm": 0.060763753950595856, "learning_rate": 7.071850854956008e-05, "loss": 0.0043, "step": 154900 }, { "epoch": 18.281028787163756, "grad_norm": 0.15521295368671417, "learning_rate": 7.070160084068707e-05, "loss": 0.0043, "step": 154950 }, { "epoch": 18.28692779613025, "grad_norm": 0.03524549677968025, "learning_rate": 7.068469027439642e-05, "loss": 0.0045, "step": 155000 }, { "epoch": 18.28692779613025, "eval_cer": 0.08347676419965576, "eval_loss": 0.0009192289435304701, "eval_runtime": 2.0565, "eval_samples_per_second": 48.626, "eval_steps_per_second": 1.945, "eval_wer": 0.26, "step": 155000 }, { "epoch": 18.292826805096745, "grad_norm": 0.1801002323627472, "learning_rate": 7.066777685302225e-05, "loss": 0.0042, "step": 155050 }, { "epoch": 18.29872581406324, "grad_norm": 0.10825762897729874, "learning_rate": 7.065086057889914e-05, "loss": 0.0056, "step": 155100 }, { "epoch": 18.30462482302973, "grad_norm": 0.17228196561336517, "learning_rate": 7.063394145436198e-05, "loss": 0.0043, "step": 155150 }, { "epoch": 18.310523831996225, "grad_norm": 0.11467855423688889, "learning_rate": 7.061701948174614e-05, "loss": 0.0035, "step": 155200 }, { "epoch": 18.31642284096272, "grad_norm": 0.025001784786581993, "learning_rate": 7.060009466338731e-05, "loss": 0.0048, "step": 155250 }, { "epoch": 18.32232184992921, "grad_norm": 0.04690094292163849, "learning_rate": 7.058316700162162e-05, "loss": 0.0043, "step": 155300 }, { "epoch": 18.328220858895705, "grad_norm": 0.234424889087677, "learning_rate": 7.056623649878558e-05, "loss": 0.0043, "step": 155350 }, { "epoch": 18.3341198678622, "grad_norm": 0.08574162423610687, "learning_rate": 7.054930315721606e-05, "loss": 0.0039, "step": 155400 }, { "epoch": 18.340018876828694, "grad_norm": 0.04023660719394684, "learning_rate": 7.053236697925037e-05, "loss": 0.0042, "step": 155450 }, { "epoch": 18.345917885795185, "grad_norm": 0.06631231307983398, "learning_rate": 7.051542796722618e-05, "loss": 0.0045, "step": 155500 }, { "epoch": 18.35181689476168, "grad_norm": 0.15779821574687958, "learning_rate": 7.049848612348157e-05, "loss": 0.0054, "step": 155550 }, { "epoch": 18.357715903728174, "grad_norm": 0.1939706653356552, "learning_rate": 7.048154145035502e-05, "loss": 0.0046, "step": 155600 }, { "epoch": 18.36361491269467, "grad_norm": 0.38293078541755676, "learning_rate": 7.046459395018535e-05, "loss": 0.0045, "step": 155650 }, { "epoch": 18.36951392166116, "grad_norm": 0.41091054677963257, "learning_rate": 7.04476436253118e-05, "loss": 0.0049, "step": 155700 }, { "epoch": 18.375412930627654, "grad_norm": 0.28078439831733704, "learning_rate": 7.043069047807404e-05, "loss": 0.0046, "step": 155750 }, { "epoch": 18.38131193959415, "grad_norm": 0.22999069094657898, "learning_rate": 7.041373451081207e-05, "loss": 0.0055, "step": 155800 }, { "epoch": 18.387210948560643, "grad_norm": 0.16928383708000183, "learning_rate": 7.03967757258663e-05, "loss": 0.005, "step": 155850 }, { "epoch": 18.393109957527134, "grad_norm": 0.11494036763906479, "learning_rate": 7.037981412557755e-05, "loss": 0.0035, "step": 155900 }, { "epoch": 18.39900896649363, "grad_norm": 0.3335033357143402, "learning_rate": 7.0362849712287e-05, "loss": 0.0054, "step": 155950 }, { "epoch": 18.404907975460123, "grad_norm": 0.03175634145736694, "learning_rate": 7.034588248833621e-05, "loss": 0.005, "step": 156000 }, { "epoch": 18.404907975460123, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003250810841564089, "eval_runtime": 2.0431, "eval_samples_per_second": 48.944, "eval_steps_per_second": 1.958, "eval_wer": 0.26, "step": 156000 }, { "epoch": 18.410806984426618, "grad_norm": 0.05130722001194954, "learning_rate": 7.032891245606716e-05, "loss": 0.0045, "step": 156050 }, { "epoch": 18.41670599339311, "grad_norm": 0.048502836376428604, "learning_rate": 7.031193961782221e-05, "loss": 0.0046, "step": 156100 }, { "epoch": 18.422605002359603, "grad_norm": 0.2042209357023239, "learning_rate": 7.02949639759441e-05, "loss": 0.0043, "step": 156150 }, { "epoch": 18.428504011326098, "grad_norm": 0.015235889703035355, "learning_rate": 7.027798553277596e-05, "loss": 0.005, "step": 156200 }, { "epoch": 18.434403020292592, "grad_norm": 0.1604101061820984, "learning_rate": 7.026100429066129e-05, "loss": 0.0045, "step": 156250 }, { "epoch": 18.440302029259083, "grad_norm": 0.06913124024868011, "learning_rate": 7.024402025194402e-05, "loss": 0.0035, "step": 156300 }, { "epoch": 18.446201038225578, "grad_norm": 0.052657369524240494, "learning_rate": 7.022703341896837e-05, "loss": 0.003, "step": 156350 }, { "epoch": 18.452100047192072, "grad_norm": 0.019685382023453712, "learning_rate": 7.021004379407909e-05, "loss": 0.0044, "step": 156400 }, { "epoch": 18.457999056158567, "grad_norm": 0.20051570236682892, "learning_rate": 7.01930513796212e-05, "loss": 0.0038, "step": 156450 }, { "epoch": 18.463898065125058, "grad_norm": 0.10786265879869461, "learning_rate": 7.017605617794016e-05, "loss": 0.0041, "step": 156500 }, { "epoch": 18.469797074091552, "grad_norm": 0.10887929052114487, "learning_rate": 7.015905819138177e-05, "loss": 0.0047, "step": 156550 }, { "epoch": 18.475696083058047, "grad_norm": 0.06242843344807625, "learning_rate": 7.014205742229227e-05, "loss": 0.0046, "step": 156600 }, { "epoch": 18.48159509202454, "grad_norm": 0.3827958405017853, "learning_rate": 7.012505387301825e-05, "loss": 0.0052, "step": 156650 }, { "epoch": 18.487494100991032, "grad_norm": 0.003222447820007801, "learning_rate": 7.010804754590668e-05, "loss": 0.0052, "step": 156700 }, { "epoch": 18.493393109957527, "grad_norm": 0.2946621775627136, "learning_rate": 7.009103844330493e-05, "loss": 0.0041, "step": 156750 }, { "epoch": 18.49929211892402, "grad_norm": 0.03531253710389137, "learning_rate": 7.007402656756073e-05, "loss": 0.0037, "step": 156800 }, { "epoch": 18.505191127890516, "grad_norm": 0.1386864185333252, "learning_rate": 7.005701192102224e-05, "loss": 0.0033, "step": 156850 }, { "epoch": 18.511090136857007, "grad_norm": 0.006638247985392809, "learning_rate": 7.003999450603796e-05, "loss": 0.0047, "step": 156900 }, { "epoch": 18.5169891458235, "grad_norm": 0.07751500606536865, "learning_rate": 7.002297432495678e-05, "loss": 0.0047, "step": 156950 }, { "epoch": 18.522888154789996, "grad_norm": 0.16504019498825073, "learning_rate": 7.000595138012796e-05, "loss": 0.0043, "step": 157000 }, { "epoch": 18.522888154789996, "eval_cer": 0.08519793459552495, "eval_loss": 0.0015821164706721902, "eval_runtime": 2.116, "eval_samples_per_second": 47.259, "eval_steps_per_second": 1.89, "eval_wer": 0.26, "step": 157000 }, { "epoch": 18.52878716375649, "grad_norm": 0.2455109804868698, "learning_rate": 6.99889256739012e-05, "loss": 0.0057, "step": 157050 }, { "epoch": 18.53468617272298, "grad_norm": 0.11863785237073898, "learning_rate": 6.997189720862648e-05, "loss": 0.0061, "step": 157100 }, { "epoch": 18.540585181689476, "grad_norm": 0.13568297028541565, "learning_rate": 6.995486598665427e-05, "loss": 0.0046, "step": 157150 }, { "epoch": 18.54648419065597, "grad_norm": 0.08470079302787781, "learning_rate": 6.993783201033535e-05, "loss": 0.0043, "step": 157200 }, { "epoch": 18.552383199622465, "grad_norm": 0.10210379958152771, "learning_rate": 6.99207952820209e-05, "loss": 0.0045, "step": 157250 }, { "epoch": 18.558282208588956, "grad_norm": 0.05255935341119766, "learning_rate": 6.990375580406245e-05, "loss": 0.0041, "step": 157300 }, { "epoch": 18.56418121755545, "grad_norm": 0.286740779876709, "learning_rate": 6.988671357881201e-05, "loss": 0.0048, "step": 157350 }, { "epoch": 18.570080226521945, "grad_norm": 0.11357388645410538, "learning_rate": 6.986966860862181e-05, "loss": 0.0044, "step": 157400 }, { "epoch": 18.57597923548844, "grad_norm": 0.20073340833187103, "learning_rate": 6.985262089584463e-05, "loss": 0.0046, "step": 157450 }, { "epoch": 18.58187824445493, "grad_norm": 0.03299572318792343, "learning_rate": 6.98355704428335e-05, "loss": 0.0035, "step": 157500 }, { "epoch": 18.587777253421425, "grad_norm": 0.20131085813045502, "learning_rate": 6.981851725194188e-05, "loss": 0.0048, "step": 157550 }, { "epoch": 18.59367626238792, "grad_norm": 0.22256405651569366, "learning_rate": 6.98014613255236e-05, "loss": 0.0049, "step": 157600 }, { "epoch": 18.59957527135441, "grad_norm": 0.09988892078399658, "learning_rate": 6.97844026659329e-05, "loss": 0.0042, "step": 157650 }, { "epoch": 18.605474280320905, "grad_norm": 0.11096733808517456, "learning_rate": 6.976734127552432e-05, "loss": 0.0039, "step": 157700 }, { "epoch": 18.6113732892874, "grad_norm": 0.03629356622695923, "learning_rate": 6.975027715665284e-05, "loss": 0.0048, "step": 157750 }, { "epoch": 18.617272298253894, "grad_norm": 0.11976651102304459, "learning_rate": 6.973321031167383e-05, "loss": 0.0045, "step": 157800 }, { "epoch": 18.623171307220385, "grad_norm": 0.22111600637435913, "learning_rate": 6.971614074294297e-05, "loss": 0.0057, "step": 157850 }, { "epoch": 18.62907031618688, "grad_norm": 0.03273991122841835, "learning_rate": 6.969906845281636e-05, "loss": 0.0049, "step": 157900 }, { "epoch": 18.634969325153374, "grad_norm": 0.23423852026462555, "learning_rate": 6.968199344365048e-05, "loss": 0.0044, "step": 157950 }, { "epoch": 18.64086833411987, "grad_norm": 0.06046473607420921, "learning_rate": 6.966491571780217e-05, "loss": 0.0048, "step": 158000 }, { "epoch": 18.64086833411987, "eval_cer": 0.08519793459552495, "eval_loss": 0.0016216945368796587, "eval_runtime": 2.0133, "eval_samples_per_second": 49.671, "eval_steps_per_second": 1.987, "eval_wer": 0.26, "step": 158000 }, { "epoch": 18.64676734308636, "grad_norm": 0.369311660528183, "learning_rate": 6.964783527762863e-05, "loss": 0.0044, "step": 158050 }, { "epoch": 18.652666352052854, "grad_norm": 0.058943506330251694, "learning_rate": 6.963075212548747e-05, "loss": 0.0042, "step": 158100 }, { "epoch": 18.65856536101935, "grad_norm": 0.04402460530400276, "learning_rate": 6.961366626373666e-05, "loss": 0.0053, "step": 158150 }, { "epoch": 18.664464369985843, "grad_norm": 0.09251465648412704, "learning_rate": 6.959657769473453e-05, "loss": 0.0047, "step": 158200 }, { "epoch": 18.670363378952334, "grad_norm": 0.1738308072090149, "learning_rate": 6.957948642083981e-05, "loss": 0.0041, "step": 158250 }, { "epoch": 18.67626238791883, "grad_norm": 0.15942154824733734, "learning_rate": 6.956239244441159e-05, "loss": 0.0043, "step": 158300 }, { "epoch": 18.682161396885324, "grad_norm": 0.10723111778497696, "learning_rate": 6.954529576780929e-05, "loss": 0.0053, "step": 158350 }, { "epoch": 18.688060405851818, "grad_norm": 0.019586466252803802, "learning_rate": 6.95281963933928e-05, "loss": 0.0045, "step": 158400 }, { "epoch": 18.69395941481831, "grad_norm": 0.14029859006404877, "learning_rate": 6.951109432352229e-05, "loss": 0.0041, "step": 158450 }, { "epoch": 18.699858423784804, "grad_norm": 0.20164984464645386, "learning_rate": 6.949398956055835e-05, "loss": 0.0045, "step": 158500 }, { "epoch": 18.705757432751298, "grad_norm": 0.14099860191345215, "learning_rate": 6.947688210686196e-05, "loss": 0.0057, "step": 158550 }, { "epoch": 18.711656441717793, "grad_norm": 0.06461411714553833, "learning_rate": 6.945977196479439e-05, "loss": 0.0047, "step": 158600 }, { "epoch": 18.717555450684284, "grad_norm": 0.059312574565410614, "learning_rate": 6.944265913671735e-05, "loss": 0.006, "step": 158650 }, { "epoch": 18.723454459650778, "grad_norm": 0.2894044518470764, "learning_rate": 6.942554362499292e-05, "loss": 0.0048, "step": 158700 }, { "epoch": 18.729353468617273, "grad_norm": 0.0856458842754364, "learning_rate": 6.940842543198352e-05, "loss": 0.004, "step": 158750 }, { "epoch": 18.735252477583767, "grad_norm": 0.11184125393629074, "learning_rate": 6.939130456005196e-05, "loss": 0.0051, "step": 158800 }, { "epoch": 18.741151486550258, "grad_norm": 0.5085691213607788, "learning_rate": 6.937418101156142e-05, "loss": 0.0045, "step": 158850 }, { "epoch": 18.747050495516753, "grad_norm": 0.11091062426567078, "learning_rate": 6.935705478887542e-05, "loss": 0.005, "step": 158900 }, { "epoch": 18.752949504483247, "grad_norm": 0.14303719997406006, "learning_rate": 6.93399258943579e-05, "loss": 0.0045, "step": 158950 }, { "epoch": 18.758848513449742, "grad_norm": 0.052255310118198395, "learning_rate": 6.932279433037311e-05, "loss": 0.0048, "step": 159000 }, { "epoch": 18.758848513449742, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004672195063903928, "eval_runtime": 2.0329, "eval_samples_per_second": 49.192, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 159000 }, { "epoch": 18.764747522416233, "grad_norm": 0.03180147334933281, "learning_rate": 6.930566009928571e-05, "loss": 0.0039, "step": 159050 }, { "epoch": 18.770646531382727, "grad_norm": 0.12581202387809753, "learning_rate": 6.928852320346076e-05, "loss": 0.0049, "step": 159100 }, { "epoch": 18.776545540349222, "grad_norm": 0.0731407031416893, "learning_rate": 6.927138364526358e-05, "loss": 0.005, "step": 159150 }, { "epoch": 18.782444549315716, "grad_norm": 0.04610191658139229, "learning_rate": 6.925424142705997e-05, "loss": 0.005, "step": 159200 }, { "epoch": 18.788343558282207, "grad_norm": 0.15304836630821228, "learning_rate": 6.923709655121604e-05, "loss": 0.0057, "step": 159250 }, { "epoch": 18.794242567248702, "grad_norm": 0.011393701657652855, "learning_rate": 6.921994902009827e-05, "loss": 0.0056, "step": 159300 }, { "epoch": 18.800141576215196, "grad_norm": 0.10000502318143845, "learning_rate": 6.92027988360735e-05, "loss": 0.0047, "step": 159350 }, { "epoch": 18.80604058518169, "grad_norm": 0.18157555162906647, "learning_rate": 6.918564600150897e-05, "loss": 0.0036, "step": 159400 }, { "epoch": 18.811939594148182, "grad_norm": 0.13073474168777466, "learning_rate": 6.916849051877226e-05, "loss": 0.005, "step": 159450 }, { "epoch": 18.817838603114676, "grad_norm": 0.09358105063438416, "learning_rate": 6.915133239023134e-05, "loss": 0.0049, "step": 159500 }, { "epoch": 18.82373761208117, "grad_norm": 0.24414187669754028, "learning_rate": 6.91341716182545e-05, "loss": 0.0045, "step": 159550 }, { "epoch": 18.829636621047666, "grad_norm": 0.15459124743938446, "learning_rate": 6.911700820521042e-05, "loss": 0.0042, "step": 159600 }, { "epoch": 18.835535630014157, "grad_norm": 0.010575356893241405, "learning_rate": 6.909984215346816e-05, "loss": 0.0035, "step": 159650 }, { "epoch": 18.84143463898065, "grad_norm": 0.20980337262153625, "learning_rate": 6.908267346539712e-05, "loss": 0.0051, "step": 159700 }, { "epoch": 18.847333647947146, "grad_norm": 0.13927385210990906, "learning_rate": 6.906550214336709e-05, "loss": 0.0045, "step": 159750 }, { "epoch": 18.85323265691364, "grad_norm": 0.09468574821949005, "learning_rate": 6.904832818974818e-05, "loss": 0.0042, "step": 159800 }, { "epoch": 18.85913166588013, "grad_norm": 0.2001240998506546, "learning_rate": 6.903115160691091e-05, "loss": 0.0046, "step": 159850 }, { "epoch": 18.865030674846626, "grad_norm": 0.015677398070693016, "learning_rate": 6.901397239722616e-05, "loss": 0.0041, "step": 159900 }, { "epoch": 18.87092968381312, "grad_norm": 0.1288854479789734, "learning_rate": 6.899679056306513e-05, "loss": 0.0044, "step": 159950 }, { "epoch": 18.876828692779615, "grad_norm": 0.1903911679983139, "learning_rate": 6.897960610679938e-05, "loss": 0.005, "step": 160000 }, { "epoch": 18.876828692779615, "eval_cer": 0.08519793459552495, "eval_loss": 0.0010631500044837594, "eval_runtime": 2.0672, "eval_samples_per_second": 48.375, "eval_steps_per_second": 1.935, "eval_wer": 0.26, "step": 160000 }, { "epoch": 18.882727701746106, "grad_norm": 0.041001223027706146, "learning_rate": 6.896241903080094e-05, "loss": 0.0058, "step": 160050 }, { "epoch": 18.8886267107126, "grad_norm": 0.060164064168930054, "learning_rate": 6.894522933744205e-05, "loss": 0.0034, "step": 160100 }, { "epoch": 18.894525719679095, "grad_norm": 0.13531889021396637, "learning_rate": 6.892803702909542e-05, "loss": 0.0036, "step": 160150 }, { "epoch": 18.90042472864559, "grad_norm": 0.07251855731010437, "learning_rate": 6.891084210813408e-05, "loss": 0.0045, "step": 160200 }, { "epoch": 18.90632373761208, "grad_norm": 0.08592572808265686, "learning_rate": 6.889364457693141e-05, "loss": 0.0048, "step": 160250 }, { "epoch": 18.912222746578575, "grad_norm": 0.043975215405225754, "learning_rate": 6.887644443786118e-05, "loss": 0.004, "step": 160300 }, { "epoch": 18.91812175554507, "grad_norm": 0.13182121515274048, "learning_rate": 6.885924169329751e-05, "loss": 0.0041, "step": 160350 }, { "epoch": 18.92402076451156, "grad_norm": 0.05021953955292702, "learning_rate": 6.884203634561483e-05, "loss": 0.0045, "step": 160400 }, { "epoch": 18.929919773478055, "grad_norm": 0.04024948924779892, "learning_rate": 6.882482839718804e-05, "loss": 0.0032, "step": 160450 }, { "epoch": 18.93581878244455, "grad_norm": 0.1458601951599121, "learning_rate": 6.880761785039229e-05, "loss": 0.0053, "step": 160500 }, { "epoch": 18.941717791411044, "grad_norm": 0.1166665181517601, "learning_rate": 6.879040470760313e-05, "loss": 0.0053, "step": 160550 }, { "epoch": 18.947616800377535, "grad_norm": 0.13635273277759552, "learning_rate": 6.87731889711965e-05, "loss": 0.0041, "step": 160600 }, { "epoch": 18.95351580934403, "grad_norm": 0.08589162677526474, "learning_rate": 6.875597064354865e-05, "loss": 0.0036, "step": 160650 }, { "epoch": 18.959414818310524, "grad_norm": 0.13392943143844604, "learning_rate": 6.87387497270362e-05, "loss": 0.0042, "step": 160700 }, { "epoch": 18.96531382727702, "grad_norm": 0.11946255713701248, "learning_rate": 6.872152622403614e-05, "loss": 0.0048, "step": 160750 }, { "epoch": 18.97121283624351, "grad_norm": 0.11884260922670364, "learning_rate": 6.87043001369258e-05, "loss": 0.0054, "step": 160800 }, { "epoch": 18.977111845210004, "grad_norm": 0.0829172357916832, "learning_rate": 6.868707146808288e-05, "loss": 0.0043, "step": 160850 }, { "epoch": 18.9830108541765, "grad_norm": 0.09298104047775269, "learning_rate": 6.866984021988545e-05, "loss": 0.0043, "step": 160900 }, { "epoch": 18.988909863142993, "grad_norm": 0.22562095522880554, "learning_rate": 6.86526063947119e-05, "loss": 0.005, "step": 160950 }, { "epoch": 18.994808872109484, "grad_norm": 0.11331473290920258, "learning_rate": 6.863536999494101e-05, "loss": 0.0047, "step": 161000 }, { "epoch": 18.994808872109484, "eval_cer": 0.08347676419965576, "eval_loss": 0.0008733487338759005, "eval_runtime": 2.0055, "eval_samples_per_second": 49.862, "eval_steps_per_second": 1.994, "eval_wer": 0.26, "step": 161000 }, { "epoch": 19.00070788107598, "grad_norm": 0.06090158596634865, "learning_rate": 6.861813102295189e-05, "loss": 0.0041, "step": 161050 }, { "epoch": 19.006606890042473, "grad_norm": 0.07048491388559341, "learning_rate": 6.860088948112401e-05, "loss": 0.0039, "step": 161100 }, { "epoch": 19.012505899008968, "grad_norm": 0.13755278289318085, "learning_rate": 6.858364537183722e-05, "loss": 0.0043, "step": 161150 }, { "epoch": 19.01840490797546, "grad_norm": 0.08053167164325714, "learning_rate": 6.856639869747166e-05, "loss": 0.0037, "step": 161200 }, { "epoch": 19.024303916941953, "grad_norm": 0.08584588021039963, "learning_rate": 6.854914946040794e-05, "loss": 0.0043, "step": 161250 }, { "epoch": 19.030202925908448, "grad_norm": 0.027609968557953835, "learning_rate": 6.853189766302688e-05, "loss": 0.0034, "step": 161300 }, { "epoch": 19.036101934874942, "grad_norm": 0.09046491980552673, "learning_rate": 6.851464330770977e-05, "loss": 0.0033, "step": 161350 }, { "epoch": 19.042000943841433, "grad_norm": 0.16160772740840912, "learning_rate": 6.849738639683818e-05, "loss": 0.0044, "step": 161400 }, { "epoch": 19.047899952807928, "grad_norm": 0.09336888790130615, "learning_rate": 6.848012693279409e-05, "loss": 0.0047, "step": 161450 }, { "epoch": 19.053798961774422, "grad_norm": 0.07776917517185211, "learning_rate": 6.846286491795977e-05, "loss": 0.0035, "step": 161500 }, { "epoch": 19.059697970740917, "grad_norm": 0.21469593048095703, "learning_rate": 6.844560035471792e-05, "loss": 0.0042, "step": 161550 }, { "epoch": 19.065596979707408, "grad_norm": 0.054861217737197876, "learning_rate": 6.84283332454515e-05, "loss": 0.0037, "step": 161600 }, { "epoch": 19.071495988673902, "grad_norm": 0.015735477209091187, "learning_rate": 6.84110635925439e-05, "loss": 0.0036, "step": 161650 }, { "epoch": 19.077394997640397, "grad_norm": 0.15008987486362457, "learning_rate": 6.839379139837883e-05, "loss": 0.0031, "step": 161700 }, { "epoch": 19.08329400660689, "grad_norm": 0.06358896195888519, "learning_rate": 6.837651666534031e-05, "loss": 0.0043, "step": 161750 }, { "epoch": 19.089193015573382, "grad_norm": 0.19772803783416748, "learning_rate": 6.835923939581281e-05, "loss": 0.0038, "step": 161800 }, { "epoch": 19.095092024539877, "grad_norm": 0.16405445337295532, "learning_rate": 6.834195959218105e-05, "loss": 0.0045, "step": 161850 }, { "epoch": 19.10099103350637, "grad_norm": 0.10004613548517227, "learning_rate": 6.832467725683017e-05, "loss": 0.0036, "step": 161900 }, { "epoch": 19.106890042472866, "grad_norm": 0.2837271988391876, "learning_rate": 6.830739239214561e-05, "loss": 0.0033, "step": 161950 }, { "epoch": 19.112789051439357, "grad_norm": 0.05305129289627075, "learning_rate": 6.829010500051318e-05, "loss": 0.0045, "step": 162000 }, { "epoch": 19.112789051439357, "eval_cer": 0.08347676419965576, "eval_loss": 0.0008705616928637028, "eval_runtime": 2.0217, "eval_samples_per_second": 49.462, "eval_steps_per_second": 1.978, "eval_wer": 0.26, "step": 162000 }, { "epoch": 19.11868806040585, "grad_norm": 0.04906931519508362, "learning_rate": 6.827281508431904e-05, "loss": 0.0026, "step": 162050 }, { "epoch": 19.124587069372346, "grad_norm": 0.10436113178730011, "learning_rate": 6.82555226459497e-05, "loss": 0.0052, "step": 162100 }, { "epoch": 19.13048607833884, "grad_norm": 0.2040911763906479, "learning_rate": 6.823822768779202e-05, "loss": 0.0043, "step": 162150 }, { "epoch": 19.13638508730533, "grad_norm": 0.020113356411457062, "learning_rate": 6.82209302122332e-05, "loss": 0.0051, "step": 162200 }, { "epoch": 19.142284096271826, "grad_norm": 0.05607236549258232, "learning_rate": 6.820363022166078e-05, "loss": 0.0039, "step": 162250 }, { "epoch": 19.14818310523832, "grad_norm": 0.028622256591916084, "learning_rate": 6.818632771846268e-05, "loss": 0.0044, "step": 162300 }, { "epoch": 19.154082114204815, "grad_norm": 0.09498877823352814, "learning_rate": 6.816902270502711e-05, "loss": 0.0028, "step": 162350 }, { "epoch": 19.159981123171306, "grad_norm": 0.18519826233386993, "learning_rate": 6.815171518374268e-05, "loss": 0.004, "step": 162400 }, { "epoch": 19.1658801321378, "grad_norm": 0.029147055000066757, "learning_rate": 6.813440515699833e-05, "loss": 0.0035, "step": 162450 }, { "epoch": 19.171779141104295, "grad_norm": 0.14877955615520477, "learning_rate": 6.811709262718335e-05, "loss": 0.0039, "step": 162500 }, { "epoch": 19.17767815007079, "grad_norm": 0.07051589339971542, "learning_rate": 6.809977759668736e-05, "loss": 0.0038, "step": 162550 }, { "epoch": 19.18357715903728, "grad_norm": 0.23382830619812012, "learning_rate": 6.808246006790031e-05, "loss": 0.0049, "step": 162600 }, { "epoch": 19.189476168003775, "grad_norm": 0.15329253673553467, "learning_rate": 6.806514004321256e-05, "loss": 0.0047, "step": 162650 }, { "epoch": 19.19537517697027, "grad_norm": 0.13519762456417084, "learning_rate": 6.804781752501475e-05, "loss": 0.0036, "step": 162700 }, { "epoch": 19.201274185936764, "grad_norm": 0.02189100719988346, "learning_rate": 6.803049251569786e-05, "loss": 0.0038, "step": 162750 }, { "epoch": 19.207173194903255, "grad_norm": 0.01022179052233696, "learning_rate": 6.80131650176533e-05, "loss": 0.0039, "step": 162800 }, { "epoch": 19.21307220386975, "grad_norm": 0.26816803216934204, "learning_rate": 6.799583503327273e-05, "loss": 0.0059, "step": 162850 }, { "epoch": 19.218971212836244, "grad_norm": 0.06098787859082222, "learning_rate": 6.797850256494817e-05, "loss": 0.0044, "step": 162900 }, { "epoch": 19.224870221802735, "grad_norm": 0.30561208724975586, "learning_rate": 6.796116761507206e-05, "loss": 0.0049, "step": 162950 }, { "epoch": 19.23076923076923, "grad_norm": 0.05560985207557678, "learning_rate": 6.794383018603704e-05, "loss": 0.0041, "step": 163000 }, { "epoch": 19.23076923076923, "eval_cer": 0.08347676419965576, "eval_loss": 0.0006767050363123417, "eval_runtime": 2.0435, "eval_samples_per_second": 48.936, "eval_steps_per_second": 1.957, "eval_wer": 0.26, "step": 163000 }, { "epoch": 19.236668239735724, "grad_norm": 0.05985232815146446, "learning_rate": 6.792649028023624e-05, "loss": 0.0046, "step": 163050 }, { "epoch": 19.24256724870222, "grad_norm": 0.122454434633255, "learning_rate": 6.790914790006304e-05, "loss": 0.0042, "step": 163100 }, { "epoch": 19.24846625766871, "grad_norm": 0.019492842257022858, "learning_rate": 6.78918030479112e-05, "loss": 0.0032, "step": 163150 }, { "epoch": 19.254365266635205, "grad_norm": 0.1474081426858902, "learning_rate": 6.787445572617481e-05, "loss": 0.0042, "step": 163200 }, { "epoch": 19.2602642756017, "grad_norm": 0.05642309784889221, "learning_rate": 6.785710593724828e-05, "loss": 0.0039, "step": 163250 }, { "epoch": 19.266163284568194, "grad_norm": 0.23909850418567657, "learning_rate": 6.78397536835264e-05, "loss": 0.0032, "step": 163300 }, { "epoch": 19.272062293534685, "grad_norm": 0.026679132133722305, "learning_rate": 6.782239896740428e-05, "loss": 0.0042, "step": 163350 }, { "epoch": 19.27796130250118, "grad_norm": 0.05261019244790077, "learning_rate": 6.780504179127734e-05, "loss": 0.0034, "step": 163400 }, { "epoch": 19.283860311467674, "grad_norm": 0.22910062968730927, "learning_rate": 6.778768215754144e-05, "loss": 0.0049, "step": 163450 }, { "epoch": 19.289759320434168, "grad_norm": 0.15409433841705322, "learning_rate": 6.777032006859264e-05, "loss": 0.0043, "step": 163500 }, { "epoch": 19.29565832940066, "grad_norm": 0.17155930399894714, "learning_rate": 6.775295552682746e-05, "loss": 0.0038, "step": 163550 }, { "epoch": 19.301557338367154, "grad_norm": 0.2599988281726837, "learning_rate": 6.773558853464265e-05, "loss": 0.0037, "step": 163600 }, { "epoch": 19.30745634733365, "grad_norm": 0.11971904337406158, "learning_rate": 6.771821909443542e-05, "loss": 0.0049, "step": 163650 }, { "epoch": 19.313355356300143, "grad_norm": 0.07385408133268356, "learning_rate": 6.77008472086032e-05, "loss": 0.0038, "step": 163700 }, { "epoch": 19.319254365266634, "grad_norm": 0.057909153401851654, "learning_rate": 6.768347287954385e-05, "loss": 0.0045, "step": 163750 }, { "epoch": 19.32515337423313, "grad_norm": 0.061824485659599304, "learning_rate": 6.766609610965551e-05, "loss": 0.0044, "step": 163800 }, { "epoch": 19.331052383199623, "grad_norm": 0.15515276789665222, "learning_rate": 6.764871690133668e-05, "loss": 0.0048, "step": 163850 }, { "epoch": 19.336951392166117, "grad_norm": 0.05572575330734253, "learning_rate": 6.763133525698617e-05, "loss": 0.0038, "step": 163900 }, { "epoch": 19.34285040113261, "grad_norm": 0.21745844185352325, "learning_rate": 6.761395117900319e-05, "loss": 0.005, "step": 163950 }, { "epoch": 19.348749410099103, "grad_norm": 0.03210647404193878, "learning_rate": 6.75965646697872e-05, "loss": 0.0038, "step": 164000 }, { "epoch": 19.348749410099103, "eval_cer": 0.08605851979345955, "eval_loss": 0.0010237637907266617, "eval_runtime": 2.0974, "eval_samples_per_second": 47.678, "eval_steps_per_second": 1.907, "eval_wer": 0.27, "step": 164000 }, { "epoch": 19.354648419065597, "grad_norm": 0.24368764460086823, "learning_rate": 6.757917573173808e-05, "loss": 0.0037, "step": 164050 }, { "epoch": 19.360547428032092, "grad_norm": 0.12115088850259781, "learning_rate": 6.756178436725596e-05, "loss": 0.0036, "step": 164100 }, { "epoch": 19.366446436998583, "grad_norm": 0.07229416817426682, "learning_rate": 6.754439057874139e-05, "loss": 0.004, "step": 164150 }, { "epoch": 19.372345445965077, "grad_norm": 0.01219487376511097, "learning_rate": 6.75269943685952e-05, "loss": 0.0032, "step": 164200 }, { "epoch": 19.378244454931572, "grad_norm": 0.2642762362957001, "learning_rate": 6.750959573921857e-05, "loss": 0.0039, "step": 164250 }, { "epoch": 19.384143463898067, "grad_norm": 0.016552383080124855, "learning_rate": 6.7492194693013e-05, "loss": 0.0031, "step": 164300 }, { "epoch": 19.390042472864558, "grad_norm": 0.025658713653683662, "learning_rate": 6.747479123238035e-05, "loss": 0.004, "step": 164350 }, { "epoch": 19.395941481831052, "grad_norm": 0.09077712148427963, "learning_rate": 6.745738535972278e-05, "loss": 0.0044, "step": 164400 }, { "epoch": 19.401840490797547, "grad_norm": 0.06337963789701462, "learning_rate": 6.743997707744284e-05, "loss": 0.0046, "step": 164450 }, { "epoch": 19.40773949976404, "grad_norm": 0.1089402288198471, "learning_rate": 6.742256638794335e-05, "loss": 0.0033, "step": 164500 }, { "epoch": 19.413638508730532, "grad_norm": 1.2615641355514526, "learning_rate": 6.740515329362748e-05, "loss": 0.0045, "step": 164550 }, { "epoch": 19.419537517697027, "grad_norm": 0.053166840225458145, "learning_rate": 6.738773779689874e-05, "loss": 0.0042, "step": 164600 }, { "epoch": 19.42543652666352, "grad_norm": 0.1847137063741684, "learning_rate": 6.7370319900161e-05, "loss": 0.0035, "step": 164650 }, { "epoch": 19.431335535630016, "grad_norm": 0.0982169583439827, "learning_rate": 6.735289960581837e-05, "loss": 0.0047, "step": 164700 }, { "epoch": 19.437234544596507, "grad_norm": 0.5321641564369202, "learning_rate": 6.733547691627542e-05, "loss": 0.0038, "step": 164750 }, { "epoch": 19.443133553563, "grad_norm": 0.1327955275774002, "learning_rate": 6.731805183393696e-05, "loss": 0.0043, "step": 164800 }, { "epoch": 19.449032562529496, "grad_norm": 0.04674449935555458, "learning_rate": 6.730062436120814e-05, "loss": 0.0044, "step": 164850 }, { "epoch": 19.45493157149599, "grad_norm": 0.18572205305099487, "learning_rate": 6.728319450049447e-05, "loss": 0.0047, "step": 164900 }, { "epoch": 19.46083058046248, "grad_norm": 0.14242097735404968, "learning_rate": 6.726576225420176e-05, "loss": 0.0037, "step": 164950 }, { "epoch": 19.466729589428976, "grad_norm": 0.14019988477230072, "learning_rate": 6.724832762473617e-05, "loss": 0.0048, "step": 165000 }, { "epoch": 19.466729589428976, "eval_cer": 0.08605851979345955, "eval_loss": 0.0012856974499300122, "eval_runtime": 2.0537, "eval_samples_per_second": 48.692, "eval_steps_per_second": 1.948, "eval_wer": 0.27, "step": 165000 }, { "epoch": 19.47262859839547, "grad_norm": 0.026451706886291504, "learning_rate": 6.723089061450419e-05, "loss": 0.0034, "step": 165050 }, { "epoch": 19.478527607361965, "grad_norm": 0.2157066911458969, "learning_rate": 6.72134512259126e-05, "loss": 0.0044, "step": 165100 }, { "epoch": 19.484426616328456, "grad_norm": 0.1545124650001526, "learning_rate": 6.719600946136856e-05, "loss": 0.0045, "step": 165150 }, { "epoch": 19.49032562529495, "grad_norm": 0.2529233694076538, "learning_rate": 6.717856532327956e-05, "loss": 0.004, "step": 165200 }, { "epoch": 19.496224634261445, "grad_norm": 0.0007919971249066293, "learning_rate": 6.716111881405335e-05, "loss": 0.005, "step": 165250 }, { "epoch": 19.50212364322794, "grad_norm": 0.02863485924899578, "learning_rate": 6.714366993609808e-05, "loss": 0.0043, "step": 165300 }, { "epoch": 19.50802265219443, "grad_norm": 0.160169780254364, "learning_rate": 6.712621869182217e-05, "loss": 0.0048, "step": 165350 }, { "epoch": 19.513921661160925, "grad_norm": 0.16416825354099274, "learning_rate": 6.710876508363444e-05, "loss": 0.0048, "step": 165400 }, { "epoch": 19.51982067012742, "grad_norm": 0.0028329661581665277, "learning_rate": 6.709130911394394e-05, "loss": 0.0045, "step": 165450 }, { "epoch": 19.52571967909391, "grad_norm": 0.15794052183628082, "learning_rate": 6.707385078516014e-05, "loss": 0.0044, "step": 165500 }, { "epoch": 19.531618688060405, "grad_norm": 0.08665424585342407, "learning_rate": 6.705639009969276e-05, "loss": 0.0051, "step": 165550 }, { "epoch": 19.5375176970269, "grad_norm": 0.07811304181814194, "learning_rate": 6.70389270599519e-05, "loss": 0.0054, "step": 165600 }, { "epoch": 19.543416705993394, "grad_norm": 0.02617485448718071, "learning_rate": 6.702146166834794e-05, "loss": 0.0049, "step": 165650 }, { "epoch": 19.549315714959885, "grad_norm": 0.9034730792045593, "learning_rate": 6.700399392729163e-05, "loss": 0.0044, "step": 165700 }, { "epoch": 19.55521472392638, "grad_norm": 0.22368881106376648, "learning_rate": 6.6986523839194e-05, "loss": 0.0036, "step": 165750 }, { "epoch": 19.561113732892874, "grad_norm": 0.06893317401409149, "learning_rate": 6.696905140646646e-05, "loss": 0.0042, "step": 165800 }, { "epoch": 19.56701274185937, "grad_norm": 0.48811376094818115, "learning_rate": 6.695157663152068e-05, "loss": 0.0047, "step": 165850 }, { "epoch": 19.57291175082586, "grad_norm": 0.09751306474208832, "learning_rate": 6.69340995167687e-05, "loss": 0.0038, "step": 165900 }, { "epoch": 19.578810759792354, "grad_norm": 0.06784439831972122, "learning_rate": 6.691662006462285e-05, "loss": 0.0042, "step": 165950 }, { "epoch": 19.58470976875885, "grad_norm": 0.1089111939072609, "learning_rate": 6.689913827749582e-05, "loss": 0.0044, "step": 166000 }, { "epoch": 19.58470976875885, "eval_cer": 0.08605851979345955, "eval_loss": 0.0013382055331021547, "eval_runtime": 2.0306, "eval_samples_per_second": 49.247, "eval_steps_per_second": 1.97, "eval_wer": 0.27, "step": 166000 }, { "epoch": 19.590608777725343, "grad_norm": 0.03157069906592369, "learning_rate": 6.688165415780056e-05, "loss": 0.005, "step": 166050 }, { "epoch": 19.596507786691834, "grad_norm": 0.03951680660247803, "learning_rate": 6.686416770795041e-05, "loss": 0.0042, "step": 166100 }, { "epoch": 19.60240679565833, "grad_norm": 0.1223788931965828, "learning_rate": 6.684667893035903e-05, "loss": 0.004, "step": 166150 }, { "epoch": 19.608305804624823, "grad_norm": 0.005447902716696262, "learning_rate": 6.682918782744032e-05, "loss": 0.0041, "step": 166200 }, { "epoch": 19.614204813591318, "grad_norm": 0.023176519200205803, "learning_rate": 6.68116944016086e-05, "loss": 0.0047, "step": 166250 }, { "epoch": 19.62010382255781, "grad_norm": 0.10305638611316681, "learning_rate": 6.679419865527844e-05, "loss": 0.0039, "step": 166300 }, { "epoch": 19.626002831524303, "grad_norm": 0.03163667768239975, "learning_rate": 6.677670059086477e-05, "loss": 0.004, "step": 166350 }, { "epoch": 19.631901840490798, "grad_norm": 0.011555829085409641, "learning_rate": 6.675920021078282e-05, "loss": 0.004, "step": 166400 }, { "epoch": 19.637800849457292, "grad_norm": 0.07751473784446716, "learning_rate": 6.674169751744817e-05, "loss": 0.0047, "step": 166450 }, { "epoch": 19.643699858423783, "grad_norm": 0.17701616883277893, "learning_rate": 6.672419251327666e-05, "loss": 0.003, "step": 166500 }, { "epoch": 19.649598867390278, "grad_norm": 0.0473906435072422, "learning_rate": 6.670668520068453e-05, "loss": 0.0043, "step": 166550 }, { "epoch": 19.655497876356772, "grad_norm": 0.051675520837306976, "learning_rate": 6.668917558208824e-05, "loss": 0.0047, "step": 166600 }, { "epoch": 19.661396885323267, "grad_norm": 0.10280736535787582, "learning_rate": 6.667166365990466e-05, "loss": 0.0047, "step": 166650 }, { "epoch": 19.667295894289758, "grad_norm": 0.13176476955413818, "learning_rate": 6.665414943655093e-05, "loss": 0.0046, "step": 166700 }, { "epoch": 19.673194903256253, "grad_norm": 0.08451350778341293, "learning_rate": 6.66366329144445e-05, "loss": 0.0047, "step": 166750 }, { "epoch": 19.679093912222747, "grad_norm": 0.28722599148750305, "learning_rate": 6.661911409600321e-05, "loss": 0.0035, "step": 166800 }, { "epoch": 19.68499292118924, "grad_norm": 0.14663338661193848, "learning_rate": 6.66015929836451e-05, "loss": 0.0043, "step": 166850 }, { "epoch": 19.690891930155733, "grad_norm": 0.2266213297843933, "learning_rate": 6.658406957978862e-05, "loss": 0.0047, "step": 166900 }, { "epoch": 19.696790939122227, "grad_norm": 0.014614793471992016, "learning_rate": 6.65665438868525e-05, "loss": 0.0039, "step": 166950 }, { "epoch": 19.70268994808872, "grad_norm": 0.036615874618291855, "learning_rate": 6.654901590725578e-05, "loss": 0.0044, "step": 167000 }, { "epoch": 19.70268994808872, "eval_cer": 0.08605851979345955, "eval_loss": 0.0017461188836023211, "eval_runtime": 2.0429, "eval_samples_per_second": 48.949, "eval_steps_per_second": 1.958, "eval_wer": 0.27, "step": 167000 }, { "epoch": 19.708588957055216, "grad_norm": 0.03200450539588928, "learning_rate": 6.653148564341785e-05, "loss": 0.004, "step": 167050 }, { "epoch": 19.714487966021707, "grad_norm": 0.08228056877851486, "learning_rate": 6.651395309775837e-05, "loss": 0.0036, "step": 167100 }, { "epoch": 19.7203869749882, "grad_norm": 0.11347587406635284, "learning_rate": 6.649641827269733e-05, "loss": 0.0058, "step": 167150 }, { "epoch": 19.726285983954696, "grad_norm": 0.513939380645752, "learning_rate": 6.647888117065507e-05, "loss": 0.0049, "step": 167200 }, { "epoch": 19.73218499292119, "grad_norm": 0.06556562334299088, "learning_rate": 6.646134179405221e-05, "loss": 0.0039, "step": 167250 }, { "epoch": 19.73808400188768, "grad_norm": 0.1613898128271103, "learning_rate": 6.644380014530964e-05, "loss": 0.005, "step": 167300 }, { "epoch": 19.743983010854176, "grad_norm": 0.04825330525636673, "learning_rate": 6.642625622684869e-05, "loss": 0.0037, "step": 167350 }, { "epoch": 19.74988201982067, "grad_norm": 0.022844744846224785, "learning_rate": 6.640871004109086e-05, "loss": 0.0042, "step": 167400 }, { "epoch": 19.755781028787165, "grad_norm": 0.08010026067495346, "learning_rate": 6.639116159045808e-05, "loss": 0.0043, "step": 167450 }, { "epoch": 19.761680037753656, "grad_norm": 0.1657320111989975, "learning_rate": 6.637361087737253e-05, "loss": 0.0041, "step": 167500 }, { "epoch": 19.76757904672015, "grad_norm": 0.4904226064682007, "learning_rate": 6.635605790425669e-05, "loss": 0.0041, "step": 167550 }, { "epoch": 19.773478055686645, "grad_norm": 0.24887755513191223, "learning_rate": 6.63385026735334e-05, "loss": 0.0047, "step": 167600 }, { "epoch": 19.77937706465314, "grad_norm": 0.14033178985118866, "learning_rate": 6.632094518762577e-05, "loss": 0.0043, "step": 167650 }, { "epoch": 19.78527607361963, "grad_norm": 0.1697266548871994, "learning_rate": 6.630338544895725e-05, "loss": 0.0049, "step": 167700 }, { "epoch": 19.791175082586125, "grad_norm": 0.09019714593887329, "learning_rate": 6.62858234599516e-05, "loss": 0.0046, "step": 167750 }, { "epoch": 19.79707409155262, "grad_norm": 0.054023053497076035, "learning_rate": 6.626825922303286e-05, "loss": 0.0032, "step": 167800 }, { "epoch": 19.80297310051911, "grad_norm": 0.44180870056152344, "learning_rate": 6.625069274062541e-05, "loss": 0.0057, "step": 167850 }, { "epoch": 19.808872109485606, "grad_norm": 0.2052747756242752, "learning_rate": 6.623312401515394e-05, "loss": 0.0049, "step": 167900 }, { "epoch": 19.8147711184521, "grad_norm": 0.15783637762069702, "learning_rate": 6.621555304904343e-05, "loss": 0.0044, "step": 167950 }, { "epoch": 19.820670127418595, "grad_norm": 0.07429243624210358, "learning_rate": 6.619797984471916e-05, "loss": 0.0046, "step": 168000 }, { "epoch": 19.820670127418595, "eval_cer": 0.08605851979345955, "eval_loss": 0.0014513294445350766, "eval_runtime": 2.0494, "eval_samples_per_second": 48.794, "eval_steps_per_second": 1.952, "eval_wer": 0.27, "step": 168000 }, { "epoch": 19.82656913638509, "grad_norm": 0.19984738528728485, "learning_rate": 6.618040440460677e-05, "loss": 0.0044, "step": 168050 }, { "epoch": 19.83246814535158, "grad_norm": 0.04733125492930412, "learning_rate": 6.616282673113218e-05, "loss": 0.0042, "step": 168100 }, { "epoch": 19.838367154318075, "grad_norm": 0.169071227312088, "learning_rate": 6.61452468267216e-05, "loss": 0.0045, "step": 168150 }, { "epoch": 19.84426616328457, "grad_norm": 0.040416013449430466, "learning_rate": 6.612766469380159e-05, "loss": 0.0046, "step": 168200 }, { "epoch": 19.85016517225106, "grad_norm": 0.13321618735790253, "learning_rate": 6.611008033479893e-05, "loss": 0.0052, "step": 168250 }, { "epoch": 19.856064181217555, "grad_norm": 0.08470572531223297, "learning_rate": 6.609249375214082e-05, "loss": 0.0042, "step": 168300 }, { "epoch": 19.86196319018405, "grad_norm": 0.15920814871788025, "learning_rate": 6.60749049482547e-05, "loss": 0.004, "step": 168350 }, { "epoch": 19.867862199150544, "grad_norm": 0.09609214216470718, "learning_rate": 6.605731392556833e-05, "loss": 0.0054, "step": 168400 }, { "epoch": 19.873761208117035, "grad_norm": 0.19622008502483368, "learning_rate": 6.603972068650979e-05, "loss": 0.0042, "step": 168450 }, { "epoch": 19.87966021708353, "grad_norm": 0.07110331207513809, "learning_rate": 6.602212523350744e-05, "loss": 0.006, "step": 168500 }, { "epoch": 19.885559226050024, "grad_norm": 0.1665927916765213, "learning_rate": 6.600452756898999e-05, "loss": 0.0042, "step": 168550 }, { "epoch": 19.89145823501652, "grad_norm": 0.05219061300158501, "learning_rate": 6.598692769538637e-05, "loss": 0.005, "step": 168600 }, { "epoch": 19.89735724398301, "grad_norm": 0.13141600787639618, "learning_rate": 6.59693256151259e-05, "loss": 0.0037, "step": 168650 }, { "epoch": 19.903256252949504, "grad_norm": 0.11485210061073303, "learning_rate": 6.59517213306382e-05, "loss": 0.004, "step": 168700 }, { "epoch": 19.909155261916, "grad_norm": 0.04258287325501442, "learning_rate": 6.593411484435313e-05, "loss": 0.0044, "step": 168750 }, { "epoch": 19.915054270882493, "grad_norm": 0.12104091048240662, "learning_rate": 6.591650615870091e-05, "loss": 0.0046, "step": 168800 }, { "epoch": 19.920953279848984, "grad_norm": 0.012186977081000805, "learning_rate": 6.589889527611203e-05, "loss": 0.0044, "step": 168850 }, { "epoch": 19.92685228881548, "grad_norm": 0.16198515892028809, "learning_rate": 6.588128219901733e-05, "loss": 0.0045, "step": 168900 }, { "epoch": 19.932751297781973, "grad_norm": 0.02372923120856285, "learning_rate": 6.586366692984788e-05, "loss": 0.0048, "step": 168950 }, { "epoch": 19.938650306748468, "grad_norm": 0.11765206605195999, "learning_rate": 6.584604947103514e-05, "loss": 0.0044, "step": 169000 }, { "epoch": 19.938650306748468, "eval_cer": 0.08777969018932874, "eval_loss": 0.0027245681267231703, "eval_runtime": 2.0333, "eval_samples_per_second": 49.181, "eval_steps_per_second": 1.967, "eval_wer": 0.27, "step": 169000 }, { "epoch": 19.94454931571496, "grad_norm": 0.12345021963119507, "learning_rate": 6.58284298250108e-05, "loss": 0.0044, "step": 169050 }, { "epoch": 19.950448324681453, "grad_norm": 0.010380170308053493, "learning_rate": 6.58108079942069e-05, "loss": 0.0051, "step": 169100 }, { "epoch": 19.956347333647948, "grad_norm": 0.010261934250593185, "learning_rate": 6.579318398105572e-05, "loss": 0.0038, "step": 169150 }, { "epoch": 19.962246342614442, "grad_norm": 0.07752074301242828, "learning_rate": 6.577555778798992e-05, "loss": 0.004, "step": 169200 }, { "epoch": 19.968145351580933, "grad_norm": 0.02074175886809826, "learning_rate": 6.575792941744242e-05, "loss": 0.0042, "step": 169250 }, { "epoch": 19.974044360547428, "grad_norm": 0.22518962621688843, "learning_rate": 6.574029887184642e-05, "loss": 0.0051, "step": 169300 }, { "epoch": 19.979943369513922, "grad_norm": 0.06001349166035652, "learning_rate": 6.572266615363546e-05, "loss": 0.0036, "step": 169350 }, { "epoch": 19.985842378480417, "grad_norm": 0.033396221697330475, "learning_rate": 6.570503126524336e-05, "loss": 0.0043, "step": 169400 }, { "epoch": 19.991741387446908, "grad_norm": 0.16306550800800323, "learning_rate": 6.568739420910425e-05, "loss": 0.0051, "step": 169450 }, { "epoch": 19.997640396413402, "grad_norm": 0.11302798241376877, "learning_rate": 6.566975498765255e-05, "loss": 0.0046, "step": 169500 }, { "epoch": 20.003539405379897, "grad_norm": 0.16513356566429138, "learning_rate": 6.565211360332297e-05, "loss": 0.0046, "step": 169550 }, { "epoch": 20.00943841434639, "grad_norm": 0.3321670591831207, "learning_rate": 6.563447005855054e-05, "loss": 0.003, "step": 169600 }, { "epoch": 20.015337423312882, "grad_norm": 0.024600714445114136, "learning_rate": 6.561682435577057e-05, "loss": 0.0029, "step": 169650 }, { "epoch": 20.021236432279377, "grad_norm": 0.04613819345831871, "learning_rate": 6.55991764974187e-05, "loss": 0.0033, "step": 169700 }, { "epoch": 20.02713544124587, "grad_norm": 0.10735428333282471, "learning_rate": 6.558152648593079e-05, "loss": 0.0038, "step": 169750 }, { "epoch": 20.033034450212366, "grad_norm": 0.262157142162323, "learning_rate": 6.556387432374311e-05, "loss": 0.0037, "step": 169800 }, { "epoch": 20.038933459178857, "grad_norm": 0.037949465215206146, "learning_rate": 6.554622001329213e-05, "loss": 0.0026, "step": 169850 }, { "epoch": 20.04483246814535, "grad_norm": 0.026466667652130127, "learning_rate": 6.552856355701464e-05, "loss": 0.0033, "step": 169900 }, { "epoch": 20.050731477111846, "grad_norm": 0.3677739202976227, "learning_rate": 6.55109049573478e-05, "loss": 0.0036, "step": 169950 }, { "epoch": 20.05663048607834, "grad_norm": 0.03848999738693237, "learning_rate": 6.549324421672894e-05, "loss": 0.003, "step": 170000 }, { "epoch": 20.05663048607834, "eval_cer": 0.08777969018932874, "eval_loss": 0.0013292038347572088, "eval_runtime": 2.0646, "eval_samples_per_second": 48.435, "eval_steps_per_second": 1.937, "eval_wer": 0.27, "step": 170000 }, { "epoch": 20.06252949504483, "grad_norm": 0.1398729830980301, "learning_rate": 6.547558133759581e-05, "loss": 0.0045, "step": 170050 }, { "epoch": 20.068428504011326, "grad_norm": 0.06817122548818588, "learning_rate": 6.545791632238636e-05, "loss": 0.0036, "step": 170100 }, { "epoch": 20.07432751297782, "grad_norm": 0.055936869233846664, "learning_rate": 6.544024917353888e-05, "loss": 0.0043, "step": 170150 }, { "epoch": 20.080226521944315, "grad_norm": 0.013848589733242989, "learning_rate": 6.542257989349195e-05, "loss": 0.0034, "step": 170200 }, { "epoch": 20.086125530910806, "grad_norm": 0.060050118714571, "learning_rate": 6.540490848468441e-05, "loss": 0.0036, "step": 170250 }, { "epoch": 20.0920245398773, "grad_norm": 0.07157604396343231, "learning_rate": 6.538723494955546e-05, "loss": 0.0038, "step": 170300 }, { "epoch": 20.097923548843795, "grad_norm": 0.12368687987327576, "learning_rate": 6.536955929054456e-05, "loss": 0.0038, "step": 170350 }, { "epoch": 20.10382255781029, "grad_norm": 0.01597670093178749, "learning_rate": 6.535188151009143e-05, "loss": 0.0028, "step": 170400 }, { "epoch": 20.10972156677678, "grad_norm": 0.11102274805307388, "learning_rate": 6.533420161063614e-05, "loss": 0.004, "step": 170450 }, { "epoch": 20.115620575743275, "grad_norm": 0.21416182816028595, "learning_rate": 6.531651959461901e-05, "loss": 0.0044, "step": 170500 }, { "epoch": 20.12151958470977, "grad_norm": 0.16950884461402893, "learning_rate": 6.529883546448068e-05, "loss": 0.0032, "step": 170550 }, { "epoch": 20.12741859367626, "grad_norm": 0.08873943239450455, "learning_rate": 6.528114922266205e-05, "loss": 0.0037, "step": 170600 }, { "epoch": 20.133317602642755, "grad_norm": 0.32589805126190186, "learning_rate": 6.526346087160435e-05, "loss": 0.0048, "step": 170650 }, { "epoch": 20.13921661160925, "grad_norm": 0.10403880476951599, "learning_rate": 6.524577041374906e-05, "loss": 0.0034, "step": 170700 }, { "epoch": 20.145115620575744, "grad_norm": 0.03353187441825867, "learning_rate": 6.522807785153802e-05, "loss": 0.0035, "step": 170750 }, { "epoch": 20.151014629542235, "grad_norm": 0.036719128489494324, "learning_rate": 6.521038318741327e-05, "loss": 0.0041, "step": 170800 }, { "epoch": 20.15691363850873, "grad_norm": 0.03519904240965843, "learning_rate": 6.519268642381721e-05, "loss": 0.005, "step": 170850 }, { "epoch": 20.162812647475224, "grad_norm": 0.10565750300884247, "learning_rate": 6.517498756319247e-05, "loss": 0.0037, "step": 170900 }, { "epoch": 20.16871165644172, "grad_norm": 0.020408231765031815, "learning_rate": 6.515728660798204e-05, "loss": 0.0034, "step": 170950 }, { "epoch": 20.17461066540821, "grad_norm": 0.00853234063833952, "learning_rate": 6.513958356062913e-05, "loss": 0.0036, "step": 171000 }, { "epoch": 20.17461066540821, "eval_cer": 0.08777969018932874, "eval_loss": 0.0012140778126195073, "eval_runtime": 2.0568, "eval_samples_per_second": 48.62, "eval_steps_per_second": 1.945, "eval_wer": 0.27, "step": 171000 }, { "epoch": 20.180509674374704, "grad_norm": 0.13536471128463745, "learning_rate": 6.512187842357729e-05, "loss": 0.0034, "step": 171050 }, { "epoch": 20.1864086833412, "grad_norm": 0.19642478227615356, "learning_rate": 6.510417119927034e-05, "loss": 0.0043, "step": 171100 }, { "epoch": 20.192307692307693, "grad_norm": 0.10818310081958771, "learning_rate": 6.508646189015238e-05, "loss": 0.0041, "step": 171150 }, { "epoch": 20.198206701274184, "grad_norm": 0.1367146074771881, "learning_rate": 6.50687504986678e-05, "loss": 0.0039, "step": 171200 }, { "epoch": 20.20410571024068, "grad_norm": 0.054175883531570435, "learning_rate": 6.505103702726131e-05, "loss": 0.0044, "step": 171250 }, { "epoch": 20.210004719207173, "grad_norm": 0.10494008660316467, "learning_rate": 6.503332147837785e-05, "loss": 0.0041, "step": 171300 }, { "epoch": 20.215903728173668, "grad_norm": 0.11848928034305573, "learning_rate": 6.501560385446268e-05, "loss": 0.004, "step": 171350 }, { "epoch": 20.22180273714016, "grad_norm": 0.028241155669093132, "learning_rate": 6.499788415796136e-05, "loss": 0.004, "step": 171400 }, { "epoch": 20.227701746106654, "grad_norm": 0.0183312576264143, "learning_rate": 6.498016239131972e-05, "loss": 0.0032, "step": 171450 }, { "epoch": 20.233600755073148, "grad_norm": 0.14473338425159454, "learning_rate": 6.496243855698384e-05, "loss": 0.0043, "step": 171500 }, { "epoch": 20.239499764039643, "grad_norm": 0.3631230890750885, "learning_rate": 6.494471265740014e-05, "loss": 0.0038, "step": 171550 }, { "epoch": 20.245398773006134, "grad_norm": 0.013978470116853714, "learning_rate": 6.492698469501533e-05, "loss": 0.0043, "step": 171600 }, { "epoch": 20.251297781972628, "grad_norm": 0.09065049141645432, "learning_rate": 6.490925467227633e-05, "loss": 0.0038, "step": 171650 }, { "epoch": 20.257196790939123, "grad_norm": 0.06965707242488861, "learning_rate": 6.489152259163044e-05, "loss": 0.0032, "step": 171700 }, { "epoch": 20.263095799905617, "grad_norm": 0.022713473066687584, "learning_rate": 6.487378845552518e-05, "loss": 0.0027, "step": 171750 }, { "epoch": 20.268994808872108, "grad_norm": 0.03677340969443321, "learning_rate": 6.485605226640837e-05, "loss": 0.0036, "step": 171800 }, { "epoch": 20.274893817838603, "grad_norm": 0.041325535625219345, "learning_rate": 6.483831402672812e-05, "loss": 0.0036, "step": 171850 }, { "epoch": 20.280792826805097, "grad_norm": 0.13505259156227112, "learning_rate": 6.482057373893282e-05, "loss": 0.0039, "step": 171900 }, { "epoch": 20.28669183577159, "grad_norm": 0.12977750599384308, "learning_rate": 6.480283140547111e-05, "loss": 0.0039, "step": 171950 }, { "epoch": 20.292590844738083, "grad_norm": 0.4037037491798401, "learning_rate": 6.478508702879201e-05, "loss": 0.004, "step": 172000 }, { "epoch": 20.292590844738083, "eval_cer": 0.08605851979345955, "eval_loss": 0.0013228475581854582, "eval_runtime": 2.0412, "eval_samples_per_second": 48.99, "eval_steps_per_second": 1.96, "eval_wer": 0.27, "step": 172000 }, { "epoch": 20.298489853704577, "grad_norm": 0.08212849497795105, "learning_rate": 6.47673406113447e-05, "loss": 0.0052, "step": 172050 }, { "epoch": 20.304388862671072, "grad_norm": 0.17309772968292236, "learning_rate": 6.474959215557871e-05, "loss": 0.0039, "step": 172100 }, { "epoch": 20.310287871637566, "grad_norm": 0.10001610219478607, "learning_rate": 6.473184166394385e-05, "loss": 0.0026, "step": 172150 }, { "epoch": 20.316186880604057, "grad_norm": 0.3204772174358368, "learning_rate": 6.471408913889019e-05, "loss": 0.0034, "step": 172200 }, { "epoch": 20.322085889570552, "grad_norm": 0.05064302310347557, "learning_rate": 6.469633458286806e-05, "loss": 0.0047, "step": 172250 }, { "epoch": 20.327984898537046, "grad_norm": 0.0228855200111866, "learning_rate": 6.467857799832817e-05, "loss": 0.0046, "step": 172300 }, { "epoch": 20.33388390750354, "grad_norm": 0.07111170142889023, "learning_rate": 6.466081938772138e-05, "loss": 0.0044, "step": 172350 }, { "epoch": 20.339782916470032, "grad_norm": 0.004894769284874201, "learning_rate": 6.464305875349892e-05, "loss": 0.003, "step": 172400 }, { "epoch": 20.345681925436526, "grad_norm": 0.06712862849235535, "learning_rate": 6.462529609811228e-05, "loss": 0.0035, "step": 172450 }, { "epoch": 20.35158093440302, "grad_norm": 0.09191945195198059, "learning_rate": 6.460753142401318e-05, "loss": 0.0044, "step": 172500 }, { "epoch": 20.357479943369515, "grad_norm": 0.1215718686580658, "learning_rate": 6.458976473365368e-05, "loss": 0.003, "step": 172550 }, { "epoch": 20.363378952336006, "grad_norm": 0.04241454228758812, "learning_rate": 6.45719960294861e-05, "loss": 0.0046, "step": 172600 }, { "epoch": 20.3692779613025, "grad_norm": 0.21699246764183044, "learning_rate": 6.455422531396301e-05, "loss": 0.0035, "step": 172650 }, { "epoch": 20.375176970268996, "grad_norm": 0.005110525526106358, "learning_rate": 6.45364525895373e-05, "loss": 0.004, "step": 172700 }, { "epoch": 20.38107597923549, "grad_norm": 0.13485945761203766, "learning_rate": 6.451867785866214e-05, "loss": 0.0043, "step": 172750 }, { "epoch": 20.38697498820198, "grad_norm": 0.38370513916015625, "learning_rate": 6.450090112379092e-05, "loss": 0.0049, "step": 172800 }, { "epoch": 20.392873997168476, "grad_norm": 0.21141652762889862, "learning_rate": 6.448312238737734e-05, "loss": 0.0041, "step": 172850 }, { "epoch": 20.39877300613497, "grad_norm": 0.0218480434268713, "learning_rate": 6.446534165187541e-05, "loss": 0.0038, "step": 172900 }, { "epoch": 20.404672015101465, "grad_norm": 0.11449833959341049, "learning_rate": 6.444755891973935e-05, "loss": 0.0037, "step": 172950 }, { "epoch": 20.410571024067956, "grad_norm": 0.1661580502986908, "learning_rate": 6.442977419342371e-05, "loss": 0.0038, "step": 173000 }, { "epoch": 20.410571024067956, "eval_cer": 0.08605851979345955, "eval_loss": 0.0014449084410443902, "eval_runtime": 2.0884, "eval_samples_per_second": 47.885, "eval_steps_per_second": 1.915, "eval_wer": 0.27, "step": 173000 }, { "epoch": 20.41647003303445, "grad_norm": 0.02445152774453163, "learning_rate": 6.441198747538329e-05, "loss": 0.004, "step": 173050 }, { "epoch": 20.422369042000945, "grad_norm": 0.20447613298892975, "learning_rate": 6.439419876807316e-05, "loss": 0.0043, "step": 173100 }, { "epoch": 20.42826805096744, "grad_norm": 0.056723084300756454, "learning_rate": 6.43764080739487e-05, "loss": 0.0054, "step": 173150 }, { "epoch": 20.43416705993393, "grad_norm": 0.08088245242834091, "learning_rate": 6.43586153954655e-05, "loss": 0.0045, "step": 173200 }, { "epoch": 20.440066068900425, "grad_norm": 0.08871709555387497, "learning_rate": 6.43408207350795e-05, "loss": 0.0043, "step": 173250 }, { "epoch": 20.44596507786692, "grad_norm": 0.11064555495977402, "learning_rate": 6.432302409524685e-05, "loss": 0.0045, "step": 173300 }, { "epoch": 20.45186408683341, "grad_norm": 0.1530437022447586, "learning_rate": 6.430522547842402e-05, "loss": 0.0046, "step": 173350 }, { "epoch": 20.457763095799905, "grad_norm": 0.225731760263443, "learning_rate": 6.428742488706773e-05, "loss": 0.0055, "step": 173400 }, { "epoch": 20.4636621047664, "grad_norm": 0.08296644687652588, "learning_rate": 6.426962232363494e-05, "loss": 0.0035, "step": 173450 }, { "epoch": 20.469561113732894, "grad_norm": 0.09870719164609909, "learning_rate": 6.425181779058298e-05, "loss": 0.0039, "step": 173500 }, { "epoch": 20.475460122699385, "grad_norm": 0.15769992768764496, "learning_rate": 6.423401129036934e-05, "loss": 0.0031, "step": 173550 }, { "epoch": 20.48135913166588, "grad_norm": 0.03175609931349754, "learning_rate": 6.421620282545182e-05, "loss": 0.0036, "step": 173600 }, { "epoch": 20.487258140632374, "grad_norm": 0.3242534101009369, "learning_rate": 6.419839239828854e-05, "loss": 0.0032, "step": 173650 }, { "epoch": 20.49315714959887, "grad_norm": 0.019467730075120926, "learning_rate": 6.418058001133784e-05, "loss": 0.0032, "step": 173700 }, { "epoch": 20.49905615856536, "grad_norm": 0.08385372906923294, "learning_rate": 6.416276566705833e-05, "loss": 0.0027, "step": 173750 }, { "epoch": 20.504955167531854, "grad_norm": 0.21500970423221588, "learning_rate": 6.414494936790893e-05, "loss": 0.0041, "step": 173800 }, { "epoch": 20.51085417649835, "grad_norm": 0.006383809726685286, "learning_rate": 6.412713111634877e-05, "loss": 0.0045, "step": 173850 }, { "epoch": 20.516753185464843, "grad_norm": 0.024907438084483147, "learning_rate": 6.41093109148373e-05, "loss": 0.0043, "step": 173900 }, { "epoch": 20.522652194431334, "grad_norm": 0.019125156104564667, "learning_rate": 6.409148876583422e-05, "loss": 0.0029, "step": 173950 }, { "epoch": 20.52855120339783, "grad_norm": 0.08660736680030823, "learning_rate": 6.40736646717995e-05, "loss": 0.0042, "step": 174000 }, { "epoch": 20.52855120339783, "eval_cer": 0.08605851979345955, "eval_loss": 0.0012186592211946845, "eval_runtime": 2.0576, "eval_samples_per_second": 48.6, "eval_steps_per_second": 1.944, "eval_wer": 0.27, "step": 174000 }, { "epoch": 20.534450212364323, "grad_norm": 0.18327130377292633, "learning_rate": 6.405583863519338e-05, "loss": 0.0042, "step": 174050 }, { "epoch": 20.540349221330818, "grad_norm": 0.2767132520675659, "learning_rate": 6.403801065847637e-05, "loss": 0.0037, "step": 174100 }, { "epoch": 20.54624823029731, "grad_norm": 0.06437741965055466, "learning_rate": 6.402018074410923e-05, "loss": 0.0035, "step": 174150 }, { "epoch": 20.552147239263803, "grad_norm": 0.26389262080192566, "learning_rate": 6.400234889455301e-05, "loss": 0.0036, "step": 174200 }, { "epoch": 20.558046248230298, "grad_norm": 0.1764765977859497, "learning_rate": 6.398451511226904e-05, "loss": 0.0042, "step": 174250 }, { "epoch": 20.563945257196792, "grad_norm": 0.03954656422138214, "learning_rate": 6.396667939971886e-05, "loss": 0.0036, "step": 174300 }, { "epoch": 20.569844266163283, "grad_norm": 0.12326976656913757, "learning_rate": 6.394884175936432e-05, "loss": 0.004, "step": 174350 }, { "epoch": 20.575743275129778, "grad_norm": 0.06011518836021423, "learning_rate": 6.393100219366755e-05, "loss": 0.0035, "step": 174400 }, { "epoch": 20.581642284096272, "grad_norm": 0.1359413117170334, "learning_rate": 6.391316070509093e-05, "loss": 0.0031, "step": 174450 }, { "epoch": 20.587541293062767, "grad_norm": 0.02111797407269478, "learning_rate": 6.389531729609707e-05, "loss": 0.0035, "step": 174500 }, { "epoch": 20.593440302029258, "grad_norm": 0.023360872641205788, "learning_rate": 6.387747196914889e-05, "loss": 0.0031, "step": 174550 }, { "epoch": 20.599339310995752, "grad_norm": 0.018795156851410866, "learning_rate": 6.385962472670953e-05, "loss": 0.0047, "step": 174600 }, { "epoch": 20.605238319962247, "grad_norm": 0.1639808863401413, "learning_rate": 6.384177557124247e-05, "loss": 0.0042, "step": 174650 }, { "epoch": 20.61113732892874, "grad_norm": 0.13785526156425476, "learning_rate": 6.38239245052114e-05, "loss": 0.0043, "step": 174700 }, { "epoch": 20.617036337895232, "grad_norm": 0.09832900017499924, "learning_rate": 6.380607153108026e-05, "loss": 0.0035, "step": 174750 }, { "epoch": 20.622935346861727, "grad_norm": 0.12339827418327332, "learning_rate": 6.378821665131328e-05, "loss": 0.0035, "step": 174800 }, { "epoch": 20.62883435582822, "grad_norm": 0.023166833445429802, "learning_rate": 6.377035986837495e-05, "loss": 0.0054, "step": 174850 }, { "epoch": 20.634733364794716, "grad_norm": 0.16802245378494263, "learning_rate": 6.375250118473004e-05, "loss": 0.0034, "step": 174900 }, { "epoch": 20.640632373761207, "grad_norm": 0.24788884818553925, "learning_rate": 6.373464060284353e-05, "loss": 0.0049, "step": 174950 }, { "epoch": 20.6465313827277, "grad_norm": 0.027230845764279366, "learning_rate": 6.371677812518072e-05, "loss": 0.0041, "step": 175000 }, { "epoch": 20.6465313827277, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003218879282940179, "eval_runtime": 2.0242, "eval_samples_per_second": 49.402, "eval_steps_per_second": 1.976, "eval_wer": 0.26, "step": 175000 }, { "epoch": 20.652430391694196, "grad_norm": 0.027710113674402237, "learning_rate": 6.369891375420713e-05, "loss": 0.0054, "step": 175050 }, { "epoch": 20.65832940066069, "grad_norm": 0.138239786028862, "learning_rate": 6.368104749238859e-05, "loss": 0.005, "step": 175100 }, { "epoch": 20.66422840962718, "grad_norm": 0.322264701128006, "learning_rate": 6.366317934219113e-05, "loss": 0.003, "step": 175150 }, { "epoch": 20.670127418593676, "grad_norm": 0.31602877378463745, "learning_rate": 6.364530930608106e-05, "loss": 0.0041, "step": 175200 }, { "epoch": 20.67602642756017, "grad_norm": 0.09650852531194687, "learning_rate": 6.3627437386525e-05, "loss": 0.004, "step": 175250 }, { "epoch": 20.681925436526665, "grad_norm": 0.006932263728231192, "learning_rate": 6.360956358598975e-05, "loss": 0.0035, "step": 175300 }, { "epoch": 20.687824445493156, "grad_norm": 0.09273724257946014, "learning_rate": 6.359168790694242e-05, "loss": 0.0045, "step": 175350 }, { "epoch": 20.69372345445965, "grad_norm": 0.04491236060857773, "learning_rate": 6.357381035185038e-05, "loss": 0.004, "step": 175400 }, { "epoch": 20.699622463426145, "grad_norm": 0.00860129576176405, "learning_rate": 6.355593092318124e-05, "loss": 0.0031, "step": 175450 }, { "epoch": 20.70552147239264, "grad_norm": 0.21223224699497223, "learning_rate": 6.35380496234029e-05, "loss": 0.0049, "step": 175500 }, { "epoch": 20.71142048135913, "grad_norm": 0.1356860250234604, "learning_rate": 6.352016645498344e-05, "loss": 0.0031, "step": 175550 }, { "epoch": 20.717319490325625, "grad_norm": 0.023601053282618523, "learning_rate": 6.350228142039132e-05, "loss": 0.0036, "step": 175600 }, { "epoch": 20.72321849929212, "grad_norm": 0.05352192744612694, "learning_rate": 6.348439452209513e-05, "loss": 0.0045, "step": 175650 }, { "epoch": 20.72911750825861, "grad_norm": 0.07309215515851974, "learning_rate": 6.346650576256382e-05, "loss": 0.0038, "step": 175700 }, { "epoch": 20.735016517225105, "grad_norm": 0.12393464893102646, "learning_rate": 6.344861514426655e-05, "loss": 0.0044, "step": 175750 }, { "epoch": 20.7409155261916, "grad_norm": 0.2643546164035797, "learning_rate": 6.34307226696727e-05, "loss": 0.0046, "step": 175800 }, { "epoch": 20.746814535158094, "grad_norm": 0.037528157234191895, "learning_rate": 6.3412828341252e-05, "loss": 0.0043, "step": 175850 }, { "epoch": 20.752713544124585, "grad_norm": 0.11589918285608292, "learning_rate": 6.339493216147436e-05, "loss": 0.0038, "step": 175900 }, { "epoch": 20.75861255309108, "grad_norm": 0.07531819492578506, "learning_rate": 6.337703413280995e-05, "loss": 0.0046, "step": 175950 }, { "epoch": 20.764511562057574, "grad_norm": 0.08278215676546097, "learning_rate": 6.335913425772926e-05, "loss": 0.0048, "step": 176000 }, { "epoch": 20.764511562057574, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004672835348173976, "eval_runtime": 2.0366, "eval_samples_per_second": 49.1, "eval_steps_per_second": 1.964, "eval_wer": 0.26, "step": 176000 }, { "epoch": 20.77041057102407, "grad_norm": 0.16358643770217896, "learning_rate": 6.334123253870295e-05, "loss": 0.0045, "step": 176050 }, { "epoch": 20.77630957999056, "grad_norm": 0.2260657548904419, "learning_rate": 6.3323328978202e-05, "loss": 0.0033, "step": 176100 }, { "epoch": 20.782208588957054, "grad_norm": 0.14047515392303467, "learning_rate": 6.330542357869758e-05, "loss": 0.0043, "step": 176150 }, { "epoch": 20.78810759792355, "grad_norm": 0.015251941047608852, "learning_rate": 6.328751634266118e-05, "loss": 0.0032, "step": 176200 }, { "epoch": 20.794006606890044, "grad_norm": 0.011744251474738121, "learning_rate": 6.32696072725645e-05, "loss": 0.0035, "step": 176250 }, { "epoch": 20.799905615856535, "grad_norm": 0.10761592537164688, "learning_rate": 6.325169637087953e-05, "loss": 0.0039, "step": 176300 }, { "epoch": 20.80580462482303, "grad_norm": 0.23867912590503693, "learning_rate": 6.323378364007848e-05, "loss": 0.0057, "step": 176350 }, { "epoch": 20.811703633789524, "grad_norm": 0.09386803209781647, "learning_rate": 6.321586908263382e-05, "loss": 0.0047, "step": 176400 }, { "epoch": 20.817602642756018, "grad_norm": 0.2476206123828888, "learning_rate": 6.319795270101827e-05, "loss": 0.0046, "step": 176450 }, { "epoch": 20.82350165172251, "grad_norm": 0.2231244444847107, "learning_rate": 6.31800344977048e-05, "loss": 0.0041, "step": 176500 }, { "epoch": 20.829400660689004, "grad_norm": 0.022870654240250587, "learning_rate": 6.316211447516667e-05, "loss": 0.0029, "step": 176550 }, { "epoch": 20.835299669655498, "grad_norm": 0.13165241479873657, "learning_rate": 6.314419263587732e-05, "loss": 0.0051, "step": 176600 }, { "epoch": 20.841198678621993, "grad_norm": 0.04066623002290726, "learning_rate": 6.312626898231052e-05, "loss": 0.0042, "step": 176650 }, { "epoch": 20.847097687588484, "grad_norm": 0.011887436732649803, "learning_rate": 6.310834351694023e-05, "loss": 0.0037, "step": 176700 }, { "epoch": 20.85299669655498, "grad_norm": 0.03554701805114746, "learning_rate": 6.309041624224067e-05, "loss": 0.0033, "step": 176750 }, { "epoch": 20.858895705521473, "grad_norm": 0.15981167554855347, "learning_rate": 6.307248716068637e-05, "loss": 0.0051, "step": 176800 }, { "epoch": 20.864794714487967, "grad_norm": 0.03391198813915253, "learning_rate": 6.305455627475199e-05, "loss": 0.0041, "step": 176850 }, { "epoch": 20.87069372345446, "grad_norm": 0.11337527632713318, "learning_rate": 6.303662358691255e-05, "loss": 0.0044, "step": 176900 }, { "epoch": 20.876592732420953, "grad_norm": 0.07781802117824554, "learning_rate": 6.301868909964329e-05, "loss": 0.0035, "step": 176950 }, { "epoch": 20.882491741387447, "grad_norm": 0.03492774814367294, "learning_rate": 6.300075281541963e-05, "loss": 0.0043, "step": 177000 }, { "epoch": 20.882491741387447, "eval_cer": 0.08347676419965576, "eval_loss": 0.0006689486908726394, "eval_runtime": 2.0513, "eval_samples_per_second": 48.749, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 177000 }, { "epoch": 20.888390750353942, "grad_norm": 0.11560339480638504, "learning_rate": 6.298281473671737e-05, "loss": 0.0044, "step": 177050 }, { "epoch": 20.894289759320433, "grad_norm": 0.1400122493505478, "learning_rate": 6.296487486601243e-05, "loss": 0.0043, "step": 177100 }, { "epoch": 20.900188768286927, "grad_norm": 0.060853540897369385, "learning_rate": 6.294693320578103e-05, "loss": 0.0044, "step": 177150 }, { "epoch": 20.906087777253422, "grad_norm": 0.22558912634849548, "learning_rate": 6.292898975849965e-05, "loss": 0.0044, "step": 177200 }, { "epoch": 20.911986786219916, "grad_norm": 0.1960742473602295, "learning_rate": 6.2911044526645e-05, "loss": 0.0041, "step": 177250 }, { "epoch": 20.917885795186407, "grad_norm": 0.1180751621723175, "learning_rate": 6.289309751269407e-05, "loss": 0.0045, "step": 177300 }, { "epoch": 20.923784804152902, "grad_norm": 0.04955894500017166, "learning_rate": 6.287514871912401e-05, "loss": 0.0045, "step": 177350 }, { "epoch": 20.929683813119397, "grad_norm": 0.08415676653385162, "learning_rate": 6.28571981484123e-05, "loss": 0.0035, "step": 177400 }, { "epoch": 20.93558282208589, "grad_norm": 0.05645836889743805, "learning_rate": 6.283924580303664e-05, "loss": 0.0052, "step": 177450 }, { "epoch": 20.941481831052382, "grad_norm": 0.03600942716002464, "learning_rate": 6.282129168547497e-05, "loss": 0.0039, "step": 177500 }, { "epoch": 20.947380840018877, "grad_norm": 0.08812668919563293, "learning_rate": 6.280333579820546e-05, "loss": 0.0041, "step": 177550 }, { "epoch": 20.95327984898537, "grad_norm": 0.004486215300858021, "learning_rate": 6.278537814370654e-05, "loss": 0.0045, "step": 177600 }, { "epoch": 20.959178857951866, "grad_norm": 0.04057137668132782, "learning_rate": 6.27674187244569e-05, "loss": 0.0044, "step": 177650 }, { "epoch": 20.965077866918357, "grad_norm": 0.07541421800851822, "learning_rate": 6.274945754293547e-05, "loss": 0.0043, "step": 177700 }, { "epoch": 20.97097687588485, "grad_norm": 0.131973996758461, "learning_rate": 6.273149460162138e-05, "loss": 0.0033, "step": 177750 }, { "epoch": 20.976875884851346, "grad_norm": 0.13347090780735016, "learning_rate": 6.271352990299406e-05, "loss": 0.0033, "step": 177800 }, { "epoch": 20.98277489381784, "grad_norm": 0.00867901649326086, "learning_rate": 6.269556344953313e-05, "loss": 0.0045, "step": 177850 }, { "epoch": 20.98867390278433, "grad_norm": 0.0712699294090271, "learning_rate": 6.267759524371847e-05, "loss": 0.0048, "step": 177900 }, { "epoch": 20.994572911750826, "grad_norm": 0.1336667239665985, "learning_rate": 6.265962528803027e-05, "loss": 0.0044, "step": 177950 }, { "epoch": 21.00047192071732, "grad_norm": 0.18516498804092407, "learning_rate": 6.264165358494884e-05, "loss": 0.0043, "step": 178000 }, { "epoch": 21.00047192071732, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005989631754346192, "eval_runtime": 2.0767, "eval_samples_per_second": 48.153, "eval_steps_per_second": 1.926, "eval_wer": 0.27, "step": 178000 }, { "epoch": 21.006370929683815, "grad_norm": 0.014283038675785065, "learning_rate": 6.262368013695483e-05, "loss": 0.0027, "step": 178050 }, { "epoch": 21.012269938650306, "grad_norm": 0.08155696094036102, "learning_rate": 6.260570494652908e-05, "loss": 0.0032, "step": 178100 }, { "epoch": 21.0181689476168, "grad_norm": 0.2420044094324112, "learning_rate": 6.258772801615267e-05, "loss": 0.0031, "step": 178150 }, { "epoch": 21.024067956583295, "grad_norm": 0.0483098067343235, "learning_rate": 6.256974934830695e-05, "loss": 0.0035, "step": 178200 }, { "epoch": 21.02996696554979, "grad_norm": 0.18754339218139648, "learning_rate": 6.25517689454735e-05, "loss": 0.003, "step": 178250 }, { "epoch": 21.03586597451628, "grad_norm": 0.06030181422829628, "learning_rate": 6.253378681013411e-05, "loss": 0.0033, "step": 178300 }, { "epoch": 21.041764983482775, "grad_norm": 0.22289112210273743, "learning_rate": 6.251580294477089e-05, "loss": 0.0028, "step": 178350 }, { "epoch": 21.04766399244927, "grad_norm": 0.015444033779203892, "learning_rate": 6.249781735186606e-05, "loss": 0.0024, "step": 178400 }, { "epoch": 21.05356300141576, "grad_norm": 0.044025275856256485, "learning_rate": 6.24798300339022e-05, "loss": 0.0029, "step": 178450 }, { "epoch": 21.059462010382255, "grad_norm": 0.0977635458111763, "learning_rate": 6.246184099336205e-05, "loss": 0.003, "step": 178500 }, { "epoch": 21.06536101934875, "grad_norm": 0.03990093246102333, "learning_rate": 6.244385023272863e-05, "loss": 0.0031, "step": 178550 }, { "epoch": 21.071260028315244, "grad_norm": 0.41355282068252563, "learning_rate": 6.242585775448518e-05, "loss": 0.0031, "step": 178600 }, { "epoch": 21.077159037281735, "grad_norm": 0.18574729561805725, "learning_rate": 6.240786356111518e-05, "loss": 0.0035, "step": 178650 }, { "epoch": 21.08305804624823, "grad_norm": 0.1608712375164032, "learning_rate": 6.238986765510236e-05, "loss": 0.0039, "step": 178700 }, { "epoch": 21.088957055214724, "grad_norm": 0.06396622955799103, "learning_rate": 6.237187003893066e-05, "loss": 0.0039, "step": 178750 }, { "epoch": 21.09485606418122, "grad_norm": 0.020674733445048332, "learning_rate": 6.235387071508427e-05, "loss": 0.0025, "step": 178800 }, { "epoch": 21.10075507314771, "grad_norm": 0.0547616183757782, "learning_rate": 6.233586968604762e-05, "loss": 0.0032, "step": 178850 }, { "epoch": 21.106654082114204, "grad_norm": 0.05085277929902077, "learning_rate": 6.231786695430535e-05, "loss": 0.0031, "step": 178900 }, { "epoch": 21.1125530910807, "grad_norm": 0.03945000097155571, "learning_rate": 6.229986252234239e-05, "loss": 0.0035, "step": 178950 }, { "epoch": 21.118452100047193, "grad_norm": 0.14824965596199036, "learning_rate": 6.228185639264385e-05, "loss": 0.004, "step": 179000 }, { "epoch": 21.118452100047193, "eval_cer": 0.08605851979345955, "eval_loss": 0.0011827921262010932, "eval_runtime": 2.1037, "eval_samples_per_second": 47.534, "eval_steps_per_second": 1.901, "eval_wer": 0.27, "step": 179000 }, { "epoch": 21.124351109013684, "grad_norm": 0.15123194456100464, "learning_rate": 6.22638485676951e-05, "loss": 0.0027, "step": 179050 }, { "epoch": 21.13025011798018, "grad_norm": 0.13818471133708954, "learning_rate": 6.224583904998174e-05, "loss": 0.0044, "step": 179100 }, { "epoch": 21.136149126946673, "grad_norm": 0.19538745284080505, "learning_rate": 6.22278278419896e-05, "loss": 0.0037, "step": 179150 }, { "epoch": 21.142048135913168, "grad_norm": 0.2091563194990158, "learning_rate": 6.220981494620476e-05, "loss": 0.0034, "step": 179200 }, { "epoch": 21.14794714487966, "grad_norm": 0.12198295444250107, "learning_rate": 6.21918003651135e-05, "loss": 0.0039, "step": 179250 }, { "epoch": 21.153846153846153, "grad_norm": 0.014288827776908875, "learning_rate": 6.217378410120235e-05, "loss": 0.0032, "step": 179300 }, { "epoch": 21.159745162812648, "grad_norm": 0.1284065693616867, "learning_rate": 6.215576615695808e-05, "loss": 0.0041, "step": 179350 }, { "epoch": 21.165644171779142, "grad_norm": 0.028510985895991325, "learning_rate": 6.21377465348677e-05, "loss": 0.0026, "step": 179400 }, { "epoch": 21.171543180745633, "grad_norm": 0.012187471613287926, "learning_rate": 6.211972523741842e-05, "loss": 0.0031, "step": 179450 }, { "epoch": 21.177442189712128, "grad_norm": 0.031971827149391174, "learning_rate": 6.210170226709771e-05, "loss": 0.0031, "step": 179500 }, { "epoch": 21.183341198678622, "grad_norm": 0.09989463537931442, "learning_rate": 6.208367762639324e-05, "loss": 0.0028, "step": 179550 }, { "epoch": 21.189240207645117, "grad_norm": 0.03696685656905174, "learning_rate": 6.206565131779294e-05, "loss": 0.0035, "step": 179600 }, { "epoch": 21.195139216611608, "grad_norm": 0.11938690394163132, "learning_rate": 6.204762334378498e-05, "loss": 0.0038, "step": 179650 }, { "epoch": 21.201038225578102, "grad_norm": 0.012258200906217098, "learning_rate": 6.20295937068577e-05, "loss": 0.0036, "step": 179700 }, { "epoch": 21.206937234544597, "grad_norm": 0.08423107862472534, "learning_rate": 6.201156240949975e-05, "loss": 0.0035, "step": 179750 }, { "epoch": 21.21283624351109, "grad_norm": 0.1242159977555275, "learning_rate": 6.199352945419994e-05, "loss": 0.0028, "step": 179800 }, { "epoch": 21.218735252477583, "grad_norm": 0.0786069929599762, "learning_rate": 6.197549484344736e-05, "loss": 0.0032, "step": 179850 }, { "epoch": 21.224634261444077, "grad_norm": 0.007098794914782047, "learning_rate": 6.195745857973128e-05, "loss": 0.0037, "step": 179900 }, { "epoch": 21.23053327041057, "grad_norm": 0.16520509123802185, "learning_rate": 6.193942066554125e-05, "loss": 0.0043, "step": 179950 }, { "epoch": 21.236432279377066, "grad_norm": 0.18146182596683502, "learning_rate": 6.192138110336701e-05, "loss": 0.0038, "step": 180000 }, { "epoch": 21.236432279377066, "eval_cer": 0.08605851979345955, "eval_loss": 0.0011864117113873363, "eval_runtime": 2.0298, "eval_samples_per_second": 49.266, "eval_steps_per_second": 1.971, "eval_wer": 0.27, "step": 180000 }, { "epoch": 21.242331288343557, "grad_norm": 0.13275989890098572, "learning_rate": 6.190333989569855e-05, "loss": 0.0037, "step": 180050 }, { "epoch": 21.24823029731005, "grad_norm": 0.14245565235614777, "learning_rate": 6.188529704502605e-05, "loss": 0.0031, "step": 180100 }, { "epoch": 21.254129306276546, "grad_norm": 0.14025121927261353, "learning_rate": 6.186725255383997e-05, "loss": 0.0036, "step": 180150 }, { "epoch": 21.26002831524304, "grad_norm": 0.11646251380443573, "learning_rate": 6.184920642463094e-05, "loss": 0.0039, "step": 180200 }, { "epoch": 21.26592732420953, "grad_norm": 0.1684579998254776, "learning_rate": 6.18311586598899e-05, "loss": 0.0046, "step": 180250 }, { "epoch": 21.271826333176026, "grad_norm": 0.0547327920794487, "learning_rate": 6.181310926210792e-05, "loss": 0.0027, "step": 180300 }, { "epoch": 21.27772534214252, "grad_norm": 0.1809915155172348, "learning_rate": 6.179505823377634e-05, "loss": 0.0042, "step": 180350 }, { "epoch": 21.283624351109015, "grad_norm": 0.19248861074447632, "learning_rate": 6.177700557738673e-05, "loss": 0.0035, "step": 180400 }, { "epoch": 21.289523360075506, "grad_norm": 0.3716749846935272, "learning_rate": 6.175895129543087e-05, "loss": 0.0031, "step": 180450 }, { "epoch": 21.295422369042, "grad_norm": 0.264791876077652, "learning_rate": 6.174089539040078e-05, "loss": 0.0036, "step": 180500 }, { "epoch": 21.301321378008495, "grad_norm": 0.06890836358070374, "learning_rate": 6.172283786478869e-05, "loss": 0.0031, "step": 180550 }, { "epoch": 21.30722038697499, "grad_norm": 0.010362990200519562, "learning_rate": 6.170477872108706e-05, "loss": 0.0038, "step": 180600 }, { "epoch": 21.31311939594148, "grad_norm": 0.01971714198589325, "learning_rate": 6.168671796178856e-05, "loss": 0.0042, "step": 180650 }, { "epoch": 21.319018404907975, "grad_norm": 0.17997367680072784, "learning_rate": 6.166865558938614e-05, "loss": 0.0034, "step": 180700 }, { "epoch": 21.32491741387447, "grad_norm": 0.05945473909378052, "learning_rate": 6.165059160637288e-05, "loss": 0.0039, "step": 180750 }, { "epoch": 21.330816422840964, "grad_norm": 0.10318081080913544, "learning_rate": 6.163252601524216e-05, "loss": 0.0031, "step": 180800 }, { "epoch": 21.336715431807455, "grad_norm": 0.021314578130841255, "learning_rate": 6.161445881848751e-05, "loss": 0.0035, "step": 180850 }, { "epoch": 21.34261444077395, "grad_norm": 0.22816209495067596, "learning_rate": 6.159639001860277e-05, "loss": 0.0036, "step": 180900 }, { "epoch": 21.348513449740445, "grad_norm": 0.13055025041103363, "learning_rate": 6.157831961808193e-05, "loss": 0.0038, "step": 180950 }, { "epoch": 21.354412458706935, "grad_norm": 0.12250262498855591, "learning_rate": 6.156024761941926e-05, "loss": 0.0033, "step": 181000 }, { "epoch": 21.354412458706935, "eval_cer": 0.08777969018932874, "eval_loss": 0.0016939309425652027, "eval_runtime": 2.094, "eval_samples_per_second": 47.756, "eval_steps_per_second": 1.91, "eval_wer": 0.27, "step": 181000 }, { "epoch": 21.36031146767343, "grad_norm": 0.14654995501041412, "learning_rate": 6.154217402510918e-05, "loss": 0.0037, "step": 181050 }, { "epoch": 21.366210476639925, "grad_norm": 0.04518216848373413, "learning_rate": 6.152409883764638e-05, "loss": 0.0035, "step": 181100 }, { "epoch": 21.37210948560642, "grad_norm": 0.45269718766212463, "learning_rate": 6.150602205952575e-05, "loss": 0.0034, "step": 181150 }, { "epoch": 21.37800849457291, "grad_norm": 0.010456579737365246, "learning_rate": 6.14879436932424e-05, "loss": 0.0035, "step": 181200 }, { "epoch": 21.383907503539405, "grad_norm": 0.16644607484340668, "learning_rate": 6.146986374129168e-05, "loss": 0.0027, "step": 181250 }, { "epoch": 21.3898065125059, "grad_norm": 0.11968976259231567, "learning_rate": 6.145178220616916e-05, "loss": 0.0026, "step": 181300 }, { "epoch": 21.395705521472394, "grad_norm": 0.013289290480315685, "learning_rate": 6.143369909037057e-05, "loss": 0.0031, "step": 181350 }, { "epoch": 21.401604530438885, "grad_norm": 0.3537304997444153, "learning_rate": 6.141561439639196e-05, "loss": 0.0034, "step": 181400 }, { "epoch": 21.40750353940538, "grad_norm": 0.12870459258556366, "learning_rate": 6.139752812672948e-05, "loss": 0.0036, "step": 181450 }, { "epoch": 21.413402548371874, "grad_norm": 0.13457533717155457, "learning_rate": 6.137944028387958e-05, "loss": 0.0037, "step": 181500 }, { "epoch": 21.41930155733837, "grad_norm": 0.04375831037759781, "learning_rate": 6.136135087033891e-05, "loss": 0.0033, "step": 181550 }, { "epoch": 21.42520056630486, "grad_norm": 0.04078288748860359, "learning_rate": 6.134325988860433e-05, "loss": 0.0042, "step": 181600 }, { "epoch": 21.431099575271354, "grad_norm": 0.02126665785908699, "learning_rate": 6.132516734117291e-05, "loss": 0.0039, "step": 181650 }, { "epoch": 21.43699858423785, "grad_norm": 0.42932507395744324, "learning_rate": 6.130707323054195e-05, "loss": 0.0041, "step": 181700 }, { "epoch": 21.442897593204343, "grad_norm": 0.008910009637475014, "learning_rate": 6.128897755920896e-05, "loss": 0.0038, "step": 181750 }, { "epoch": 21.448796602170834, "grad_norm": 0.1393955647945404, "learning_rate": 6.127088032967166e-05, "loss": 0.0039, "step": 181800 }, { "epoch": 21.45469561113733, "grad_norm": 0.15695178508758545, "learning_rate": 6.125278154442797e-05, "loss": 0.005, "step": 181850 }, { "epoch": 21.460594620103823, "grad_norm": 0.03867996484041214, "learning_rate": 6.123468120597607e-05, "loss": 0.004, "step": 181900 }, { "epoch": 21.466493629070317, "grad_norm": 0.17569121718406677, "learning_rate": 6.121657931681433e-05, "loss": 0.0044, "step": 181950 }, { "epoch": 21.47239263803681, "grad_norm": 0.2011984884738922, "learning_rate": 6.119847587944131e-05, "loss": 0.0028, "step": 182000 }, { "epoch": 21.47239263803681, "eval_cer": 0.08605851979345955, "eval_loss": 0.0006946357898414135, "eval_runtime": 2.1168, "eval_samples_per_second": 47.242, "eval_steps_per_second": 1.89, "eval_wer": 0.27, "step": 182000 }, { "epoch": 21.478291647003303, "grad_norm": 0.014189237728714943, "learning_rate": 6.118037089635584e-05, "loss": 0.003, "step": 182050 }, { "epoch": 21.484190655969797, "grad_norm": 0.1981942057609558, "learning_rate": 6.11622643700569e-05, "loss": 0.0057, "step": 182100 }, { "epoch": 21.490089664936292, "grad_norm": 0.08448805660009384, "learning_rate": 6.114415630304372e-05, "loss": 0.0033, "step": 182150 }, { "epoch": 21.495988673902783, "grad_norm": 0.034376904368400574, "learning_rate": 6.112604669781572e-05, "loss": 0.004, "step": 182200 }, { "epoch": 21.501887682869278, "grad_norm": 0.24268294870853424, "learning_rate": 6.110793555687258e-05, "loss": 0.0043, "step": 182250 }, { "epoch": 21.507786691835772, "grad_norm": 0.18675321340560913, "learning_rate": 6.108982288271414e-05, "loss": 0.0035, "step": 182300 }, { "epoch": 21.513685700802267, "grad_norm": 0.07929840683937073, "learning_rate": 6.107170867784049e-05, "loss": 0.004, "step": 182350 }, { "epoch": 21.519584709768758, "grad_norm": 0.10196287930011749, "learning_rate": 6.105359294475188e-05, "loss": 0.0034, "step": 182400 }, { "epoch": 21.525483718735252, "grad_norm": 0.012942291796207428, "learning_rate": 6.103547568594883e-05, "loss": 0.0027, "step": 182450 }, { "epoch": 21.531382727701747, "grad_norm": 0.08301777392625809, "learning_rate": 6.1017356903932036e-05, "loss": 0.0028, "step": 182500 }, { "epoch": 21.53728173666824, "grad_norm": 0.07690899819135666, "learning_rate": 6.0999236601202406e-05, "loss": 0.003, "step": 182550 }, { "epoch": 21.543180745634732, "grad_norm": 0.18559950590133667, "learning_rate": 6.098111478026107e-05, "loss": 0.0037, "step": 182600 }, { "epoch": 21.549079754601227, "grad_norm": 0.49251192808151245, "learning_rate": 6.096299144360936e-05, "loss": 0.0036, "step": 182650 }, { "epoch": 21.55497876356772, "grad_norm": 0.1414470225572586, "learning_rate": 6.0944866593748826e-05, "loss": 0.003, "step": 182700 }, { "epoch": 21.560877772534216, "grad_norm": 0.008904559537768364, "learning_rate": 6.0926740233181215e-05, "loss": 0.0043, "step": 182750 }, { "epoch": 21.566776781500707, "grad_norm": 0.11586162447929382, "learning_rate": 6.090861236440848e-05, "loss": 0.0034, "step": 182800 }, { "epoch": 21.5726757904672, "grad_norm": 0.21537411212921143, "learning_rate": 6.0890482989932786e-05, "loss": 0.0033, "step": 182850 }, { "epoch": 21.578574799433696, "grad_norm": 0.1682521402835846, "learning_rate": 6.087235211225652e-05, "loss": 0.0034, "step": 182900 }, { "epoch": 21.58447380840019, "grad_norm": 0.11717044562101364, "learning_rate": 6.085421973388228e-05, "loss": 0.0035, "step": 182950 }, { "epoch": 21.59037281736668, "grad_norm": 0.4425283968448639, "learning_rate": 6.083608585731283e-05, "loss": 0.0033, "step": 183000 }, { "epoch": 21.59037281736668, "eval_cer": 0.08777969018932874, "eval_loss": 0.0029272621031850576, "eval_runtime": 2.0267, "eval_samples_per_second": 49.34, "eval_steps_per_second": 1.974, "eval_wer": 0.27, "step": 183000 }, { "epoch": 21.596271826333176, "grad_norm": 0.07596848160028458, "learning_rate": 6.081795048505119e-05, "loss": 0.0045, "step": 183050 }, { "epoch": 21.60217083529967, "grad_norm": 0.04697076603770256, "learning_rate": 6.079981361960053e-05, "loss": 0.0038, "step": 183100 }, { "epoch": 21.608069844266165, "grad_norm": 0.19131694734096527, "learning_rate": 6.07816752634643e-05, "loss": 0.0037, "step": 183150 }, { "epoch": 21.613968853232656, "grad_norm": 0.1485607773065567, "learning_rate": 6.0763535419146086e-05, "loss": 0.0032, "step": 183200 }, { "epoch": 21.61986786219915, "grad_norm": 0.13810276985168457, "learning_rate": 6.074539408914973e-05, "loss": 0.0038, "step": 183250 }, { "epoch": 21.625766871165645, "grad_norm": 0.05885416641831398, "learning_rate": 6.0727251275979255e-05, "loss": 0.0033, "step": 183300 }, { "epoch": 21.63166588013214, "grad_norm": 0.030875105410814285, "learning_rate": 6.07091069821389e-05, "loss": 0.0038, "step": 183350 }, { "epoch": 21.63756488909863, "grad_norm": 0.007402370218187571, "learning_rate": 6.069096121013307e-05, "loss": 0.0034, "step": 183400 }, { "epoch": 21.643463898065125, "grad_norm": 0.047545574605464935, "learning_rate": 6.067281396246642e-05, "loss": 0.0037, "step": 183450 }, { "epoch": 21.64936290703162, "grad_norm": 0.10933280736207962, "learning_rate": 6.065466524164381e-05, "loss": 0.0039, "step": 183500 }, { "epoch": 21.65526191599811, "grad_norm": 0.006219466216862202, "learning_rate": 6.063651505017026e-05, "loss": 0.0039, "step": 183550 }, { "epoch": 21.661160924964605, "grad_norm": 0.0139757189899683, "learning_rate": 6.061836339055105e-05, "loss": 0.0043, "step": 183600 }, { "epoch": 21.6670599339311, "grad_norm": 0.06501884758472443, "learning_rate": 6.0600210265291604e-05, "loss": 0.0031, "step": 183650 }, { "epoch": 21.672958942897594, "grad_norm": 0.10685618221759796, "learning_rate": 6.0582055676897585e-05, "loss": 0.0037, "step": 183700 }, { "epoch": 21.678857951864085, "grad_norm": 0.1671275943517685, "learning_rate": 6.056389962787486e-05, "loss": 0.0047, "step": 183750 }, { "epoch": 21.68475696083058, "grad_norm": 0.06343816220760345, "learning_rate": 6.054574212072949e-05, "loss": 0.0038, "step": 183800 }, { "epoch": 21.690655969797074, "grad_norm": 0.11492319405078888, "learning_rate": 6.0527583157967694e-05, "loss": 0.0033, "step": 183850 }, { "epoch": 21.69655497876357, "grad_norm": 0.04230453073978424, "learning_rate": 6.050942274209598e-05, "loss": 0.0035, "step": 183900 }, { "epoch": 21.70245398773006, "grad_norm": 0.3866733908653259, "learning_rate": 6.049126087562098e-05, "loss": 0.0034, "step": 183950 }, { "epoch": 21.708352996696554, "grad_norm": 0.0916527584195137, "learning_rate": 6.047309756104957e-05, "loss": 0.0046, "step": 184000 }, { "epoch": 21.708352996696554, "eval_cer": 0.08777969018932874, "eval_loss": 0.003167645074427128, "eval_runtime": 2.0342, "eval_samples_per_second": 49.16, "eval_steps_per_second": 1.966, "eval_wer": 0.27, "step": 184000 }, { "epoch": 21.71425200566305, "grad_norm": 0.2142249047756195, "learning_rate": 6.045493280088882e-05, "loss": 0.0043, "step": 184050 }, { "epoch": 21.720151014629543, "grad_norm": 0.07498923689126968, "learning_rate": 6.043676659764597e-05, "loss": 0.0038, "step": 184100 }, { "epoch": 21.726050023596034, "grad_norm": 0.033108435571193695, "learning_rate": 6.0418598953828487e-05, "loss": 0.0032, "step": 184150 }, { "epoch": 21.73194903256253, "grad_norm": 0.019606072455644608, "learning_rate": 6.040042987194401e-05, "loss": 0.0031, "step": 184200 }, { "epoch": 21.737848041529023, "grad_norm": 0.16229534149169922, "learning_rate": 6.038225935450044e-05, "loss": 0.0033, "step": 184250 }, { "epoch": 21.743747050495518, "grad_norm": 0.08980154246091843, "learning_rate": 6.03640874040058e-05, "loss": 0.0045, "step": 184300 }, { "epoch": 21.74964605946201, "grad_norm": 0.03512241318821907, "learning_rate": 6.0345914022968355e-05, "loss": 0.004, "step": 184350 }, { "epoch": 21.755545068428503, "grad_norm": 0.00416173692792654, "learning_rate": 6.032773921389655e-05, "loss": 0.0026, "step": 184400 }, { "epoch": 21.761444077394998, "grad_norm": 0.13382618129253387, "learning_rate": 6.030956297929903e-05, "loss": 0.0043, "step": 184450 }, { "epoch": 21.767343086361493, "grad_norm": 0.0621073953807354, "learning_rate": 6.029138532168464e-05, "loss": 0.0029, "step": 184500 }, { "epoch": 21.773242095327983, "grad_norm": 0.0007753579411655664, "learning_rate": 6.027320624356243e-05, "loss": 0.0027, "step": 184550 }, { "epoch": 21.779141104294478, "grad_norm": 0.011160239577293396, "learning_rate": 6.025502574744163e-05, "loss": 0.0032, "step": 184600 }, { "epoch": 21.785040113260973, "grad_norm": 0.08322349190711975, "learning_rate": 6.023684383583167e-05, "loss": 0.005, "step": 184650 }, { "epoch": 21.790939122227467, "grad_norm": 0.368801087141037, "learning_rate": 6.021866051124219e-05, "loss": 0.0041, "step": 184700 }, { "epoch": 21.796838131193958, "grad_norm": 0.33935725688934326, "learning_rate": 6.0200475776182984e-05, "loss": 0.0038, "step": 184750 }, { "epoch": 21.802737140160453, "grad_norm": 0.10536596924066544, "learning_rate": 6.01822896331641e-05, "loss": 0.0034, "step": 184800 }, { "epoch": 21.808636149126947, "grad_norm": 0.2658930718898773, "learning_rate": 6.016410208469574e-05, "loss": 0.003, "step": 184850 }, { "epoch": 21.81453515809344, "grad_norm": 0.5078563094139099, "learning_rate": 6.014591313328831e-05, "loss": 0.0034, "step": 184900 }, { "epoch": 21.820434167059933, "grad_norm": 0.05086196959018707, "learning_rate": 6.0127722781452415e-05, "loss": 0.0034, "step": 184950 }, { "epoch": 21.826333176026427, "grad_norm": 0.027043979614973068, "learning_rate": 6.010953103169883e-05, "loss": 0.0056, "step": 185000 }, { "epoch": 21.826333176026427, "eval_cer": 0.08347676419965576, "eval_loss": 0.0007880044868215919, "eval_runtime": 2.059, "eval_samples_per_second": 48.566, "eval_steps_per_second": 1.943, "eval_wer": 0.26, "step": 185000 }, { "epoch": 21.83223218499292, "grad_norm": 0.047507576644420624, "learning_rate": 6.009133788653857e-05, "loss": 0.0043, "step": 185050 }, { "epoch": 21.838131193959416, "grad_norm": 0.06836242973804474, "learning_rate": 6.007314334848279e-05, "loss": 0.0046, "step": 185100 }, { "epoch": 21.844030202925907, "grad_norm": 0.007086669560521841, "learning_rate": 6.005494742004285e-05, "loss": 0.0048, "step": 185150 }, { "epoch": 21.849929211892402, "grad_norm": 0.14635759592056274, "learning_rate": 6.003675010373034e-05, "loss": 0.0033, "step": 185200 }, { "epoch": 21.855828220858896, "grad_norm": 0.07955697923898697, "learning_rate": 6.001855140205701e-05, "loss": 0.0038, "step": 185250 }, { "epoch": 21.86172722982539, "grad_norm": 0.016602369025349617, "learning_rate": 6.00003513175348e-05, "loss": 0.0029, "step": 185300 }, { "epoch": 21.867626238791882, "grad_norm": 0.14761283993721008, "learning_rate": 5.998214985267584e-05, "loss": 0.0046, "step": 185350 }, { "epoch": 21.873525247758376, "grad_norm": 0.11471891403198242, "learning_rate": 5.996394700999246e-05, "loss": 0.0046, "step": 185400 }, { "epoch": 21.87942425672487, "grad_norm": 0.12677226960659027, "learning_rate": 5.9945742791997184e-05, "loss": 0.003, "step": 185450 }, { "epoch": 21.885323265691365, "grad_norm": 0.014509310945868492, "learning_rate": 5.992753720120271e-05, "loss": 0.0038, "step": 185500 }, { "epoch": 21.891222274657856, "grad_norm": 0.05522961542010307, "learning_rate": 5.990933024012194e-05, "loss": 0.0038, "step": 185550 }, { "epoch": 21.89712128362435, "grad_norm": 0.1118791326880455, "learning_rate": 5.9891121911267946e-05, "loss": 0.0036, "step": 185600 }, { "epoch": 21.903020292590845, "grad_norm": 0.038643643260002136, "learning_rate": 5.987291221715403e-05, "loss": 0.0039, "step": 185650 }, { "epoch": 21.90891930155734, "grad_norm": 0.3093961477279663, "learning_rate": 5.985470116029364e-05, "loss": 0.0044, "step": 185700 }, { "epoch": 21.91481831052383, "grad_norm": 0.10979331284761429, "learning_rate": 5.9836488743200426e-05, "loss": 0.0038, "step": 185750 }, { "epoch": 21.920717319490326, "grad_norm": 0.007052586879581213, "learning_rate": 5.981827496838822e-05, "loss": 0.004, "step": 185800 }, { "epoch": 21.92661632845682, "grad_norm": 0.2201511114835739, "learning_rate": 5.980005983837105e-05, "loss": 0.0039, "step": 185850 }, { "epoch": 21.93251533742331, "grad_norm": 0.04238523542881012, "learning_rate": 5.978184335566314e-05, "loss": 0.0037, "step": 185900 }, { "epoch": 21.938414346389806, "grad_norm": 0.008827971294522285, "learning_rate": 5.9763625522778875e-05, "loss": 0.0039, "step": 185950 }, { "epoch": 21.9443133553563, "grad_norm": 0.11052433401346207, "learning_rate": 5.9745406342232856e-05, "loss": 0.0032, "step": 186000 }, { "epoch": 21.9443133553563, "eval_cer": 0.08519793459552495, "eval_loss": 0.0013055844465270638, "eval_runtime": 2.0742, "eval_samples_per_second": 48.212, "eval_steps_per_second": 1.928, "eval_wer": 0.26, "step": 186000 }, { "epoch": 21.950212364322795, "grad_norm": 0.40634751319885254, "learning_rate": 5.9727185816539855e-05, "loss": 0.0039, "step": 186050 }, { "epoch": 21.95611137328929, "grad_norm": 0.060025330632925034, "learning_rate": 5.970896394821481e-05, "loss": 0.0039, "step": 186100 }, { "epoch": 21.96201038225578, "grad_norm": 0.039396658539772034, "learning_rate": 5.969074073977288e-05, "loss": 0.0041, "step": 186150 }, { "epoch": 21.967909391222275, "grad_norm": 0.07416534423828125, "learning_rate": 5.967251619372939e-05, "loss": 0.0037, "step": 186200 }, { "epoch": 21.97380840018877, "grad_norm": 0.07173505425453186, "learning_rate": 5.965429031259985e-05, "loss": 0.0043, "step": 186250 }, { "epoch": 21.97970740915526, "grad_norm": 0.0161035917699337, "learning_rate": 5.963606309889996e-05, "loss": 0.0044, "step": 186300 }, { "epoch": 21.985606418121755, "grad_norm": 0.038180287927389145, "learning_rate": 5.9617834555145614e-05, "loss": 0.003, "step": 186350 }, { "epoch": 21.99150542708825, "grad_norm": 0.09255795925855637, "learning_rate": 5.959960468385284e-05, "loss": 0.0024, "step": 186400 }, { "epoch": 21.997404436054744, "grad_norm": 0.014661731198430061, "learning_rate": 5.958137348753792e-05, "loss": 0.003, "step": 186450 }, { "epoch": 22.003303445021235, "grad_norm": 0.12972767651081085, "learning_rate": 5.956314096871727e-05, "loss": 0.0035, "step": 186500 }, { "epoch": 22.00920245398773, "grad_norm": 0.039628006517887115, "learning_rate": 5.954490712990751e-05, "loss": 0.0033, "step": 186550 }, { "epoch": 22.015101462954224, "grad_norm": 0.21082830429077148, "learning_rate": 5.9526671973625424e-05, "loss": 0.0032, "step": 186600 }, { "epoch": 22.02100047192072, "grad_norm": 0.03150377422571182, "learning_rate": 5.950843550238802e-05, "loss": 0.0034, "step": 186650 }, { "epoch": 22.02689948088721, "grad_norm": 0.01841445453464985, "learning_rate": 5.9490197718712416e-05, "loss": 0.0029, "step": 186700 }, { "epoch": 22.032798489853704, "grad_norm": 0.006882250774651766, "learning_rate": 5.947195862511597e-05, "loss": 0.0036, "step": 186750 }, { "epoch": 22.0386974988202, "grad_norm": 0.08979965001344681, "learning_rate": 5.945371822411621e-05, "loss": 0.0029, "step": 186800 }, { "epoch": 22.044596507786693, "grad_norm": 0.09507869184017181, "learning_rate": 5.94354765182308e-05, "loss": 0.0038, "step": 186850 }, { "epoch": 22.050495516753184, "grad_norm": 0.12905365228652954, "learning_rate": 5.9417233509977675e-05, "loss": 0.0033, "step": 186900 }, { "epoch": 22.05639452571968, "grad_norm": 0.18704211711883545, "learning_rate": 5.9398989201874875e-05, "loss": 0.0037, "step": 186950 }, { "epoch": 22.062293534686173, "grad_norm": 0.017846040427684784, "learning_rate": 5.938074359644064e-05, "loss": 0.0033, "step": 187000 }, { "epoch": 22.062293534686173, "eval_cer": 0.08519793459552495, "eval_loss": 0.0013349625514820218, "eval_runtime": 2.0761, "eval_samples_per_second": 48.168, "eval_steps_per_second": 1.927, "eval_wer": 0.26, "step": 187000 }, { "epoch": 22.068192543652668, "grad_norm": 0.09618982672691345, "learning_rate": 5.936249669619338e-05, "loss": 0.0039, "step": 187050 }, { "epoch": 22.07409155261916, "grad_norm": 0.20890989899635315, "learning_rate": 5.93442485036517e-05, "loss": 0.0035, "step": 187100 }, { "epoch": 22.079990561585653, "grad_norm": 0.07753171026706696, "learning_rate": 5.932599902133435e-05, "loss": 0.0028, "step": 187150 }, { "epoch": 22.085889570552148, "grad_norm": 0.021600391715765, "learning_rate": 5.930774825176034e-05, "loss": 0.0022, "step": 187200 }, { "epoch": 22.091788579518642, "grad_norm": 0.018455516546964645, "learning_rate": 5.928949619744876e-05, "loss": 0.004, "step": 187250 }, { "epoch": 22.097687588485133, "grad_norm": 0.10043461620807648, "learning_rate": 5.927124286091894e-05, "loss": 0.0035, "step": 187300 }, { "epoch": 22.103586597451628, "grad_norm": 0.024397097527980804, "learning_rate": 5.925298824469037e-05, "loss": 0.0035, "step": 187350 }, { "epoch": 22.109485606418122, "grad_norm": 0.20198504626750946, "learning_rate": 5.923473235128269e-05, "loss": 0.0028, "step": 187400 }, { "epoch": 22.115384615384617, "grad_norm": 0.04723767563700676, "learning_rate": 5.921647518321575e-05, "loss": 0.0026, "step": 187450 }, { "epoch": 22.121283624351108, "grad_norm": 0.4177541434764862, "learning_rate": 5.919821674300956e-05, "loss": 0.003, "step": 187500 }, { "epoch": 22.127182633317602, "grad_norm": 0.21796800196170807, "learning_rate": 5.917995703318433e-05, "loss": 0.0041, "step": 187550 }, { "epoch": 22.133081642284097, "grad_norm": 0.011756042949855328, "learning_rate": 5.916169605626042e-05, "loss": 0.0029, "step": 187600 }, { "epoch": 22.13898065125059, "grad_norm": 0.11558093130588531, "learning_rate": 5.914343381475837e-05, "loss": 0.0031, "step": 187650 }, { "epoch": 22.144879660217082, "grad_norm": 0.10102521628141403, "learning_rate": 5.9125170311198874e-05, "loss": 0.0034, "step": 187700 }, { "epoch": 22.150778669183577, "grad_norm": 0.1008286252617836, "learning_rate": 5.910690554810285e-05, "loss": 0.0024, "step": 187750 }, { "epoch": 22.15667767815007, "grad_norm": 0.13482223451137543, "learning_rate": 5.908863952799134e-05, "loss": 0.0024, "step": 187800 }, { "epoch": 22.162576687116566, "grad_norm": 0.1242944598197937, "learning_rate": 5.9070372253385596e-05, "loss": 0.0034, "step": 187850 }, { "epoch": 22.168475696083057, "grad_norm": 0.09110629558563232, "learning_rate": 5.905210372680704e-05, "loss": 0.0022, "step": 187900 }, { "epoch": 22.17437470504955, "grad_norm": 0.09089284390211105, "learning_rate": 5.903383395077723e-05, "loss": 0.0032, "step": 187950 }, { "epoch": 22.180273714016046, "grad_norm": 0.10997342318296432, "learning_rate": 5.9015562927817936e-05, "loss": 0.0035, "step": 188000 }, { "epoch": 22.180273714016046, "eval_cer": 0.08519793459552495, "eval_loss": 0.0015816933009773493, "eval_runtime": 2.0551, "eval_samples_per_second": 48.659, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 188000 }, { "epoch": 22.18617272298254, "grad_norm": 0.013232806697487831, "learning_rate": 5.899729066045107e-05, "loss": 0.0033, "step": 188050 }, { "epoch": 22.19207173194903, "grad_norm": 0.02332719787955284, "learning_rate": 5.8979017151198754e-05, "loss": 0.0033, "step": 188100 }, { "epoch": 22.197970740915526, "grad_norm": 0.43805593252182007, "learning_rate": 5.8960742402583234e-05, "loss": 0.0026, "step": 188150 }, { "epoch": 22.20386974988202, "grad_norm": 0.22247718274593353, "learning_rate": 5.8942466417126986e-05, "loss": 0.0033, "step": 188200 }, { "epoch": 22.209768758848515, "grad_norm": 0.009578037075698376, "learning_rate": 5.8924189197352596e-05, "loss": 0.0021, "step": 188250 }, { "epoch": 22.215667767815006, "grad_norm": 0.01993909291923046, "learning_rate": 5.890591074578285e-05, "loss": 0.0029, "step": 188300 }, { "epoch": 22.2215667767815, "grad_norm": 0.11529537290334702, "learning_rate": 5.8887631064940715e-05, "loss": 0.0039, "step": 188350 }, { "epoch": 22.227465785747995, "grad_norm": 0.06592118740081787, "learning_rate": 5.886935015734931e-05, "loss": 0.0041, "step": 188400 }, { "epoch": 22.23336479471449, "grad_norm": 0.218459352850914, "learning_rate": 5.885106802553192e-05, "loss": 0.0031, "step": 188450 }, { "epoch": 22.23926380368098, "grad_norm": 0.04491515830159187, "learning_rate": 5.8832784672011986e-05, "loss": 0.0039, "step": 188500 }, { "epoch": 22.245162812647475, "grad_norm": 0.016934018582105637, "learning_rate": 5.8814500099313174e-05, "loss": 0.0026, "step": 188550 }, { "epoch": 22.25106182161397, "grad_norm": 0.21955318748950958, "learning_rate": 5.8796214309959276e-05, "loss": 0.004, "step": 188600 }, { "epoch": 22.25696083058046, "grad_norm": 0.010200942866504192, "learning_rate": 5.8777927306474246e-05, "loss": 0.0025, "step": 188650 }, { "epoch": 22.262859839546955, "grad_norm": 0.13177381455898285, "learning_rate": 5.875963909138222e-05, "loss": 0.0027, "step": 188700 }, { "epoch": 22.26875884851345, "grad_norm": 0.08594776690006256, "learning_rate": 5.87413496672075e-05, "loss": 0.0039, "step": 188750 }, { "epoch": 22.274657857479944, "grad_norm": 0.11494344472885132, "learning_rate": 5.872305903647455e-05, "loss": 0.0041, "step": 188800 }, { "epoch": 22.280556866446435, "grad_norm": 0.06271383166313171, "learning_rate": 5.870476720170801e-05, "loss": 0.0036, "step": 188850 }, { "epoch": 22.28645587541293, "grad_norm": 0.012993982061743736, "learning_rate": 5.868647416543268e-05, "loss": 0.0027, "step": 188900 }, { "epoch": 22.292354884379424, "grad_norm": 0.09891486912965775, "learning_rate": 5.8668179930173525e-05, "loss": 0.0035, "step": 188950 }, { "epoch": 22.29825389334592, "grad_norm": 0.3198220431804657, "learning_rate": 5.8649884498455686e-05, "loss": 0.0031, "step": 189000 }, { "epoch": 22.29825389334592, "eval_cer": 0.08519793459552495, "eval_loss": 0.0022566127590835094, "eval_runtime": 2.0542, "eval_samples_per_second": 48.68, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 189000 }, { "epoch": 22.30415290231241, "grad_norm": 0.2709566354751587, "learning_rate": 5.863158787280444e-05, "loss": 0.0035, "step": 189050 }, { "epoch": 22.310051911278904, "grad_norm": 0.0299973227083683, "learning_rate": 5.8613290055745274e-05, "loss": 0.0026, "step": 189100 }, { "epoch": 22.3159509202454, "grad_norm": 0.07776670902967453, "learning_rate": 5.859499104980377e-05, "loss": 0.0032, "step": 189150 }, { "epoch": 22.321849929211893, "grad_norm": 0.01866266131401062, "learning_rate": 5.857669085750578e-05, "loss": 0.0031, "step": 189200 }, { "epoch": 22.327748938178384, "grad_norm": 0.08696092665195465, "learning_rate": 5.855838948137722e-05, "loss": 0.003, "step": 189250 }, { "epoch": 22.33364794714488, "grad_norm": 0.03948705270886421, "learning_rate": 5.8540086923944203e-05, "loss": 0.0037, "step": 189300 }, { "epoch": 22.339546956111374, "grad_norm": 0.008427358232438564, "learning_rate": 5.852178318773303e-05, "loss": 0.004, "step": 189350 }, { "epoch": 22.345445965077868, "grad_norm": 0.020915430039167404, "learning_rate": 5.850347827527013e-05, "loss": 0.0034, "step": 189400 }, { "epoch": 22.35134497404436, "grad_norm": 0.023592526093125343, "learning_rate": 5.8485172189082105e-05, "loss": 0.0027, "step": 189450 }, { "epoch": 22.357243983010854, "grad_norm": 0.4191177189350128, "learning_rate": 5.846686493169574e-05, "loss": 0.0035, "step": 189500 }, { "epoch": 22.363142991977348, "grad_norm": 0.10166800767183304, "learning_rate": 5.844855650563795e-05, "loss": 0.0031, "step": 189550 }, { "epoch": 22.369042000943843, "grad_norm": 0.014782415702939034, "learning_rate": 5.843024691343584e-05, "loss": 0.0035, "step": 189600 }, { "epoch": 22.374941009910334, "grad_norm": 0.004776113200932741, "learning_rate": 5.8411936157616644e-05, "loss": 0.0026, "step": 189650 }, { "epoch": 22.380840018876828, "grad_norm": 0.026200976222753525, "learning_rate": 5.839362424070778e-05, "loss": 0.0039, "step": 189700 }, { "epoch": 22.386739027843323, "grad_norm": 0.022010721266269684, "learning_rate": 5.837531116523682e-05, "loss": 0.0044, "step": 189750 }, { "epoch": 22.392638036809817, "grad_norm": 0.025836210697889328, "learning_rate": 5.83569969337315e-05, "loss": 0.0033, "step": 189800 }, { "epoch": 22.398537045776308, "grad_norm": 0.055159009993076324, "learning_rate": 5.833868154871972e-05, "loss": 0.0037, "step": 189850 }, { "epoch": 22.404436054742803, "grad_norm": 0.06235825642943382, "learning_rate": 5.832036501272952e-05, "loss": 0.0029, "step": 189900 }, { "epoch": 22.410335063709297, "grad_norm": 0.17115160822868347, "learning_rate": 5.8302047328289113e-05, "loss": 0.0037, "step": 189950 }, { "epoch": 22.416234072675792, "grad_norm": 0.10440502315759659, "learning_rate": 5.828372849792686e-05, "loss": 0.0029, "step": 190000 }, { "epoch": 22.416234072675792, "eval_cer": 0.08347676419965576, "eval_loss": 0.00033703510416671634, "eval_runtime": 2.1058, "eval_samples_per_second": 47.488, "eval_steps_per_second": 1.9, "eval_wer": 0.26, "step": 190000 }, { "epoch": 22.422133081642283, "grad_norm": 0.10149861872196198, "learning_rate": 5.82654085241713e-05, "loss": 0.0029, "step": 190050 }, { "epoch": 22.428032090608777, "grad_norm": 0.004842704627662897, "learning_rate": 5.824708740955111e-05, "loss": 0.0037, "step": 190100 }, { "epoch": 22.433931099575272, "grad_norm": 0.15347275137901306, "learning_rate": 5.822876515659511e-05, "loss": 0.0033, "step": 190150 }, { "epoch": 22.439830108541766, "grad_norm": 0.07767130434513092, "learning_rate": 5.8210441767832344e-05, "loss": 0.0041, "step": 190200 }, { "epoch": 22.445729117508257, "grad_norm": 0.04083928465843201, "learning_rate": 5.819211724579194e-05, "loss": 0.003, "step": 190250 }, { "epoch": 22.451628126474752, "grad_norm": 0.08880884200334549, "learning_rate": 5.817379159300324e-05, "loss": 0.0042, "step": 190300 }, { "epoch": 22.457527135441246, "grad_norm": 0.20215316116809845, "learning_rate": 5.8155464811995664e-05, "loss": 0.0036, "step": 190350 }, { "epoch": 22.46342614440774, "grad_norm": 0.10237134993076324, "learning_rate": 5.813713690529886e-05, "loss": 0.0035, "step": 190400 }, { "epoch": 22.469325153374232, "grad_norm": 0.3330472409725189, "learning_rate": 5.8118807875442604e-05, "loss": 0.0048, "step": 190450 }, { "epoch": 22.475224162340727, "grad_norm": 0.2095474749803543, "learning_rate": 5.8100477724956845e-05, "loss": 0.0036, "step": 190500 }, { "epoch": 22.48112317130722, "grad_norm": 0.0471770279109478, "learning_rate": 5.808214645637166e-05, "loss": 0.004, "step": 190550 }, { "epoch": 22.487022180273716, "grad_norm": 0.12093313038349152, "learning_rate": 5.8063814072217293e-05, "loss": 0.003, "step": 190600 }, { "epoch": 22.492921189240207, "grad_norm": 0.1824672669172287, "learning_rate": 5.8045480575024136e-05, "loss": 0.0037, "step": 190650 }, { "epoch": 22.4988201982067, "grad_norm": 0.07984858751296997, "learning_rate": 5.802714596732276e-05, "loss": 0.0027, "step": 190700 }, { "epoch": 22.504719207173196, "grad_norm": 0.10769741982221603, "learning_rate": 5.800881025164385e-05, "loss": 0.0035, "step": 190750 }, { "epoch": 22.51061821613969, "grad_norm": 0.08586721122264862, "learning_rate": 5.7990473430518264e-05, "loss": 0.0034, "step": 190800 }, { "epoch": 22.51651722510618, "grad_norm": 0.0835166648030281, "learning_rate": 5.7972135506477024e-05, "loss": 0.0043, "step": 190850 }, { "epoch": 22.522416234072676, "grad_norm": 0.1980547159910202, "learning_rate": 5.795379648205128e-05, "loss": 0.0033, "step": 190900 }, { "epoch": 22.52831524303917, "grad_norm": 0.01536504551768303, "learning_rate": 5.793545635977235e-05, "loss": 0.0039, "step": 190950 }, { "epoch": 22.534214252005665, "grad_norm": 0.25293174386024475, "learning_rate": 5.791711514217172e-05, "loss": 0.0042, "step": 191000 }, { "epoch": 22.534214252005665, "eval_cer": 0.08605851979345955, "eval_loss": 0.0011775310849770904, "eval_runtime": 2.0606, "eval_samples_per_second": 48.529, "eval_steps_per_second": 1.941, "eval_wer": 0.27, "step": 191000 }, { "epoch": 22.540113260972156, "grad_norm": 0.1638091802597046, "learning_rate": 5.7898772831780966e-05, "loss": 0.0039, "step": 191050 }, { "epoch": 22.54601226993865, "grad_norm": 0.15423455834388733, "learning_rate": 5.788042943113188e-05, "loss": 0.0037, "step": 191100 }, { "epoch": 22.551911278905145, "grad_norm": 0.04210822656750679, "learning_rate": 5.786208494275638e-05, "loss": 0.0028, "step": 191150 }, { "epoch": 22.55781028787164, "grad_norm": 0.02554468624293804, "learning_rate": 5.784373936918654e-05, "loss": 0.0035, "step": 191200 }, { "epoch": 22.56370929683813, "grad_norm": 0.05789441615343094, "learning_rate": 5.7825392712954566e-05, "loss": 0.0034, "step": 191250 }, { "epoch": 22.569608305804625, "grad_norm": 0.30588433146476746, "learning_rate": 5.7807044976592814e-05, "loss": 0.004, "step": 191300 }, { "epoch": 22.57550731477112, "grad_norm": 0.07251883298158646, "learning_rate": 5.778869616263383e-05, "loss": 0.0041, "step": 191350 }, { "epoch": 22.58140632373761, "grad_norm": 0.057759810239076614, "learning_rate": 5.7770346273610254e-05, "loss": 0.0033, "step": 191400 }, { "epoch": 22.587305332704105, "grad_norm": 0.14260581135749817, "learning_rate": 5.775199531205492e-05, "loss": 0.0045, "step": 191450 }, { "epoch": 22.5932043416706, "grad_norm": 0.13208520412445068, "learning_rate": 5.773364328050076e-05, "loss": 0.003, "step": 191500 }, { "epoch": 22.599103350637094, "grad_norm": 0.11117923259735107, "learning_rate": 5.771529018148092e-05, "loss": 0.0029, "step": 191550 }, { "epoch": 22.605002359603585, "grad_norm": 0.06568726897239685, "learning_rate": 5.7696936017528634e-05, "loss": 0.0045, "step": 191600 }, { "epoch": 22.61090136857008, "grad_norm": 0.043578241020441055, "learning_rate": 5.767858079117733e-05, "loss": 0.0031, "step": 191650 }, { "epoch": 22.616800377536574, "grad_norm": 0.20785821974277496, "learning_rate": 5.766022450496053e-05, "loss": 0.0031, "step": 191700 }, { "epoch": 22.62269938650307, "grad_norm": 0.14556990563869476, "learning_rate": 5.764186716141193e-05, "loss": 0.0036, "step": 191750 }, { "epoch": 22.62859839546956, "grad_norm": 0.10175082087516785, "learning_rate": 5.762350876306537e-05, "loss": 0.004, "step": 191800 }, { "epoch": 22.634497404436054, "grad_norm": 0.11435738205909729, "learning_rate": 5.760514931245488e-05, "loss": 0.0028, "step": 191850 }, { "epoch": 22.64039641340255, "grad_norm": 0.29631221294403076, "learning_rate": 5.758678881211456e-05, "loss": 0.0038, "step": 191900 }, { "epoch": 22.646295422369043, "grad_norm": 0.17311564087867737, "learning_rate": 5.7568427264578686e-05, "loss": 0.004, "step": 191950 }, { "epoch": 22.652194431335534, "grad_norm": 0.04003392904996872, "learning_rate": 5.755006467238169e-05, "loss": 0.003, "step": 192000 }, { "epoch": 22.652194431335534, "eval_cer": 0.08519793459552495, "eval_loss": 0.0021083205938339233, "eval_runtime": 2.0271, "eval_samples_per_second": 49.331, "eval_steps_per_second": 1.973, "eval_wer": 0.26, "step": 192000 }, { "epoch": 22.65809344030203, "grad_norm": 0.10980654507875443, "learning_rate": 5.7531701038058125e-05, "loss": 0.0038, "step": 192050 }, { "epoch": 22.663992449268523, "grad_norm": 0.2835071086883545, "learning_rate": 5.7513336364142726e-05, "loss": 0.0041, "step": 192100 }, { "epoch": 22.669891458235018, "grad_norm": 0.12713523209095, "learning_rate": 5.749497065317033e-05, "loss": 0.0032, "step": 192150 }, { "epoch": 22.67579046720151, "grad_norm": 0.03755998611450195, "learning_rate": 5.747660390767593e-05, "loss": 0.0039, "step": 192200 }, { "epoch": 22.681689476168003, "grad_norm": 0.057662732899188995, "learning_rate": 5.7458236130194687e-05, "loss": 0.0033, "step": 192250 }, { "epoch": 22.687588485134498, "grad_norm": 0.0067917765118181705, "learning_rate": 5.743986732326187e-05, "loss": 0.0042, "step": 192300 }, { "epoch": 22.693487494100992, "grad_norm": 0.2383059859275818, "learning_rate": 5.7421497489412924e-05, "loss": 0.0034, "step": 192350 }, { "epoch": 22.699386503067483, "grad_norm": 0.00918628927320242, "learning_rate": 5.740312663118338e-05, "loss": 0.003, "step": 192400 }, { "epoch": 22.705285512033978, "grad_norm": 0.18681243062019348, "learning_rate": 5.738475475110896e-05, "loss": 0.0031, "step": 192450 }, { "epoch": 22.711184521000472, "grad_norm": 0.20717786252498627, "learning_rate": 5.7366381851725525e-05, "loss": 0.003, "step": 192500 }, { "epoch": 22.717083529966967, "grad_norm": 0.06747470796108246, "learning_rate": 5.7348007935569045e-05, "loss": 0.0032, "step": 192550 }, { "epoch": 22.722982538933458, "grad_norm": 0.08870894461870193, "learning_rate": 5.732963300517569e-05, "loss": 0.0037, "step": 192600 }, { "epoch": 22.728881547899952, "grad_norm": 0.044606439769268036, "learning_rate": 5.731125706308168e-05, "loss": 0.0033, "step": 192650 }, { "epoch": 22.734780556866447, "grad_norm": 0.1881929188966751, "learning_rate": 5.7292880111823454e-05, "loss": 0.0028, "step": 192700 }, { "epoch": 22.74067956583294, "grad_norm": 0.15841874480247498, "learning_rate": 5.727450215393754e-05, "loss": 0.0042, "step": 192750 }, { "epoch": 22.746578574799432, "grad_norm": 0.004045382142066956, "learning_rate": 5.725612319196064e-05, "loss": 0.004, "step": 192800 }, { "epoch": 22.752477583765927, "grad_norm": 0.21680864691734314, "learning_rate": 5.7237743228429594e-05, "loss": 0.0024, "step": 192850 }, { "epoch": 22.75837659273242, "grad_norm": 0.01923236809670925, "learning_rate": 5.721936226588135e-05, "loss": 0.0029, "step": 192900 }, { "epoch": 22.764275601698916, "grad_norm": 0.14653867483139038, "learning_rate": 5.7200980306853015e-05, "loss": 0.0034, "step": 192950 }, { "epoch": 22.770174610665407, "grad_norm": 0.10433544218540192, "learning_rate": 5.718259735388181e-05, "loss": 0.0039, "step": 193000 }, { "epoch": 22.770174610665407, "eval_cer": 0.08519793459552495, "eval_loss": 0.0014457390643656254, "eval_runtime": 2.0788, "eval_samples_per_second": 48.104, "eval_steps_per_second": 1.924, "eval_wer": 0.26, "step": 193000 }, { "epoch": 22.7760736196319, "grad_norm": 0.25523805618286133, "learning_rate": 5.716421340950515e-05, "loss": 0.003, "step": 193050 }, { "epoch": 22.781972628598396, "grad_norm": 0.005004641134291887, "learning_rate": 5.714582847626052e-05, "loss": 0.003, "step": 193100 }, { "epoch": 22.78787163756489, "grad_norm": 0.0556304045021534, "learning_rate": 5.712744255668557e-05, "loss": 0.0027, "step": 193150 }, { "epoch": 22.79377064653138, "grad_norm": 0.04006147012114525, "learning_rate": 5.7109055653318113e-05, "loss": 0.0032, "step": 193200 }, { "epoch": 22.799669655497876, "grad_norm": 0.04598022997379303, "learning_rate": 5.709066776869605e-05, "loss": 0.0033, "step": 193250 }, { "epoch": 22.80556866446437, "grad_norm": 0.03716558590531349, "learning_rate": 5.7072278905357446e-05, "loss": 0.0023, "step": 193300 }, { "epoch": 22.811467673430865, "grad_norm": 0.020694224163889885, "learning_rate": 5.7053889065840504e-05, "loss": 0.0034, "step": 193350 }, { "epoch": 22.817366682397356, "grad_norm": 0.031244827434420586, "learning_rate": 5.703549825268353e-05, "loss": 0.0026, "step": 193400 }, { "epoch": 22.82326569136385, "grad_norm": 0.049630168825387955, "learning_rate": 5.7017106468424985e-05, "loss": 0.0031, "step": 193450 }, { "epoch": 22.829164700330345, "grad_norm": 0.016076838597655296, "learning_rate": 5.699871371560349e-05, "loss": 0.0029, "step": 193500 }, { "epoch": 22.83506370929684, "grad_norm": 0.15898366272449493, "learning_rate": 5.6980319996757756e-05, "loss": 0.0038, "step": 193550 }, { "epoch": 22.84096271826333, "grad_norm": 0.10438733547925949, "learning_rate": 5.696192531442667e-05, "loss": 0.0022, "step": 193600 }, { "epoch": 22.846861727229825, "grad_norm": 0.08283999562263489, "learning_rate": 5.69435296711492e-05, "loss": 0.0026, "step": 193650 }, { "epoch": 22.85276073619632, "grad_norm": 0.1529594361782074, "learning_rate": 5.6925133069464486e-05, "loss": 0.003, "step": 193700 }, { "epoch": 22.85865974516281, "grad_norm": 0.13748714327812195, "learning_rate": 5.690673551191178e-05, "loss": 0.0051, "step": 193750 }, { "epoch": 22.864558754129305, "grad_norm": 0.1481398046016693, "learning_rate": 5.6888337001030504e-05, "loss": 0.0041, "step": 193800 }, { "epoch": 22.8704577630958, "grad_norm": 0.11536111682653427, "learning_rate": 5.686993753936014e-05, "loss": 0.0032, "step": 193850 }, { "epoch": 22.876356772062294, "grad_norm": 0.14115917682647705, "learning_rate": 5.685153712944038e-05, "loss": 0.0048, "step": 193900 }, { "epoch": 22.88225578102879, "grad_norm": 0.12190954387187958, "learning_rate": 5.683313577381101e-05, "loss": 0.0032, "step": 193950 }, { "epoch": 22.88815478999528, "grad_norm": 0.13719196617603302, "learning_rate": 5.6814733475011916e-05, "loss": 0.0032, "step": 194000 }, { "epoch": 22.88815478999528, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002756300673354417, "eval_runtime": 2.0459, "eval_samples_per_second": 48.878, "eval_steps_per_second": 1.955, "eval_wer": 0.26, "step": 194000 }, { "epoch": 22.894053798961775, "grad_norm": 0.052869122475385666, "learning_rate": 5.6796330235583186e-05, "loss": 0.004, "step": 194050 }, { "epoch": 22.89995280792827, "grad_norm": 0.26767393946647644, "learning_rate": 5.677792605806496e-05, "loss": 0.0034, "step": 194100 }, { "epoch": 22.90585181689476, "grad_norm": 0.1329454481601715, "learning_rate": 5.675952094499757e-05, "loss": 0.0036, "step": 194150 }, { "epoch": 22.911750825861255, "grad_norm": 0.07955838739871979, "learning_rate": 5.674111489892144e-05, "loss": 0.0039, "step": 194200 }, { "epoch": 22.91764983482775, "grad_norm": 0.1640133410692215, "learning_rate": 5.672270792237716e-05, "loss": 0.0043, "step": 194250 }, { "epoch": 22.923548843794244, "grad_norm": 0.02569018490612507, "learning_rate": 5.670430001790539e-05, "loss": 0.004, "step": 194300 }, { "epoch": 22.929447852760735, "grad_norm": 0.01026830542832613, "learning_rate": 5.668589118804696e-05, "loss": 0.0028, "step": 194350 }, { "epoch": 22.93534686172723, "grad_norm": 0.13821323215961456, "learning_rate": 5.6667481435342816e-05, "loss": 0.0035, "step": 194400 }, { "epoch": 22.941245870693724, "grad_norm": 0.14353281259536743, "learning_rate": 5.6649070762334054e-05, "loss": 0.0033, "step": 194450 }, { "epoch": 22.947144879660218, "grad_norm": 0.012117167934775352, "learning_rate": 5.663065917156186e-05, "loss": 0.0034, "step": 194500 }, { "epoch": 22.95304388862671, "grad_norm": 0.071614108979702, "learning_rate": 5.661224666556758e-05, "loss": 0.0037, "step": 194550 }, { "epoch": 22.958942897593204, "grad_norm": 0.03927464410662651, "learning_rate": 5.659383324689266e-05, "loss": 0.0028, "step": 194600 }, { "epoch": 22.9648419065597, "grad_norm": 0.0881594642996788, "learning_rate": 5.657541891807867e-05, "loss": 0.0032, "step": 194650 }, { "epoch": 22.970740915526193, "grad_norm": 0.08124255388975143, "learning_rate": 5.655700368166733e-05, "loss": 0.004, "step": 194700 }, { "epoch": 22.976639924492684, "grad_norm": 0.1469748616218567, "learning_rate": 5.653858754020049e-05, "loss": 0.0029, "step": 194750 }, { "epoch": 22.98253893345918, "grad_norm": 0.16420403122901917, "learning_rate": 5.652017049622007e-05, "loss": 0.004, "step": 194800 }, { "epoch": 22.988437942425673, "grad_norm": 0.00798194482922554, "learning_rate": 5.6501752552268196e-05, "loss": 0.0032, "step": 194850 }, { "epoch": 22.994336951392167, "grad_norm": 0.1678357869386673, "learning_rate": 5.648333371088706e-05, "loss": 0.0027, "step": 194900 }, { "epoch": 23.00023596035866, "grad_norm": 0.08059119433164597, "learning_rate": 5.646491397461897e-05, "loss": 0.0037, "step": 194950 }, { "epoch": 23.006134969325153, "grad_norm": 0.013832471333444118, "learning_rate": 5.644649334600641e-05, "loss": 0.0025, "step": 195000 }, { "epoch": 23.006134969325153, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003656113985925913, "eval_runtime": 2.0501, "eval_samples_per_second": 48.777, "eval_steps_per_second": 1.951, "eval_wer": 0.26, "step": 195000 }, { "epoch": 23.012033978291647, "grad_norm": 0.14458495378494263, "learning_rate": 5.642807182759195e-05, "loss": 0.0025, "step": 195050 }, { "epoch": 23.017932987258142, "grad_norm": 0.042804524302482605, "learning_rate": 5.6409649421918275e-05, "loss": 0.0037, "step": 195100 }, { "epoch": 23.023831996224633, "grad_norm": 0.042551811784505844, "learning_rate": 5.639122613152823e-05, "loss": 0.0019, "step": 195150 }, { "epoch": 23.029731005191127, "grad_norm": 0.017353828996419907, "learning_rate": 5.637280195896474e-05, "loss": 0.0023, "step": 195200 }, { "epoch": 23.035630014157622, "grad_norm": 0.02184981480240822, "learning_rate": 5.635437690677089e-05, "loss": 0.0026, "step": 195250 }, { "epoch": 23.041529023124117, "grad_norm": 0.19615887105464935, "learning_rate": 5.633595097748986e-05, "loss": 0.0022, "step": 195300 }, { "epoch": 23.047428032090608, "grad_norm": 0.010201663710176945, "learning_rate": 5.631752417366496e-05, "loss": 0.0035, "step": 195350 }, { "epoch": 23.053327041057102, "grad_norm": 0.08149153739213943, "learning_rate": 5.629909649783961e-05, "loss": 0.0027, "step": 195400 }, { "epoch": 23.059226050023597, "grad_norm": 0.21280747652053833, "learning_rate": 5.628066795255738e-05, "loss": 0.0031, "step": 195450 }, { "epoch": 23.06512505899009, "grad_norm": 0.07391393184661865, "learning_rate": 5.626223854036192e-05, "loss": 0.0025, "step": 195500 }, { "epoch": 23.071024067956582, "grad_norm": 0.09142575412988663, "learning_rate": 5.6243808263797034e-05, "loss": 0.0039, "step": 195550 }, { "epoch": 23.076923076923077, "grad_norm": 0.039487455040216446, "learning_rate": 5.622537712540664e-05, "loss": 0.0037, "step": 195600 }, { "epoch": 23.08282208588957, "grad_norm": 0.04770667105913162, "learning_rate": 5.6206945127734746e-05, "loss": 0.0024, "step": 195650 }, { "epoch": 23.088721094856066, "grad_norm": 0.06403779983520508, "learning_rate": 5.618851227332551e-05, "loss": 0.0031, "step": 195700 }, { "epoch": 23.094620103822557, "grad_norm": 0.04301008582115173, "learning_rate": 5.6170078564723194e-05, "loss": 0.0024, "step": 195750 }, { "epoch": 23.10051911278905, "grad_norm": 0.23937825858592987, "learning_rate": 5.6151644004472185e-05, "loss": 0.0021, "step": 195800 }, { "epoch": 23.106418121755546, "grad_norm": 0.25540709495544434, "learning_rate": 5.613320859511697e-05, "loss": 0.0029, "step": 195850 }, { "epoch": 23.11231713072204, "grad_norm": 0.0064839948900043964, "learning_rate": 5.6114772339202194e-05, "loss": 0.003, "step": 195900 }, { "epoch": 23.11821613968853, "grad_norm": 0.01774640753865242, "learning_rate": 5.6096335239272567e-05, "loss": 0.0032, "step": 195950 }, { "epoch": 23.124115148655026, "grad_norm": 0.08251269906759262, "learning_rate": 5.607789729787295e-05, "loss": 0.0029, "step": 196000 }, { "epoch": 23.124115148655026, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002414114132989198, "eval_runtime": 2.0428, "eval_samples_per_second": 48.953, "eval_steps_per_second": 1.958, "eval_wer": 0.26, "step": 196000 }, { "epoch": 23.13001415762152, "grad_norm": 0.06552862375974655, "learning_rate": 5.6059458517548304e-05, "loss": 0.0029, "step": 196050 }, { "epoch": 23.135913166588015, "grad_norm": 0.06776441633701324, "learning_rate": 5.6041018900843714e-05, "loss": 0.0032, "step": 196100 }, { "epoch": 23.141812175554506, "grad_norm": 0.05556147173047066, "learning_rate": 5.6022578450304386e-05, "loss": 0.003, "step": 196150 }, { "epoch": 23.147711184521, "grad_norm": 0.15237092971801758, "learning_rate": 5.6004137168475635e-05, "loss": 0.0036, "step": 196200 }, { "epoch": 23.153610193487495, "grad_norm": 0.053988274186849594, "learning_rate": 5.598569505790289e-05, "loss": 0.0032, "step": 196250 }, { "epoch": 23.15950920245399, "grad_norm": 0.044004060328006744, "learning_rate": 5.5967252121131665e-05, "loss": 0.0037, "step": 196300 }, { "epoch": 23.16540821142048, "grad_norm": 0.0094103142619133, "learning_rate": 5.594880836070765e-05, "loss": 0.0031, "step": 196350 }, { "epoch": 23.171307220386975, "grad_norm": 0.007246033754199743, "learning_rate": 5.593036377917661e-05, "loss": 0.003, "step": 196400 }, { "epoch": 23.17720622935347, "grad_norm": 0.06562285125255585, "learning_rate": 5.591191837908442e-05, "loss": 0.0032, "step": 196450 }, { "epoch": 23.18310523831996, "grad_norm": 0.16020122170448303, "learning_rate": 5.589347216297708e-05, "loss": 0.0026, "step": 196500 }, { "epoch": 23.189004247286455, "grad_norm": 0.07760694622993469, "learning_rate": 5.587502513340071e-05, "loss": 0.0035, "step": 196550 }, { "epoch": 23.19490325625295, "grad_norm": 0.03209671750664711, "learning_rate": 5.585657729290151e-05, "loss": 0.0029, "step": 196600 }, { "epoch": 23.200802265219444, "grad_norm": 0.013764386996626854, "learning_rate": 5.583812864402584e-05, "loss": 0.0023, "step": 196650 }, { "epoch": 23.206701274185935, "grad_norm": 0.18956199288368225, "learning_rate": 5.581967918932013e-05, "loss": 0.0028, "step": 196700 }, { "epoch": 23.21260028315243, "grad_norm": 0.4848942756652832, "learning_rate": 5.580122893133092e-05, "loss": 0.0042, "step": 196750 }, { "epoch": 23.218499292118924, "grad_norm": 0.053302135318517685, "learning_rate": 5.578277787260491e-05, "loss": 0.0029, "step": 196800 }, { "epoch": 23.22439830108542, "grad_norm": 0.007766781374812126, "learning_rate": 5.576432601568886e-05, "loss": 0.0027, "step": 196850 }, { "epoch": 23.23029731005191, "grad_norm": 0.10014113038778305, "learning_rate": 5.574587336312966e-05, "loss": 0.0031, "step": 196900 }, { "epoch": 23.236196319018404, "grad_norm": 0.003377842716872692, "learning_rate": 5.5727419917474324e-05, "loss": 0.002, "step": 196950 }, { "epoch": 23.2420953279849, "grad_norm": 0.08878155052661896, "learning_rate": 5.570896568126993e-05, "loss": 0.0027, "step": 197000 }, { "epoch": 23.2420953279849, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004516037297435105, "eval_runtime": 2.0211, "eval_samples_per_second": 49.477, "eval_steps_per_second": 1.979, "eval_wer": 0.26, "step": 197000 }, { "epoch": 23.247994336951393, "grad_norm": 0.04431626945734024, "learning_rate": 5.569051065706371e-05, "loss": 0.0022, "step": 197050 }, { "epoch": 23.253893345917884, "grad_norm": 0.11699254810810089, "learning_rate": 5.5672054847402985e-05, "loss": 0.0032, "step": 197100 }, { "epoch": 23.25979235488438, "grad_norm": 0.032591983675956726, "learning_rate": 5.5653598254835195e-05, "loss": 0.0035, "step": 197150 }, { "epoch": 23.265691363850873, "grad_norm": 0.004434666130691767, "learning_rate": 5.563514088190789e-05, "loss": 0.0033, "step": 197200 }, { "epoch": 23.271590372817368, "grad_norm": 0.18141908943653107, "learning_rate": 5.56166827311687e-05, "loss": 0.0042, "step": 197250 }, { "epoch": 23.27748938178386, "grad_norm": 0.02350231632590294, "learning_rate": 5.559822380516539e-05, "loss": 0.0031, "step": 197300 }, { "epoch": 23.283388390750353, "grad_norm": 0.024483580142259598, "learning_rate": 5.557976410644584e-05, "loss": 0.0032, "step": 197350 }, { "epoch": 23.289287399716848, "grad_norm": 0.08420155942440033, "learning_rate": 5.556130363755798e-05, "loss": 0.0031, "step": 197400 }, { "epoch": 23.295186408683342, "grad_norm": 0.0033702701330184937, "learning_rate": 5.5542842401049935e-05, "loss": 0.0029, "step": 197450 }, { "epoch": 23.301085417649833, "grad_norm": 0.05848539248108864, "learning_rate": 5.552438039946986e-05, "loss": 0.0029, "step": 197500 }, { "epoch": 23.306984426616328, "grad_norm": 0.058722443878650665, "learning_rate": 5.5505917635366034e-05, "loss": 0.0031, "step": 197550 }, { "epoch": 23.312883435582823, "grad_norm": 0.009443257004022598, "learning_rate": 5.548745411128688e-05, "loss": 0.0025, "step": 197600 }, { "epoch": 23.318782444549317, "grad_norm": 0.20155897736549377, "learning_rate": 5.546898982978088e-05, "loss": 0.0026, "step": 197650 }, { "epoch": 23.324681453515808, "grad_norm": 0.16267739236354828, "learning_rate": 5.545052479339662e-05, "loss": 0.0025, "step": 197700 }, { "epoch": 23.330580462482303, "grad_norm": 0.14720699191093445, "learning_rate": 5.543205900468285e-05, "loss": 0.0024, "step": 197750 }, { "epoch": 23.336479471448797, "grad_norm": 0.008774938061833382, "learning_rate": 5.5413592466188346e-05, "loss": 0.003, "step": 197800 }, { "epoch": 23.34237848041529, "grad_norm": 0.03996730223298073, "learning_rate": 5.539512518046204e-05, "loss": 0.0032, "step": 197850 }, { "epoch": 23.348277489381783, "grad_norm": 0.08563396334648132, "learning_rate": 5.5376657150052934e-05, "loss": 0.0036, "step": 197900 }, { "epoch": 23.354176498348277, "grad_norm": 0.04480614885687828, "learning_rate": 5.5358188377510145e-05, "loss": 0.0034, "step": 197950 }, { "epoch": 23.36007550731477, "grad_norm": 0.0571649894118309, "learning_rate": 5.533971886538293e-05, "loss": 0.0026, "step": 198000 }, { "epoch": 23.36007550731477, "eval_cer": 0.08605851979345955, "eval_loss": 0.0006117118173278868, "eval_runtime": 2.0327, "eval_samples_per_second": 49.195, "eval_steps_per_second": 1.968, "eval_wer": 0.27, "step": 198000 }, { "epoch": 23.365974516281266, "grad_norm": 0.37163904309272766, "learning_rate": 5.532124861622059e-05, "loss": 0.0041, "step": 198050 }, { "epoch": 23.371873525247757, "grad_norm": 0.0952502191066742, "learning_rate": 5.5302777632572545e-05, "loss": 0.0024, "step": 198100 }, { "epoch": 23.37777253421425, "grad_norm": 0.06294964998960495, "learning_rate": 5.5284305916988344e-05, "loss": 0.0029, "step": 198150 }, { "epoch": 23.383671543180746, "grad_norm": 0.018439728766679764, "learning_rate": 5.5265833472017606e-05, "loss": 0.0027, "step": 198200 }, { "epoch": 23.38957055214724, "grad_norm": 0.3169838488101959, "learning_rate": 5.524736030021005e-05, "loss": 0.0021, "step": 198250 }, { "epoch": 23.39546956111373, "grad_norm": 0.0716521218419075, "learning_rate": 5.522888640411552e-05, "loss": 0.0027, "step": 198300 }, { "epoch": 23.401368570080226, "grad_norm": 0.18404696881771088, "learning_rate": 5.521041178628395e-05, "loss": 0.0032, "step": 198350 }, { "epoch": 23.40726757904672, "grad_norm": 0.005878578405827284, "learning_rate": 5.519193644926535e-05, "loss": 0.0034, "step": 198400 }, { "epoch": 23.413166588013215, "grad_norm": 0.14097696542739868, "learning_rate": 5.5173460395609855e-05, "loss": 0.003, "step": 198450 }, { "epoch": 23.419065596979706, "grad_norm": 0.07309971004724503, "learning_rate": 5.5154983627867715e-05, "loss": 0.0025, "step": 198500 }, { "epoch": 23.4249646059462, "grad_norm": 0.020277317613363266, "learning_rate": 5.513650614858924e-05, "loss": 0.0034, "step": 198550 }, { "epoch": 23.430863614912695, "grad_norm": 0.017640884965658188, "learning_rate": 5.511802796032485e-05, "loss": 0.0027, "step": 198600 }, { "epoch": 23.43676262387919, "grad_norm": 0.14352624118328094, "learning_rate": 5.509954906562508e-05, "loss": 0.0027, "step": 198650 }, { "epoch": 23.44266163284568, "grad_norm": 0.10621476918458939, "learning_rate": 5.508106946704052e-05, "loss": 0.0025, "step": 198700 }, { "epoch": 23.448560641812175, "grad_norm": 0.04775317758321762, "learning_rate": 5.506258916712194e-05, "loss": 0.002, "step": 198750 }, { "epoch": 23.45445965077867, "grad_norm": 0.0255584679543972, "learning_rate": 5.504410816842009e-05, "loss": 0.0023, "step": 198800 }, { "epoch": 23.460358659745165, "grad_norm": 0.02905711531639099, "learning_rate": 5.5025626473485944e-05, "loss": 0.0034, "step": 198850 }, { "epoch": 23.466257668711656, "grad_norm": 0.19963550567626953, "learning_rate": 5.5007144084870476e-05, "loss": 0.0033, "step": 198900 }, { "epoch": 23.47215667767815, "grad_norm": 0.015875406563282013, "learning_rate": 5.49886610051248e-05, "loss": 0.0029, "step": 198950 }, { "epoch": 23.478055686644645, "grad_norm": 0.011751501820981503, "learning_rate": 5.497017723680009e-05, "loss": 0.0026, "step": 199000 }, { "epoch": 23.478055686644645, "eval_cer": 0.08605851979345955, "eval_loss": 0.0004534620384220034, "eval_runtime": 2.0208, "eval_samples_per_second": 49.487, "eval_steps_per_second": 1.979, "eval_wer": 0.27, "step": 199000 }, { "epoch": 23.48395469561114, "grad_norm": 0.06155671924352646, "learning_rate": 5.495169278244765e-05, "loss": 0.0036, "step": 199050 }, { "epoch": 23.48985370457763, "grad_norm": 0.13998739421367645, "learning_rate": 5.493320764461888e-05, "loss": 0.0038, "step": 199100 }, { "epoch": 23.495752713544125, "grad_norm": 0.08901200443506241, "learning_rate": 5.491472182586526e-05, "loss": 0.003, "step": 199150 }, { "epoch": 23.50165172251062, "grad_norm": 0.13997782766819, "learning_rate": 5.489623532873835e-05, "loss": 0.0028, "step": 199200 }, { "epoch": 23.50755073147711, "grad_norm": 0.3996601700782776, "learning_rate": 5.4877748155789846e-05, "loss": 0.0041, "step": 199250 }, { "epoch": 23.513449740443605, "grad_norm": 0.1721712350845337, "learning_rate": 5.485926030957148e-05, "loss": 0.0035, "step": 199300 }, { "epoch": 23.5193487494101, "grad_norm": 0.09606218338012695, "learning_rate": 5.4840771792635115e-05, "loss": 0.0028, "step": 199350 }, { "epoch": 23.525247758376594, "grad_norm": 0.033264122903347015, "learning_rate": 5.4822282607532724e-05, "loss": 0.0033, "step": 199400 }, { "epoch": 23.531146767343085, "grad_norm": 0.05228656902909279, "learning_rate": 5.480379275681632e-05, "loss": 0.0028, "step": 199450 }, { "epoch": 23.53704577630958, "grad_norm": 0.18565037846565247, "learning_rate": 5.478530224303805e-05, "loss": 0.0037, "step": 199500 }, { "epoch": 23.542944785276074, "grad_norm": 0.299042284488678, "learning_rate": 5.4766811068750136e-05, "loss": 0.0033, "step": 199550 }, { "epoch": 23.54884379424257, "grad_norm": 0.2183542400598526, "learning_rate": 5.474831923650488e-05, "loss": 0.0033, "step": 199600 }, { "epoch": 23.55474280320906, "grad_norm": 0.05773425102233887, "learning_rate": 5.47298267488547e-05, "loss": 0.0025, "step": 199650 }, { "epoch": 23.560641812175554, "grad_norm": 0.013451571576297283, "learning_rate": 5.471133360835209e-05, "loss": 0.0031, "step": 199700 }, { "epoch": 23.56654082114205, "grad_norm": 0.05438726395368576, "learning_rate": 5.469283981754963e-05, "loss": 0.0028, "step": 199750 }, { "epoch": 23.572439830108543, "grad_norm": 0.14241202175617218, "learning_rate": 5.4674345379e-05, "loss": 0.0029, "step": 199800 }, { "epoch": 23.578338839075034, "grad_norm": 0.08878426253795624, "learning_rate": 5.465585029525598e-05, "loss": 0.0028, "step": 199850 }, { "epoch": 23.58423784804153, "grad_norm": 0.13539588451385498, "learning_rate": 5.4637354568870425e-05, "loss": 0.0033, "step": 199900 }, { "epoch": 23.590136857008023, "grad_norm": 0.3595251441001892, "learning_rate": 5.461885820239625e-05, "loss": 0.0043, "step": 199950 }, { "epoch": 23.596035865974518, "grad_norm": 0.19365359842777252, "learning_rate": 5.460036119838651e-05, "loss": 0.0027, "step": 200000 }, { "epoch": 23.596035865974518, "eval_cer": 0.08605851979345955, "eval_loss": 0.0008891906472854316, "eval_runtime": 2.0364, "eval_samples_per_second": 49.106, "eval_steps_per_second": 1.964, "eval_wer": 0.27, "step": 200000 }, { "epoch": 23.60193487494101, "grad_norm": 0.1163308173418045, "learning_rate": 5.4581863559394306e-05, "loss": 0.0033, "step": 200050 }, { "epoch": 23.607833883907503, "grad_norm": 0.03210970014333725, "learning_rate": 5.456336528797287e-05, "loss": 0.0027, "step": 200100 }, { "epoch": 23.613732892873998, "grad_norm": 0.017176097258925438, "learning_rate": 5.454486638667549e-05, "loss": 0.0027, "step": 200150 }, { "epoch": 23.619631901840492, "grad_norm": 0.005497022066265345, "learning_rate": 5.452636685805552e-05, "loss": 0.0032, "step": 200200 }, { "epoch": 23.625530910806983, "grad_norm": 0.017843738198280334, "learning_rate": 5.4507866704666476e-05, "loss": 0.0034, "step": 200250 }, { "epoch": 23.631429919773478, "grad_norm": 0.07380428165197372, "learning_rate": 5.448936592906187e-05, "loss": 0.0033, "step": 200300 }, { "epoch": 23.637328928739972, "grad_norm": 0.043293412774801254, "learning_rate": 5.4470864533795354e-05, "loss": 0.0031, "step": 200350 }, { "epoch": 23.643227937706467, "grad_norm": 0.005087992176413536, "learning_rate": 5.4452362521420665e-05, "loss": 0.0041, "step": 200400 }, { "epoch": 23.649126946672958, "grad_norm": 0.03503116965293884, "learning_rate": 5.44338598944916e-05, "loss": 0.0038, "step": 200450 }, { "epoch": 23.655025955639452, "grad_norm": 0.1606510430574417, "learning_rate": 5.441535665556208e-05, "loss": 0.0029, "step": 200500 }, { "epoch": 23.660924964605947, "grad_norm": 0.11982616782188416, "learning_rate": 5.439685280718605e-05, "loss": 0.0036, "step": 200550 }, { "epoch": 23.66682397357244, "grad_norm": 0.1502465009689331, "learning_rate": 5.4378348351917604e-05, "loss": 0.0034, "step": 200600 }, { "epoch": 23.672722982538932, "grad_norm": 0.012417818419635296, "learning_rate": 5.435984329231087e-05, "loss": 0.0033, "step": 200650 }, { "epoch": 23.678621991505427, "grad_norm": 0.021501583978533745, "learning_rate": 5.4341337630920096e-05, "loss": 0.0028, "step": 200700 }, { "epoch": 23.68452100047192, "grad_norm": 0.12094459682703018, "learning_rate": 5.432283137029959e-05, "loss": 0.0024, "step": 200750 }, { "epoch": 23.690420009438416, "grad_norm": 0.3806636333465576, "learning_rate": 5.4304324513003736e-05, "loss": 0.0024, "step": 200800 }, { "epoch": 23.696319018404907, "grad_norm": 0.1833769977092743, "learning_rate": 5.428581706158704e-05, "loss": 0.0037, "step": 200850 }, { "epoch": 23.7022180273714, "grad_norm": 0.03089764341711998, "learning_rate": 5.426730901860404e-05, "loss": 0.0036, "step": 200900 }, { "epoch": 23.708117036337896, "grad_norm": 0.17324885725975037, "learning_rate": 5.42488003866094e-05, "loss": 0.0037, "step": 200950 }, { "epoch": 23.71401604530439, "grad_norm": 0.00802326388657093, "learning_rate": 5.423029116815782e-05, "loss": 0.0035, "step": 201000 }, { "epoch": 23.71401604530439, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005757386097684503, "eval_runtime": 2.0859, "eval_samples_per_second": 47.941, "eval_steps_per_second": 1.918, "eval_wer": 0.27, "step": 201000 }, { "epoch": 23.71991505427088, "grad_norm": 0.05134508013725281, "learning_rate": 5.4211781365804124e-05, "loss": 0.0032, "step": 201050 }, { "epoch": 23.725814063237376, "grad_norm": 0.03744105249643326, "learning_rate": 5.419327098210319e-05, "loss": 0.0025, "step": 201100 }, { "epoch": 23.73171307220387, "grad_norm": 0.0727916955947876, "learning_rate": 5.4174760019610004e-05, "loss": 0.0032, "step": 201150 }, { "epoch": 23.737612081170365, "grad_norm": 0.1880478411912918, "learning_rate": 5.415624848087959e-05, "loss": 0.003, "step": 201200 }, { "epoch": 23.743511090136856, "grad_norm": 0.12050607055425644, "learning_rate": 5.4137736368467084e-05, "loss": 0.004, "step": 201250 }, { "epoch": 23.74941009910335, "grad_norm": 0.038625605404376984, "learning_rate": 5.4119223684927665e-05, "loss": 0.0036, "step": 201300 }, { "epoch": 23.755309108069845, "grad_norm": 0.11252900213003159, "learning_rate": 5.410071043281666e-05, "loss": 0.0029, "step": 201350 }, { "epoch": 23.76120811703634, "grad_norm": 0.18178242444992065, "learning_rate": 5.4082196614689395e-05, "loss": 0.0036, "step": 201400 }, { "epoch": 23.76710712600283, "grad_norm": 0.028797106817364693, "learning_rate": 5.4063682233101346e-05, "loss": 0.0033, "step": 201450 }, { "epoch": 23.773006134969325, "grad_norm": 0.11591297388076782, "learning_rate": 5.404516729060801e-05, "loss": 0.0039, "step": 201500 }, { "epoch": 23.77890514393582, "grad_norm": 0.04839365929365158, "learning_rate": 5.4026651789764984e-05, "loss": 0.0037, "step": 201550 }, { "epoch": 23.78480415290231, "grad_norm": 0.03665119782090187, "learning_rate": 5.400813573312794e-05, "loss": 0.0041, "step": 201600 }, { "epoch": 23.790703161868805, "grad_norm": 0.038180433213710785, "learning_rate": 5.398961912325262e-05, "loss": 0.004, "step": 201650 }, { "epoch": 23.7966021708353, "grad_norm": 0.006158416159451008, "learning_rate": 5.397110196269486e-05, "loss": 0.0029, "step": 201700 }, { "epoch": 23.802501179801794, "grad_norm": 0.02091432921588421, "learning_rate": 5.395258425401058e-05, "loss": 0.0031, "step": 201750 }, { "epoch": 23.808400188768285, "grad_norm": 0.24011367559432983, "learning_rate": 5.3934065999755725e-05, "loss": 0.0023, "step": 201800 }, { "epoch": 23.81429919773478, "grad_norm": 0.12114392220973969, "learning_rate": 5.391554720248636e-05, "loss": 0.004, "step": 201850 }, { "epoch": 23.820198206701274, "grad_norm": 0.021701307967305183, "learning_rate": 5.389702786475862e-05, "loss": 0.0028, "step": 201900 }, { "epoch": 23.82609721566777, "grad_norm": 0.029615895822644234, "learning_rate": 5.38785079891287e-05, "loss": 0.0027, "step": 201950 }, { "epoch": 23.83199622463426, "grad_norm": 0.12490930408239365, "learning_rate": 5.385998757815287e-05, "loss": 0.0033, "step": 202000 }, { "epoch": 23.83199622463426, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005886023282073438, "eval_runtime": 2.0233, "eval_samples_per_second": 49.425, "eval_steps_per_second": 1.977, "eval_wer": 0.27, "step": 202000 }, { "epoch": 23.837895233600754, "grad_norm": 0.12295976281166077, "learning_rate": 5.38414666343875e-05, "loss": 0.0037, "step": 202050 }, { "epoch": 23.84379424256725, "grad_norm": 0.14990738034248352, "learning_rate": 5.3822945160388994e-05, "loss": 0.0038, "step": 202100 }, { "epoch": 23.849693251533743, "grad_norm": 0.20847997069358826, "learning_rate": 5.3804423158713855e-05, "loss": 0.0027, "step": 202150 }, { "epoch": 23.855592260500234, "grad_norm": 0.11537410318851471, "learning_rate": 5.3785900631918674e-05, "loss": 0.0026, "step": 202200 }, { "epoch": 23.86149126946673, "grad_norm": 0.03721575438976288, "learning_rate": 5.3767377582560076e-05, "loss": 0.0032, "step": 202250 }, { "epoch": 23.867390278433223, "grad_norm": 0.099583700299263, "learning_rate": 5.374885401319476e-05, "loss": 0.003, "step": 202300 }, { "epoch": 23.873289287399718, "grad_norm": 0.057808320969343185, "learning_rate": 5.373032992637953e-05, "loss": 0.0034, "step": 202350 }, { "epoch": 23.87918829636621, "grad_norm": 0.037890829145908356, "learning_rate": 5.371180532467125e-05, "loss": 0.004, "step": 202400 }, { "epoch": 23.885087305332704, "grad_norm": 0.015763631090521812, "learning_rate": 5.369328021062684e-05, "loss": 0.0024, "step": 202450 }, { "epoch": 23.890986314299198, "grad_norm": 0.07986859232187271, "learning_rate": 5.367475458680331e-05, "loss": 0.0039, "step": 202500 }, { "epoch": 23.896885323265693, "grad_norm": 0.05837564170360565, "learning_rate": 5.365622845575772e-05, "loss": 0.0031, "step": 202550 }, { "epoch": 23.902784332232184, "grad_norm": 0.1258251816034317, "learning_rate": 5.36377018200472e-05, "loss": 0.0033, "step": 202600 }, { "epoch": 23.908683341198678, "grad_norm": 0.05018511041998863, "learning_rate": 5.361917468222897e-05, "loss": 0.0022, "step": 202650 }, { "epoch": 23.914582350165173, "grad_norm": 0.09331151843070984, "learning_rate": 5.3600647044860307e-05, "loss": 0.0024, "step": 202700 }, { "epoch": 23.920481359131667, "grad_norm": 0.008691655471920967, "learning_rate": 5.358211891049858e-05, "loss": 0.0033, "step": 202750 }, { "epoch": 23.926380368098158, "grad_norm": 0.177779883146286, "learning_rate": 5.356359028170118e-05, "loss": 0.0028, "step": 202800 }, { "epoch": 23.932279377064653, "grad_norm": 0.1435641646385193, "learning_rate": 5.35450611610256e-05, "loss": 0.0042, "step": 202850 }, { "epoch": 23.938178386031147, "grad_norm": 0.06953854858875275, "learning_rate": 5.352653155102939e-05, "loss": 0.0046, "step": 202900 }, { "epoch": 23.94407739499764, "grad_norm": 0.22262148559093475, "learning_rate": 5.3508001454270166e-05, "loss": 0.0039, "step": 202950 }, { "epoch": 23.949976403964133, "grad_norm": 0.1051403060555458, "learning_rate": 5.3489470873305634e-05, "loss": 0.0034, "step": 203000 }, { "epoch": 23.949976403964133, "eval_cer": 0.08605851979345955, "eval_loss": 0.0009812379721552134, "eval_runtime": 2.0161, "eval_samples_per_second": 49.6, "eval_steps_per_second": 1.984, "eval_wer": 0.27, "step": 203000 }, { "epoch": 23.955875412930627, "grad_norm": 0.02251409739255905, "learning_rate": 5.347093981069352e-05, "loss": 0.0032, "step": 203050 }, { "epoch": 23.961774421897122, "grad_norm": 0.07678020000457764, "learning_rate": 5.345240826899168e-05, "loss": 0.0033, "step": 203100 }, { "epoch": 23.967673430863616, "grad_norm": 0.03280920535326004, "learning_rate": 5.3433876250757976e-05, "loss": 0.0035, "step": 203150 }, { "epoch": 23.973572439830107, "grad_norm": 0.0705048069357872, "learning_rate": 5.3415343758550375e-05, "loss": 0.0038, "step": 203200 }, { "epoch": 23.979471448796602, "grad_norm": 0.0981299951672554, "learning_rate": 5.339681079492688e-05, "loss": 0.0038, "step": 203250 }, { "epoch": 23.985370457763096, "grad_norm": 0.009632621891796589, "learning_rate": 5.337827736244558e-05, "loss": 0.0029, "step": 203300 }, { "epoch": 23.99126946672959, "grad_norm": 0.06139414384961128, "learning_rate": 5.335974346366461e-05, "loss": 0.0026, "step": 203350 }, { "epoch": 23.997168475696082, "grad_norm": 0.17845702171325684, "learning_rate": 5.334120910114222e-05, "loss": 0.0026, "step": 203400 }, { "epoch": 24.003067484662576, "grad_norm": 0.0036625356879085302, "learning_rate": 5.3322674277436656e-05, "loss": 0.0033, "step": 203450 }, { "epoch": 24.00896649362907, "grad_norm": 0.04109852388501167, "learning_rate": 5.330413899510627e-05, "loss": 0.0038, "step": 203500 }, { "epoch": 24.014865502595566, "grad_norm": 0.2499050796031952, "learning_rate": 5.328560325670945e-05, "loss": 0.0026, "step": 203550 }, { "epoch": 24.020764511562056, "grad_norm": 0.02510845474898815, "learning_rate": 5.3267067064804675e-05, "loss": 0.0028, "step": 203600 }, { "epoch": 24.02666352052855, "grad_norm": 0.04259128496050835, "learning_rate": 5.324853042195046e-05, "loss": 0.0037, "step": 203650 }, { "epoch": 24.032562529495046, "grad_norm": 0.15995575487613678, "learning_rate": 5.322999333070542e-05, "loss": 0.0023, "step": 203700 }, { "epoch": 24.03846153846154, "grad_norm": 0.04231133684515953, "learning_rate": 5.3211455793628185e-05, "loss": 0.0032, "step": 203750 }, { "epoch": 24.04436054742803, "grad_norm": 0.06766333431005478, "learning_rate": 5.319291781327749e-05, "loss": 0.0027, "step": 203800 }, { "epoch": 24.050259556394526, "grad_norm": 0.007139753084629774, "learning_rate": 5.31743793922121e-05, "loss": 0.0026, "step": 203850 }, { "epoch": 24.05615856536102, "grad_norm": 0.026096461340785027, "learning_rate": 5.3155840532990853e-05, "loss": 0.0029, "step": 203900 }, { "epoch": 24.062057574327515, "grad_norm": 0.0016816123388707638, "learning_rate": 5.313730123817264e-05, "loss": 0.0021, "step": 203950 }, { "epoch": 24.067956583294006, "grad_norm": 0.12096003443002701, "learning_rate": 5.3118761510316416e-05, "loss": 0.0034, "step": 204000 }, { "epoch": 24.067956583294006, "eval_cer": 0.08605851979345955, "eval_loss": 0.0007893825531937182, "eval_runtime": 2.1165, "eval_samples_per_second": 47.249, "eval_steps_per_second": 1.89, "eval_wer": 0.27, "step": 204000 }, { "epoch": 24.0738555922605, "grad_norm": 0.10171893239021301, "learning_rate": 5.3100221351981215e-05, "loss": 0.0022, "step": 204050 }, { "epoch": 24.079754601226995, "grad_norm": 0.08491843938827515, "learning_rate": 5.308168076572611e-05, "loss": 0.0026, "step": 204100 }, { "epoch": 24.08565361019349, "grad_norm": 0.02576964721083641, "learning_rate": 5.306313975411021e-05, "loss": 0.0029, "step": 204150 }, { "epoch": 24.09155261915998, "grad_norm": 0.006952292751520872, "learning_rate": 5.304459831969274e-05, "loss": 0.0023, "step": 204200 }, { "epoch": 24.097451628126475, "grad_norm": 0.13459618389606476, "learning_rate": 5.3026056465032936e-05, "loss": 0.0023, "step": 204250 }, { "epoch": 24.10335063709297, "grad_norm": 0.12841442227363586, "learning_rate": 5.300751419269011e-05, "loss": 0.0027, "step": 204300 }, { "epoch": 24.10924964605946, "grad_norm": 0.2529584765434265, "learning_rate": 5.298897150522364e-05, "loss": 0.0037, "step": 204350 }, { "epoch": 24.115148655025955, "grad_norm": 0.011563803069293499, "learning_rate": 5.297042840519294e-05, "loss": 0.0029, "step": 204400 }, { "epoch": 24.12104766399245, "grad_norm": 0.007979833520948887, "learning_rate": 5.29518848951575e-05, "loss": 0.0031, "step": 204450 }, { "epoch": 24.126946672958944, "grad_norm": 0.12896062433719635, "learning_rate": 5.293334097767686e-05, "loss": 0.0029, "step": 204500 }, { "epoch": 24.132845681925435, "grad_norm": 0.021119337528944016, "learning_rate": 5.29147966553106e-05, "loss": 0.0031, "step": 204550 }, { "epoch": 24.13874469089193, "grad_norm": 0.04443163424730301, "learning_rate": 5.289625193061838e-05, "loss": 0.0039, "step": 204600 }, { "epoch": 24.144643699858424, "grad_norm": 0.03938964381814003, "learning_rate": 5.287770680615992e-05, "loss": 0.0028, "step": 204650 }, { "epoch": 24.15054270882492, "grad_norm": 0.021397512406110764, "learning_rate": 5.285916128449495e-05, "loss": 0.0023, "step": 204700 }, { "epoch": 24.15644171779141, "grad_norm": 0.044538743793964386, "learning_rate": 5.284061536818333e-05, "loss": 0.0024, "step": 204750 }, { "epoch": 24.162340726757904, "grad_norm": 0.11694314330816269, "learning_rate": 5.28220690597849e-05, "loss": 0.003, "step": 204800 }, { "epoch": 24.1682397357244, "grad_norm": 0.04696257784962654, "learning_rate": 5.2803522361859594e-05, "loss": 0.002, "step": 204850 }, { "epoch": 24.174138744690893, "grad_norm": 0.13042058050632477, "learning_rate": 5.2784975276967395e-05, "loss": 0.0042, "step": 204900 }, { "epoch": 24.180037753657384, "grad_norm": 0.012959128245711327, "learning_rate": 5.2766427807668315e-05, "loss": 0.0023, "step": 204950 }, { "epoch": 24.18593676262388, "grad_norm": 0.12593835592269897, "learning_rate": 5.274787995652246e-05, "loss": 0.0029, "step": 205000 }, { "epoch": 24.18593676262388, "eval_cer": 0.08347676419965576, "eval_loss": 0.00043749812175519764, "eval_runtime": 2.1007, "eval_samples_per_second": 47.604, "eval_steps_per_second": 1.904, "eval_wer": 0.26, "step": 205000 }, { "epoch": 24.191835771590373, "grad_norm": 0.07189544290304184, "learning_rate": 5.272933172608997e-05, "loss": 0.004, "step": 205050 }, { "epoch": 24.197734780556868, "grad_norm": 0.0014845463447272778, "learning_rate": 5.2710783118931026e-05, "loss": 0.0044, "step": 205100 }, { "epoch": 24.20363378952336, "grad_norm": 0.08837633579969406, "learning_rate": 5.2692234137605865e-05, "loss": 0.0028, "step": 205150 }, { "epoch": 24.209532798489853, "grad_norm": 0.13134704530239105, "learning_rate": 5.26736847846748e-05, "loss": 0.0027, "step": 205200 }, { "epoch": 24.215431807456348, "grad_norm": 0.010990875773131847, "learning_rate": 5.265513506269816e-05, "loss": 0.003, "step": 205250 }, { "epoch": 24.221330816422842, "grad_norm": 0.026279618963599205, "learning_rate": 5.263658497423634e-05, "loss": 0.0035, "step": 205300 }, { "epoch": 24.227229825389333, "grad_norm": 0.013840099796652794, "learning_rate": 5.26180345218498e-05, "loss": 0.0036, "step": 205350 }, { "epoch": 24.233128834355828, "grad_norm": 0.059979066252708435, "learning_rate": 5.2599483708099016e-05, "loss": 0.0031, "step": 205400 }, { "epoch": 24.239027843322322, "grad_norm": 0.01230631023645401, "learning_rate": 5.258093253554457e-05, "loss": 0.0026, "step": 205450 }, { "epoch": 24.244926852288817, "grad_norm": 0.16945309937000275, "learning_rate": 5.256238100674703e-05, "loss": 0.002, "step": 205500 }, { "epoch": 24.250825861255308, "grad_norm": 0.14043954014778137, "learning_rate": 5.2543829124267054e-05, "loss": 0.0022, "step": 205550 }, { "epoch": 24.256724870221802, "grad_norm": 0.11676882207393646, "learning_rate": 5.252527689066533e-05, "loss": 0.0016, "step": 205600 }, { "epoch": 24.262623879188297, "grad_norm": 0.10012597590684891, "learning_rate": 5.250672430850259e-05, "loss": 0.0034, "step": 205650 }, { "epoch": 24.26852288815479, "grad_norm": 0.008180576376616955, "learning_rate": 5.248817138033966e-05, "loss": 0.003, "step": 205700 }, { "epoch": 24.274421897121282, "grad_norm": 0.28204014897346497, "learning_rate": 5.246961810873734e-05, "loss": 0.0027, "step": 205750 }, { "epoch": 24.280320906087777, "grad_norm": 0.052401892840862274, "learning_rate": 5.2451064496256544e-05, "loss": 0.003, "step": 205800 }, { "epoch": 24.28621991505427, "grad_norm": 0.03566604107618332, "learning_rate": 5.2432510545458193e-05, "loss": 0.0028, "step": 205850 }, { "epoch": 24.292118924020766, "grad_norm": 0.087832510471344, "learning_rate": 5.2413956258903274e-05, "loss": 0.0031, "step": 205900 }, { "epoch": 24.298017932987257, "grad_norm": 0.3998783230781555, "learning_rate": 5.23954016391528e-05, "loss": 0.0033, "step": 205950 }, { "epoch": 24.30391694195375, "grad_norm": 0.1355237066745758, "learning_rate": 5.2376846688767845e-05, "loss": 0.0021, "step": 206000 }, { "epoch": 24.30391694195375, "eval_cer": 0.08433734939759036, "eval_loss": 0.0013563546817749739, "eval_runtime": 2.0773, "eval_samples_per_second": 48.14, "eval_steps_per_second": 1.926, "eval_wer": 0.27, "step": 206000 }, { "epoch": 24.309815950920246, "grad_norm": 0.22981946170330048, "learning_rate": 5.235829141030955e-05, "loss": 0.0035, "step": 206050 }, { "epoch": 24.31571495988674, "grad_norm": 0.09547362476587296, "learning_rate": 5.233973580633905e-05, "loss": 0.0023, "step": 206100 }, { "epoch": 24.32161396885323, "grad_norm": 0.03619932383298874, "learning_rate": 5.2321179879417584e-05, "loss": 0.0021, "step": 206150 }, { "epoch": 24.327512977819726, "grad_norm": 0.21163016557693481, "learning_rate": 5.230262363210637e-05, "loss": 0.0019, "step": 206200 }, { "epoch": 24.33341198678622, "grad_norm": 0.16215068101882935, "learning_rate": 5.2284067066966716e-05, "loss": 0.0028, "step": 206250 }, { "epoch": 24.339310995752715, "grad_norm": 0.18009810149669647, "learning_rate": 5.226551018655997e-05, "loss": 0.0025, "step": 206300 }, { "epoch": 24.345210004719206, "grad_norm": 0.0018182552885264158, "learning_rate": 5.224695299344752e-05, "loss": 0.0019, "step": 206350 }, { "epoch": 24.3511090136857, "grad_norm": 0.03741103783249855, "learning_rate": 5.222839549019078e-05, "loss": 0.0023, "step": 206400 }, { "epoch": 24.357008022652195, "grad_norm": 0.11259844899177551, "learning_rate": 5.220983767935124e-05, "loss": 0.0025, "step": 206450 }, { "epoch": 24.36290703161869, "grad_norm": 0.10819394141435623, "learning_rate": 5.21912795634904e-05, "loss": 0.003, "step": 206500 }, { "epoch": 24.36880604058518, "grad_norm": 0.005127080716192722, "learning_rate": 5.217272114516981e-05, "loss": 0.0033, "step": 206550 }, { "epoch": 24.374705049551675, "grad_norm": 0.14595754444599152, "learning_rate": 5.215416242695108e-05, "loss": 0.0039, "step": 206600 }, { "epoch": 24.38060405851817, "grad_norm": 0.16843101382255554, "learning_rate": 5.213560341139583e-05, "loss": 0.0028, "step": 206650 }, { "epoch": 24.38650306748466, "grad_norm": 0.008403075858950615, "learning_rate": 5.211704410106576e-05, "loss": 0.0033, "step": 206700 }, { "epoch": 24.392402076451155, "grad_norm": 0.052637021988630295, "learning_rate": 5.209848449852258e-05, "loss": 0.0036, "step": 206750 }, { "epoch": 24.39830108541765, "grad_norm": 0.06291588395833969, "learning_rate": 5.2079924606328045e-05, "loss": 0.0027, "step": 206800 }, { "epoch": 24.404200094384144, "grad_norm": 0.13094748556613922, "learning_rate": 5.206136442704397e-05, "loss": 0.0038, "step": 206850 }, { "epoch": 24.410099103350635, "grad_norm": 0.011821294203400612, "learning_rate": 5.204280396323217e-05, "loss": 0.002, "step": 206900 }, { "epoch": 24.41599811231713, "grad_norm": 0.05404631048440933, "learning_rate": 5.202424321745454e-05, "loss": 0.0022, "step": 206950 }, { "epoch": 24.421897121283624, "grad_norm": 0.037869781255722046, "learning_rate": 5.2005682192272996e-05, "loss": 0.0025, "step": 207000 }, { "epoch": 24.421897121283624, "eval_cer": 0.08347676419965576, "eval_loss": 0.0007178888190537691, "eval_runtime": 2.0547, "eval_samples_per_second": 48.668, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 207000 }, { "epoch": 24.42779613025012, "grad_norm": 0.06147170811891556, "learning_rate": 5.1987120890249494e-05, "loss": 0.0034, "step": 207050 }, { "epoch": 24.43369513921661, "grad_norm": 0.04163504019379616, "learning_rate": 5.1968559313946044e-05, "loss": 0.0027, "step": 207100 }, { "epoch": 24.439594148183104, "grad_norm": 0.02711188793182373, "learning_rate": 5.194999746592465e-05, "loss": 0.0022, "step": 207150 }, { "epoch": 24.4454931571496, "grad_norm": 0.1841249316930771, "learning_rate": 5.19314353487474e-05, "loss": 0.0027, "step": 207200 }, { "epoch": 24.451392166116094, "grad_norm": 0.012637226842343807, "learning_rate": 5.191287296497641e-05, "loss": 0.0027, "step": 207250 }, { "epoch": 24.457291175082585, "grad_norm": 0.07444628328084946, "learning_rate": 5.189431031717379e-05, "loss": 0.0028, "step": 207300 }, { "epoch": 24.46319018404908, "grad_norm": 0.017362548038363457, "learning_rate": 5.1875747407901764e-05, "loss": 0.004, "step": 207350 }, { "epoch": 24.469089193015574, "grad_norm": 0.04160372167825699, "learning_rate": 5.185718423972251e-05, "loss": 0.0029, "step": 207400 }, { "epoch": 24.474988201982068, "grad_norm": 0.24139244854450226, "learning_rate": 5.1838620815198315e-05, "loss": 0.0032, "step": 207450 }, { "epoch": 24.48088721094856, "grad_norm": 0.014558755792677402, "learning_rate": 5.1820057136891445e-05, "loss": 0.0031, "step": 207500 }, { "epoch": 24.486786219915054, "grad_norm": 0.1111568734049797, "learning_rate": 5.180149320736422e-05, "loss": 0.0028, "step": 207550 }, { "epoch": 24.492685228881548, "grad_norm": 0.022344619035720825, "learning_rate": 5.1782929029178985e-05, "loss": 0.0029, "step": 207600 }, { "epoch": 24.498584237848043, "grad_norm": 0.020452134311199188, "learning_rate": 5.1764364604898174e-05, "loss": 0.0031, "step": 207650 }, { "epoch": 24.504483246814534, "grad_norm": 0.02738315984606743, "learning_rate": 5.1745799937084183e-05, "loss": 0.0027, "step": 207700 }, { "epoch": 24.51038225578103, "grad_norm": 0.08339492976665497, "learning_rate": 5.172723502829948e-05, "loss": 0.0025, "step": 207750 }, { "epoch": 24.516281264747523, "grad_norm": 0.040187012404203415, "learning_rate": 5.1708669881106566e-05, "loss": 0.0039, "step": 207800 }, { "epoch": 24.522180273714017, "grad_norm": 0.08126626163721085, "learning_rate": 5.1690104498067935e-05, "loss": 0.0033, "step": 207850 }, { "epoch": 24.52807928268051, "grad_norm": 0.006419636774808168, "learning_rate": 5.167153888174617e-05, "loss": 0.0031, "step": 207900 }, { "epoch": 24.533978291647003, "grad_norm": 0.032581787556409836, "learning_rate": 5.165297303470387e-05, "loss": 0.0025, "step": 207950 }, { "epoch": 24.539877300613497, "grad_norm": 0.10651110112667084, "learning_rate": 5.163440695950362e-05, "loss": 0.0035, "step": 208000 }, { "epoch": 24.539877300613497, "eval_cer": 0.08519793459552495, "eval_loss": 0.0007416805601678789, "eval_runtime": 2.0477, "eval_samples_per_second": 48.835, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 208000 }, { "epoch": 24.545776309579992, "grad_norm": 0.057858437299728394, "learning_rate": 5.161584065870811e-05, "loss": 0.0037, "step": 208050 }, { "epoch": 24.551675318546483, "grad_norm": 0.22406929731369019, "learning_rate": 5.1597274134880026e-05, "loss": 0.0035, "step": 208100 }, { "epoch": 24.557574327512977, "grad_norm": 0.02824665978550911, "learning_rate": 5.157870739058206e-05, "loss": 0.0024, "step": 208150 }, { "epoch": 24.563473336479472, "grad_norm": 0.010740534402430058, "learning_rate": 5.1560140428376956e-05, "loss": 0.0042, "step": 208200 }, { "epoch": 24.569372345445966, "grad_norm": 0.21182583272457123, "learning_rate": 5.1541573250827524e-05, "loss": 0.003, "step": 208250 }, { "epoch": 24.575271354412457, "grad_norm": 0.12382736802101135, "learning_rate": 5.152300586049652e-05, "loss": 0.0036, "step": 208300 }, { "epoch": 24.581170363378952, "grad_norm": 0.13337461650371552, "learning_rate": 5.150443825994682e-05, "loss": 0.0034, "step": 208350 }, { "epoch": 24.587069372345447, "grad_norm": 0.3765498399734497, "learning_rate": 5.148587045174128e-05, "loss": 0.0041, "step": 208400 }, { "epoch": 24.59296838131194, "grad_norm": 0.01035609282553196, "learning_rate": 5.146730243844278e-05, "loss": 0.0025, "step": 208450 }, { "epoch": 24.598867390278432, "grad_norm": 0.24785518646240234, "learning_rate": 5.144873422261426e-05, "loss": 0.0026, "step": 208500 }, { "epoch": 24.604766399244927, "grad_norm": 0.03545897826552391, "learning_rate": 5.143016580681864e-05, "loss": 0.0024, "step": 208550 }, { "epoch": 24.61066540821142, "grad_norm": 0.03165625408291817, "learning_rate": 5.141159719361891e-05, "loss": 0.0034, "step": 208600 }, { "epoch": 24.616564417177916, "grad_norm": 0.05394778400659561, "learning_rate": 5.139302838557809e-05, "loss": 0.003, "step": 208650 }, { "epoch": 24.622463426144407, "grad_norm": 0.01450174581259489, "learning_rate": 5.137445938525919e-05, "loss": 0.0028, "step": 208700 }, { "epoch": 24.6283624351109, "grad_norm": 0.029758980497717857, "learning_rate": 5.1355890195225285e-05, "loss": 0.0031, "step": 208750 }, { "epoch": 24.634261444077396, "grad_norm": 0.1798313707113266, "learning_rate": 5.1337320818039445e-05, "loss": 0.0026, "step": 208800 }, { "epoch": 24.64016045304389, "grad_norm": 0.04188016057014465, "learning_rate": 5.13187512562648e-05, "loss": 0.0022, "step": 208850 }, { "epoch": 24.64605946201038, "grad_norm": 0.015185698866844177, "learning_rate": 5.130018151246445e-05, "loss": 0.0027, "step": 208900 }, { "epoch": 24.651958470976876, "grad_norm": 0.008215803653001785, "learning_rate": 5.128161158920156e-05, "loss": 0.0029, "step": 208950 }, { "epoch": 24.65785747994337, "grad_norm": 0.0051024300046265125, "learning_rate": 5.126304148903936e-05, "loss": 0.0037, "step": 209000 }, { "epoch": 24.65785747994337, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003071193932555616, "eval_runtime": 2.0474, "eval_samples_per_second": 48.844, "eval_steps_per_second": 1.954, "eval_wer": 0.26, "step": 209000 }, { "epoch": 24.663756488909865, "grad_norm": 0.08068764954805374, "learning_rate": 5.1244471214541e-05, "loss": 0.0026, "step": 209050 }, { "epoch": 24.669655497876356, "grad_norm": 0.07828421145677567, "learning_rate": 5.122590076826975e-05, "loss": 0.0037, "step": 209100 }, { "epoch": 24.67555450684285, "grad_norm": 0.04209233075380325, "learning_rate": 5.120733015278887e-05, "loss": 0.003, "step": 209150 }, { "epoch": 24.681453515809345, "grad_norm": 0.2761760354042053, "learning_rate": 5.118875937066161e-05, "loss": 0.0024, "step": 209200 }, { "epoch": 24.68735252477584, "grad_norm": 0.22615283727645874, "learning_rate": 5.1170188424451294e-05, "loss": 0.0031, "step": 209250 }, { "epoch": 24.69325153374233, "grad_norm": 0.0182314645498991, "learning_rate": 5.1151617316721245e-05, "loss": 0.0028, "step": 209300 }, { "epoch": 24.699150542708825, "grad_norm": 0.08875065296888351, "learning_rate": 5.113304605003482e-05, "loss": 0.0038, "step": 209350 }, { "epoch": 24.70504955167532, "grad_norm": 0.009165139868855476, "learning_rate": 5.1114474626955365e-05, "loss": 0.0028, "step": 209400 }, { "epoch": 24.71094856064181, "grad_norm": 0.045029789209365845, "learning_rate": 5.10959030500463e-05, "loss": 0.0025, "step": 209450 }, { "epoch": 24.716847569608305, "grad_norm": 0.15326927602291107, "learning_rate": 5.107733132187103e-05, "loss": 0.0029, "step": 209500 }, { "epoch": 24.7227465785748, "grad_norm": 0.133149653673172, "learning_rate": 5.1058759444992954e-05, "loss": 0.0029, "step": 209550 }, { "epoch": 24.728645587541294, "grad_norm": 0.01753072254359722, "learning_rate": 5.104018742197557e-05, "loss": 0.0027, "step": 209600 }, { "epoch": 24.734544596507785, "grad_norm": 0.2891552448272705, "learning_rate": 5.102161525538234e-05, "loss": 0.0042, "step": 209650 }, { "epoch": 24.74044360547428, "grad_norm": 0.06293901056051254, "learning_rate": 5.1003042947776757e-05, "loss": 0.0029, "step": 209700 }, { "epoch": 24.746342614440774, "grad_norm": 0.026215430349111557, "learning_rate": 5.098447050172234e-05, "loss": 0.0031, "step": 209750 }, { "epoch": 24.75224162340727, "grad_norm": 0.05591927841305733, "learning_rate": 5.096589791978261e-05, "loss": 0.0028, "step": 209800 }, { "epoch": 24.75814063237376, "grad_norm": 0.06739958375692368, "learning_rate": 5.094732520452112e-05, "loss": 0.0025, "step": 209850 }, { "epoch": 24.764039641340254, "grad_norm": 0.007239018566906452, "learning_rate": 5.0928752358501465e-05, "loss": 0.0029, "step": 209900 }, { "epoch": 24.76993865030675, "grad_norm": 0.13689149916172028, "learning_rate": 5.091017938428718e-05, "loss": 0.0021, "step": 209950 }, { "epoch": 24.775837659273243, "grad_norm": 0.06428669393062592, "learning_rate": 5.0891606284441926e-05, "loss": 0.0032, "step": 210000 }, { "epoch": 24.775837659273243, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003409715718589723, "eval_runtime": 2.1402, "eval_samples_per_second": 46.725, "eval_steps_per_second": 1.869, "eval_wer": 0.26, "step": 210000 }, { "epoch": 24.781736668239734, "grad_norm": 0.19657549262046814, "learning_rate": 5.087303306152931e-05, "loss": 0.0032, "step": 210050 }, { "epoch": 24.78763567720623, "grad_norm": 0.20869454741477966, "learning_rate": 5.0854459718112954e-05, "loss": 0.0025, "step": 210100 }, { "epoch": 24.793534686172723, "grad_norm": 0.12750780582427979, "learning_rate": 5.0835886256756524e-05, "loss": 0.0028, "step": 210150 }, { "epoch": 24.799433695139218, "grad_norm": 0.003986249212175608, "learning_rate": 5.081731268002371e-05, "loss": 0.0028, "step": 210200 }, { "epoch": 24.80533270410571, "grad_norm": 0.014941445551812649, "learning_rate": 5.079873899047817e-05, "loss": 0.0035, "step": 210250 }, { "epoch": 24.811231713072203, "grad_norm": 0.0783686563372612, "learning_rate": 5.078016519068364e-05, "loss": 0.0019, "step": 210300 }, { "epoch": 24.817130722038698, "grad_norm": 0.23493129014968872, "learning_rate": 5.07615912832038e-05, "loss": 0.0024, "step": 210350 }, { "epoch": 24.823029731005192, "grad_norm": 0.002144575584679842, "learning_rate": 5.0743017270602434e-05, "loss": 0.0029, "step": 210400 }, { "epoch": 24.828928739971683, "grad_norm": 0.27616244554519653, "learning_rate": 5.072444315544326e-05, "loss": 0.0029, "step": 210450 }, { "epoch": 24.834827748938178, "grad_norm": 0.1646765172481537, "learning_rate": 5.070586894029006e-05, "loss": 0.0034, "step": 210500 }, { "epoch": 24.840726757904672, "grad_norm": 0.03239620104432106, "learning_rate": 5.068729462770658e-05, "loss": 0.0016, "step": 210550 }, { "epoch": 24.846625766871167, "grad_norm": 0.010646622627973557, "learning_rate": 5.066872022025664e-05, "loss": 0.0026, "step": 210600 }, { "epoch": 24.852524775837658, "grad_norm": 0.11541419476270676, "learning_rate": 5.065014572050403e-05, "loss": 0.0029, "step": 210650 }, { "epoch": 24.858423784804152, "grad_norm": 0.11899608373641968, "learning_rate": 5.063157113101258e-05, "loss": 0.0029, "step": 210700 }, { "epoch": 24.864322793770647, "grad_norm": 0.15568041801452637, "learning_rate": 5.0612996454346095e-05, "loss": 0.0033, "step": 210750 }, { "epoch": 24.87022180273714, "grad_norm": 0.21394017338752747, "learning_rate": 5.059442169306844e-05, "loss": 0.0037, "step": 210800 }, { "epoch": 24.876120811703633, "grad_norm": 0.1298477053642273, "learning_rate": 5.0575846849743456e-05, "loss": 0.0033, "step": 210850 }, { "epoch": 24.882019820670127, "grad_norm": 0.1610802710056305, "learning_rate": 5.0557271926935e-05, "loss": 0.0025, "step": 210900 }, { "epoch": 24.88791882963662, "grad_norm": 0.01591372676193714, "learning_rate": 5.0538696927206954e-05, "loss": 0.0028, "step": 210950 }, { "epoch": 24.893817838603116, "grad_norm": 0.014780929312109947, "learning_rate": 5.052012185312322e-05, "loss": 0.0036, "step": 211000 }, { "epoch": 24.893817838603116, "eval_cer": 0.08347676419965576, "eval_loss": 0.00031399662839248776, "eval_runtime": 2.0898, "eval_samples_per_second": 47.851, "eval_steps_per_second": 1.914, "eval_wer": 0.26, "step": 211000 }, { "epoch": 24.899716847569607, "grad_norm": 0.005211938638240099, "learning_rate": 5.050154670724766e-05, "loss": 0.0034, "step": 211050 }, { "epoch": 24.9056158565361, "grad_norm": 0.11502063274383545, "learning_rate": 5.0482971492144206e-05, "loss": 0.0023, "step": 211100 }, { "epoch": 24.911514865502596, "grad_norm": 0.1269443929195404, "learning_rate": 5.046439621037676e-05, "loss": 0.0029, "step": 211150 }, { "epoch": 24.91741387446909, "grad_norm": 0.3220004439353943, "learning_rate": 5.0445820864509255e-05, "loss": 0.0033, "step": 211200 }, { "epoch": 24.92331288343558, "grad_norm": 0.011535336263477802, "learning_rate": 5.042724545710562e-05, "loss": 0.0025, "step": 211250 }, { "epoch": 24.929211892402076, "grad_norm": 0.036271002143621445, "learning_rate": 5.040866999072978e-05, "loss": 0.0027, "step": 211300 }, { "epoch": 24.93511090136857, "grad_norm": 0.21406634151935577, "learning_rate": 5.039009446794572e-05, "loss": 0.0031, "step": 211350 }, { "epoch": 24.941009910335065, "grad_norm": 0.16697105765342712, "learning_rate": 5.037151889131737e-05, "loss": 0.0026, "step": 211400 }, { "epoch": 24.946908919301556, "grad_norm": 0.06118035316467285, "learning_rate": 5.0352943263408715e-05, "loss": 0.0039, "step": 211450 }, { "epoch": 24.95280792826805, "grad_norm": 0.20273283123970032, "learning_rate": 5.0334367586783714e-05, "loss": 0.0031, "step": 211500 }, { "epoch": 24.958706937234545, "grad_norm": 0.05992729216814041, "learning_rate": 5.031579186400635e-05, "loss": 0.0029, "step": 211550 }, { "epoch": 24.96460594620104, "grad_norm": 0.07664177566766739, "learning_rate": 5.0297216097640585e-05, "loss": 0.0036, "step": 211600 }, { "epoch": 24.97050495516753, "grad_norm": 0.04169083759188652, "learning_rate": 5.027864029025044e-05, "loss": 0.0025, "step": 211650 }, { "epoch": 24.976403964134025, "grad_norm": 0.0030015490483492613, "learning_rate": 5.026006444439991e-05, "loss": 0.0024, "step": 211700 }, { "epoch": 24.98230297310052, "grad_norm": 0.17102773487567902, "learning_rate": 5.0241488562652996e-05, "loss": 0.0026, "step": 211750 }, { "epoch": 24.98820198206701, "grad_norm": 0.007077766582369804, "learning_rate": 5.022291264757369e-05, "loss": 0.0024, "step": 211800 }, { "epoch": 24.994100991033505, "grad_norm": 0.20274071395397186, "learning_rate": 5.020433670172602e-05, "loss": 0.003, "step": 211850 }, { "epoch": 25.0, "grad_norm": 0.013296147808432579, "learning_rate": 5.018576072767398e-05, "loss": 0.0044, "step": 211900 }, { "epoch": 25.005899008966495, "grad_norm": 0.15966816246509552, "learning_rate": 5.016718472798162e-05, "loss": 0.0026, "step": 211950 }, { "epoch": 25.011798017932986, "grad_norm": 0.11505299061536789, "learning_rate": 5.014860870521293e-05, "loss": 0.0029, "step": 212000 }, { "epoch": 25.011798017932986, "eval_cer": 0.08433734939759036, "eval_loss": 0.00288334209471941, "eval_runtime": 2.0447, "eval_samples_per_second": 48.906, "eval_steps_per_second": 1.956, "eval_wer": 0.26, "step": 212000 }, { "epoch": 25.01769702689948, "grad_norm": 0.1032780259847641, "learning_rate": 5.013003266193197e-05, "loss": 0.003, "step": 212050 }, { "epoch": 25.023596035865975, "grad_norm": 0.012024047784507275, "learning_rate": 5.011145660070276e-05, "loss": 0.0018, "step": 212100 }, { "epoch": 25.02949504483247, "grad_norm": 0.011459054425358772, "learning_rate": 5.0092880524089325e-05, "loss": 0.0022, "step": 212150 }, { "epoch": 25.03539405379896, "grad_norm": 0.01965515874326229, "learning_rate": 5.0074304434655696e-05, "loss": 0.002, "step": 212200 }, { "epoch": 25.041293062765455, "grad_norm": 0.028584901243448257, "learning_rate": 5.00557283349659e-05, "loss": 0.0027, "step": 212250 }, { "epoch": 25.04719207173195, "grad_norm": 0.02111070230603218, "learning_rate": 5.0037152227583995e-05, "loss": 0.0033, "step": 212300 }, { "epoch": 25.053091080698444, "grad_norm": 0.03140688315033913, "learning_rate": 5.0018576115074024e-05, "loss": 0.0027, "step": 212350 }, { "epoch": 25.058990089664935, "grad_norm": 0.1866319179534912, "learning_rate": 5e-05, "loss": 0.0027, "step": 212400 }, { "epoch": 25.06488909863143, "grad_norm": 0.07425747811794281, "learning_rate": 4.998142388492599e-05, "loss": 0.0027, "step": 212450 }, { "epoch": 25.070788107597924, "grad_norm": 0.22990378737449646, "learning_rate": 4.996284777241601e-05, "loss": 0.0029, "step": 212500 }, { "epoch": 25.07668711656442, "grad_norm": 0.05782562494277954, "learning_rate": 4.994427166503411e-05, "loss": 0.0036, "step": 212550 }, { "epoch": 25.08258612553091, "grad_norm": 0.21264763176441193, "learning_rate": 4.992569556534432e-05, "loss": 0.0025, "step": 212600 }, { "epoch": 25.088485134497404, "grad_norm": 0.2902589738368988, "learning_rate": 4.990711947591069e-05, "loss": 0.0026, "step": 212650 }, { "epoch": 25.0943841434639, "grad_norm": 0.07600565999746323, "learning_rate": 4.988854339929725e-05, "loss": 0.0021, "step": 212700 }, { "epoch": 25.100283152430393, "grad_norm": 0.16211408376693726, "learning_rate": 4.986996733806803e-05, "loss": 0.003, "step": 212750 }, { "epoch": 25.106182161396884, "grad_norm": 0.09684325009584427, "learning_rate": 4.985139129478707e-05, "loss": 0.0026, "step": 212800 }, { "epoch": 25.11208117036338, "grad_norm": 0.20292864739894867, "learning_rate": 4.98328152720184e-05, "loss": 0.0028, "step": 212850 }, { "epoch": 25.117980179329873, "grad_norm": 0.11965112388134003, "learning_rate": 4.981423927232603e-05, "loss": 0.0026, "step": 212900 }, { "epoch": 25.123879188296367, "grad_norm": 0.01881955750286579, "learning_rate": 4.9795663298274006e-05, "loss": 0.0028, "step": 212950 }, { "epoch": 25.12977819726286, "grad_norm": 0.20046518743038177, "learning_rate": 4.9777087352426326e-05, "loss": 0.0025, "step": 213000 }, { "epoch": 25.12977819726286, "eval_cer": 0.08691910499139414, "eval_loss": 0.001651424914598465, "eval_runtime": 2.0828, "eval_samples_per_second": 48.012, "eval_steps_per_second": 1.92, "eval_wer": 0.27, "step": 213000 }, { "epoch": 25.135677206229353, "grad_norm": 0.03388666361570358, "learning_rate": 4.975851143734703e-05, "loss": 0.002, "step": 213050 }, { "epoch": 25.141576215195848, "grad_norm": 0.13501840829849243, "learning_rate": 4.973993555560011e-05, "loss": 0.0025, "step": 213100 }, { "epoch": 25.147475224162342, "grad_norm": 0.24824045598506927, "learning_rate": 4.972135970974958e-05, "loss": 0.0034, "step": 213150 }, { "epoch": 25.153374233128833, "grad_norm": 0.04620514437556267, "learning_rate": 4.9702783902359426e-05, "loss": 0.0032, "step": 213200 }, { "epoch": 25.159273242095328, "grad_norm": 0.145551398396492, "learning_rate": 4.968420813599367e-05, "loss": 0.0019, "step": 213250 }, { "epoch": 25.165172251061822, "grad_norm": 0.5026041865348816, "learning_rate": 4.966563241321629e-05, "loss": 0.0032, "step": 213300 }, { "epoch": 25.171071260028317, "grad_norm": 0.02402862347662449, "learning_rate": 4.964705673659128e-05, "loss": 0.0034, "step": 213350 }, { "epoch": 25.176970268994808, "grad_norm": 0.018238598480820656, "learning_rate": 4.9628481108682624e-05, "loss": 0.0025, "step": 213400 }, { "epoch": 25.182869277961302, "grad_norm": 0.06929679960012436, "learning_rate": 4.960990553205428e-05, "loss": 0.0027, "step": 213450 }, { "epoch": 25.188768286927797, "grad_norm": 0.02517264522612095, "learning_rate": 4.959133000927022e-05, "loss": 0.0025, "step": 213500 }, { "epoch": 25.19466729589429, "grad_norm": 0.013441038317978382, "learning_rate": 4.957275454289439e-05, "loss": 0.0021, "step": 213550 }, { "epoch": 25.200566304860782, "grad_norm": 0.08381810039281845, "learning_rate": 4.955417913549075e-05, "loss": 0.0026, "step": 213600 }, { "epoch": 25.206465313827277, "grad_norm": 0.10888919979333878, "learning_rate": 4.9535603789623245e-05, "loss": 0.0029, "step": 213650 }, { "epoch": 25.21236432279377, "grad_norm": 0.025466563180088997, "learning_rate": 4.95170285078558e-05, "loss": 0.0027, "step": 213700 }, { "epoch": 25.218263331760266, "grad_norm": 0.007743606809526682, "learning_rate": 4.9498453292752346e-05, "loss": 0.0017, "step": 213750 }, { "epoch": 25.224162340726757, "grad_norm": 0.0346931628882885, "learning_rate": 4.9479878146876794e-05, "loss": 0.002, "step": 213800 }, { "epoch": 25.23006134969325, "grad_norm": 0.24605205655097961, "learning_rate": 4.946130307279305e-05, "loss": 0.0025, "step": 213850 }, { "epoch": 25.235960358659746, "grad_norm": 0.05775510147213936, "learning_rate": 4.9442728073065004e-05, "loss": 0.0029, "step": 213900 }, { "epoch": 25.24185936762624, "grad_norm": 0.22486606240272522, "learning_rate": 4.9424153150256555e-05, "loss": 0.0025, "step": 213950 }, { "epoch": 25.24775837659273, "grad_norm": 0.1952757090330124, "learning_rate": 4.940557830693157e-05, "loss": 0.0025, "step": 214000 }, { "epoch": 25.24775837659273, "eval_cer": 0.08519793459552495, "eval_loss": 0.0032264466863125563, "eval_runtime": 2.0399, "eval_samples_per_second": 49.022, "eval_steps_per_second": 1.961, "eval_wer": 0.26, "step": 214000 }, { "epoch": 25.253657385559226, "grad_norm": 0.033200304955244064, "learning_rate": 4.938700354565392e-05, "loss": 0.0028, "step": 214050 }, { "epoch": 25.25955639452572, "grad_norm": 0.024513188749551773, "learning_rate": 4.9368428868987435e-05, "loss": 0.0021, "step": 214100 }, { "epoch": 25.265455403492215, "grad_norm": 0.01382982637733221, "learning_rate": 4.9349854279495984e-05, "loss": 0.0013, "step": 214150 }, { "epoch": 25.271354412458706, "grad_norm": 0.013455763459205627, "learning_rate": 4.933127977974338e-05, "loss": 0.0028, "step": 214200 }, { "epoch": 25.2772534214252, "grad_norm": 0.0161344762891531, "learning_rate": 4.931270537229343e-05, "loss": 0.0019, "step": 214250 }, { "epoch": 25.283152430391695, "grad_norm": 0.16213533282279968, "learning_rate": 4.929413105970996e-05, "loss": 0.0031, "step": 214300 }, { "epoch": 25.28905143935819, "grad_norm": 0.006447490304708481, "learning_rate": 4.9275556844556744e-05, "loss": 0.0021, "step": 214350 }, { "epoch": 25.29495044832468, "grad_norm": 0.14906089007854462, "learning_rate": 4.925698272939758e-05, "loss": 0.0029, "step": 214400 }, { "epoch": 25.300849457291175, "grad_norm": 0.1908620148897171, "learning_rate": 4.9238408716796205e-05, "loss": 0.0027, "step": 214450 }, { "epoch": 25.30674846625767, "grad_norm": 0.02020317129790783, "learning_rate": 4.921983480931639e-05, "loss": 0.0029, "step": 214500 }, { "epoch": 25.31264747522416, "grad_norm": 0.030454710125923157, "learning_rate": 4.920126100952184e-05, "loss": 0.0025, "step": 214550 }, { "epoch": 25.318546484190655, "grad_norm": 0.09151306003332138, "learning_rate": 4.918268731997631e-05, "loss": 0.0028, "step": 214600 }, { "epoch": 25.32444549315715, "grad_norm": 0.07586164027452469, "learning_rate": 4.9164113743243494e-05, "loss": 0.0031, "step": 214650 }, { "epoch": 25.330344502123644, "grad_norm": 0.10989215970039368, "learning_rate": 4.914554028188707e-05, "loss": 0.0022, "step": 214700 }, { "epoch": 25.336243511090135, "grad_norm": 0.07379806786775589, "learning_rate": 4.912696693847072e-05, "loss": 0.0022, "step": 214750 }, { "epoch": 25.34214252005663, "grad_norm": 0.049549635499715805, "learning_rate": 4.910839371555809e-05, "loss": 0.0022, "step": 214800 }, { "epoch": 25.348041529023124, "grad_norm": 0.2374982386827469, "learning_rate": 4.908982061571283e-05, "loss": 0.0023, "step": 214850 }, { "epoch": 25.35394053798962, "grad_norm": 0.05730825290083885, "learning_rate": 4.907124764149855e-05, "loss": 0.0026, "step": 214900 }, { "epoch": 25.35983954695611, "grad_norm": 0.03968815505504608, "learning_rate": 4.905267479547888e-05, "loss": 0.0023, "step": 214950 }, { "epoch": 25.365738555922604, "grad_norm": 0.17633798718452454, "learning_rate": 4.903410208021739e-05, "loss": 0.0026, "step": 215000 }, { "epoch": 25.365738555922604, "eval_cer": 0.08519793459552495, "eval_loss": 0.002480910625308752, "eval_runtime": 2.0444, "eval_samples_per_second": 48.915, "eval_steps_per_second": 1.957, "eval_wer": 0.26, "step": 215000 }, { "epoch": 25.3716375648891, "grad_norm": 0.018874846398830414, "learning_rate": 4.901552949827766e-05, "loss": 0.0035, "step": 215050 }, { "epoch": 25.377536573855593, "grad_norm": 0.025448400527238846, "learning_rate": 4.899695705222324e-05, "loss": 0.0029, "step": 215100 }, { "epoch": 25.383435582822084, "grad_norm": 0.2984764873981476, "learning_rate": 4.897838474461766e-05, "loss": 0.0032, "step": 215150 }, { "epoch": 25.38933459178858, "grad_norm": 0.13856174051761627, "learning_rate": 4.8959812578024436e-05, "loss": 0.0027, "step": 215200 }, { "epoch": 25.395233600755073, "grad_norm": 0.00499220984056592, "learning_rate": 4.8941240555007044e-05, "loss": 0.0029, "step": 215250 }, { "epoch": 25.401132609721568, "grad_norm": 0.2139102965593338, "learning_rate": 4.892266867812899e-05, "loss": 0.0033, "step": 215300 }, { "epoch": 25.40703161868806, "grad_norm": 0.40294066071510315, "learning_rate": 4.890409694995371e-05, "loss": 0.004, "step": 215350 }, { "epoch": 25.412930627654553, "grad_norm": 0.04787183180451393, "learning_rate": 4.888552537304464e-05, "loss": 0.003, "step": 215400 }, { "epoch": 25.418829636621048, "grad_norm": 0.0046961442567408085, "learning_rate": 4.8866953949965194e-05, "loss": 0.0024, "step": 215450 }, { "epoch": 25.424728645587543, "grad_norm": 0.14896075427532196, "learning_rate": 4.884838268327877e-05, "loss": 0.0033, "step": 215500 }, { "epoch": 25.430627654554034, "grad_norm": 0.32832396030426025, "learning_rate": 4.882981157554872e-05, "loss": 0.0032, "step": 215550 }, { "epoch": 25.436526663520528, "grad_norm": 0.08969693630933762, "learning_rate": 4.881124062933839e-05, "loss": 0.0029, "step": 215600 }, { "epoch": 25.442425672487023, "grad_norm": 0.03160577639937401, "learning_rate": 4.879266984721114e-05, "loss": 0.0026, "step": 215650 }, { "epoch": 25.448324681453517, "grad_norm": 0.09246065467596054, "learning_rate": 4.877409923173025e-05, "loss": 0.0036, "step": 215700 }, { "epoch": 25.454223690420008, "grad_norm": 0.05594652146100998, "learning_rate": 4.8755528785459006e-05, "loss": 0.0032, "step": 215750 }, { "epoch": 25.460122699386503, "grad_norm": 0.1728009730577469, "learning_rate": 4.873695851096066e-05, "loss": 0.0019, "step": 215800 }, { "epoch": 25.466021708352997, "grad_norm": 0.04783838987350464, "learning_rate": 4.871838841079845e-05, "loss": 0.0021, "step": 215850 }, { "epoch": 25.47192071731949, "grad_norm": 0.09627915173768997, "learning_rate": 4.869981848753556e-05, "loss": 0.0027, "step": 215900 }, { "epoch": 25.477819726285983, "grad_norm": 0.20127400755882263, "learning_rate": 4.868124874373522e-05, "loss": 0.0033, "step": 215950 }, { "epoch": 25.483718735252477, "grad_norm": 0.003913762979209423, "learning_rate": 4.8662679181960566e-05, "loss": 0.0022, "step": 216000 }, { "epoch": 25.483718735252477, "eval_cer": 0.08519793459552495, "eval_loss": 0.0012606538366526365, "eval_runtime": 2.0853, "eval_samples_per_second": 47.954, "eval_steps_per_second": 1.918, "eval_wer": 0.26, "step": 216000 }, { "epoch": 25.48961774421897, "grad_norm": 0.05143974348902702, "learning_rate": 4.8644109804774726e-05, "loss": 0.0025, "step": 216050 }, { "epoch": 25.495516753185466, "grad_norm": 0.0104188472032547, "learning_rate": 4.8625540614740816e-05, "loss": 0.0024, "step": 216100 }, { "epoch": 25.501415762151957, "grad_norm": 0.05112294480204582, "learning_rate": 4.860697161442192e-05, "loss": 0.0021, "step": 216150 }, { "epoch": 25.507314771118452, "grad_norm": 0.03723694756627083, "learning_rate": 4.8588402806381094e-05, "loss": 0.0031, "step": 216200 }, { "epoch": 25.513213780084946, "grad_norm": 0.18271103501319885, "learning_rate": 4.856983419318138e-05, "loss": 0.0025, "step": 216250 }, { "epoch": 25.51911278905144, "grad_norm": 0.028968049213290215, "learning_rate": 4.8551265777385766e-05, "loss": 0.0027, "step": 216300 }, { "epoch": 25.525011798017932, "grad_norm": 0.13445472717285156, "learning_rate": 4.853269756155724e-05, "loss": 0.003, "step": 216350 }, { "epoch": 25.530910806984426, "grad_norm": 0.12597781419754028, "learning_rate": 4.851412954825874e-05, "loss": 0.0025, "step": 216400 }, { "epoch": 25.53680981595092, "grad_norm": 0.06074519827961922, "learning_rate": 4.84955617400532e-05, "loss": 0.0034, "step": 216450 }, { "epoch": 25.542708824917415, "grad_norm": 0.19781731069087982, "learning_rate": 4.847699413950349e-05, "loss": 0.0024, "step": 216500 }, { "epoch": 25.548607833883906, "grad_norm": 0.10138993710279465, "learning_rate": 4.8458426749172494e-05, "loss": 0.0024, "step": 216550 }, { "epoch": 25.5545068428504, "grad_norm": 0.00716885132715106, "learning_rate": 4.8439859571623035e-05, "loss": 0.0021, "step": 216600 }, { "epoch": 25.560405851816896, "grad_norm": 0.028040723875164986, "learning_rate": 4.842129260941795e-05, "loss": 0.0028, "step": 216650 }, { "epoch": 25.56630486078339, "grad_norm": 0.0902816504240036, "learning_rate": 4.840272586511998e-05, "loss": 0.0028, "step": 216700 }, { "epoch": 25.57220386974988, "grad_norm": 0.2508023679256439, "learning_rate": 4.8384159341291887e-05, "loss": 0.0027, "step": 216750 }, { "epoch": 25.578102878716376, "grad_norm": 0.05463327839970589, "learning_rate": 4.836559304049638e-05, "loss": 0.0032, "step": 216800 }, { "epoch": 25.58400188768287, "grad_norm": 0.006674876436591148, "learning_rate": 4.834702696529614e-05, "loss": 0.0024, "step": 216850 }, { "epoch": 25.589900896649365, "grad_norm": 0.005626855418086052, "learning_rate": 4.8328461118253826e-05, "loss": 0.0025, "step": 216900 }, { "epoch": 25.595799905615856, "grad_norm": 0.19360969960689545, "learning_rate": 4.830989550193207e-05, "loss": 0.003, "step": 216950 }, { "epoch": 25.60169891458235, "grad_norm": 0.06045164540410042, "learning_rate": 4.8291330118893446e-05, "loss": 0.0029, "step": 217000 }, { "epoch": 25.60169891458235, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004040388739667833, "eval_runtime": 2.0234, "eval_samples_per_second": 49.422, "eval_steps_per_second": 1.977, "eval_wer": 0.26, "step": 217000 }, { "epoch": 25.607597923548845, "grad_norm": 0.003052718238905072, "learning_rate": 4.827276497170052e-05, "loss": 0.0027, "step": 217050 }, { "epoch": 25.61349693251534, "grad_norm": 0.06179841235280037, "learning_rate": 4.825420006291582e-05, "loss": 0.0026, "step": 217100 }, { "epoch": 25.61939594148183, "grad_norm": 0.18618354201316833, "learning_rate": 4.823563539510184e-05, "loss": 0.0035, "step": 217150 }, { "epoch": 25.625294950448325, "grad_norm": 0.008615842089056969, "learning_rate": 4.821707097082102e-05, "loss": 0.0035, "step": 217200 }, { "epoch": 25.63119395941482, "grad_norm": 0.12641236186027527, "learning_rate": 4.819850679263579e-05, "loss": 0.0027, "step": 217250 }, { "epoch": 25.63709296838131, "grad_norm": 0.013244721107184887, "learning_rate": 4.817994286310857e-05, "loss": 0.0026, "step": 217300 }, { "epoch": 25.642991977347805, "grad_norm": 0.002075859112665057, "learning_rate": 4.8161379184801696e-05, "loss": 0.0025, "step": 217350 }, { "epoch": 25.6488909863143, "grad_norm": 0.0937531515955925, "learning_rate": 4.814281576027749e-05, "loss": 0.0023, "step": 217400 }, { "epoch": 25.654789995280794, "grad_norm": 0.014157408848404884, "learning_rate": 4.8124252592098254e-05, "loss": 0.0028, "step": 217450 }, { "epoch": 25.660689004247285, "grad_norm": 0.0074109649285674095, "learning_rate": 4.810568968282622e-05, "loss": 0.0029, "step": 217500 }, { "epoch": 25.66658801321378, "grad_norm": 0.009144342504441738, "learning_rate": 4.80871270350236e-05, "loss": 0.0018, "step": 217550 }, { "epoch": 25.672487022180274, "grad_norm": 0.0033101621083915234, "learning_rate": 4.80685646512526e-05, "loss": 0.004, "step": 217600 }, { "epoch": 25.67838603114677, "grad_norm": 0.1814204305410385, "learning_rate": 4.8050002534075355e-05, "loss": 0.0025, "step": 217650 }, { "epoch": 25.68428504011326, "grad_norm": 0.006984215695410967, "learning_rate": 4.8031440686053975e-05, "loss": 0.0028, "step": 217700 }, { "epoch": 25.690184049079754, "grad_norm": 0.21149593591690063, "learning_rate": 4.801287910975052e-05, "loss": 0.0019, "step": 217750 }, { "epoch": 25.69608305804625, "grad_norm": 0.13096989691257477, "learning_rate": 4.799431780772702e-05, "loss": 0.0025, "step": 217800 }, { "epoch": 25.701982067012743, "grad_norm": 0.34093767404556274, "learning_rate": 4.797575678254547e-05, "loss": 0.0027, "step": 217850 }, { "epoch": 25.707881075979234, "grad_norm": 0.01785815879702568, "learning_rate": 4.795719603676785e-05, "loss": 0.0022, "step": 217900 }, { "epoch": 25.71378008494573, "grad_norm": 0.01831400953233242, "learning_rate": 4.793863557295606e-05, "loss": 0.0021, "step": 217950 }, { "epoch": 25.719679093912223, "grad_norm": 0.07025427371263504, "learning_rate": 4.792007539367197e-05, "loss": 0.0024, "step": 218000 }, { "epoch": 25.719679093912223, "eval_cer": 0.08605851979345955, "eval_loss": 0.00042366606066934764, "eval_runtime": 2.056, "eval_samples_per_second": 48.638, "eval_steps_per_second": 1.946, "eval_wer": 0.27, "step": 218000 }, { "epoch": 25.725578102878718, "grad_norm": 0.10540129989385605, "learning_rate": 4.7901515501477443e-05, "loss": 0.0044, "step": 218050 }, { "epoch": 25.73147711184521, "grad_norm": 0.30812567472457886, "learning_rate": 4.788295589893426e-05, "loss": 0.0034, "step": 218100 }, { "epoch": 25.737376120811703, "grad_norm": 0.13508561253547668, "learning_rate": 4.7864396588604175e-05, "loss": 0.0022, "step": 218150 }, { "epoch": 25.743275129778198, "grad_norm": 0.0013485491508617997, "learning_rate": 4.784583757304893e-05, "loss": 0.0021, "step": 218200 }, { "epoch": 25.749174138744692, "grad_norm": 0.04413854330778122, "learning_rate": 4.782727885483018e-05, "loss": 0.0029, "step": 218250 }, { "epoch": 25.755073147711183, "grad_norm": 0.0939720943570137, "learning_rate": 4.7808720436509596e-05, "loss": 0.0029, "step": 218300 }, { "epoch": 25.760972156677678, "grad_norm": 0.2973043918609619, "learning_rate": 4.779016232064876e-05, "loss": 0.0027, "step": 218350 }, { "epoch": 25.766871165644172, "grad_norm": 0.05231078341603279, "learning_rate": 4.7771604509809215e-05, "loss": 0.0023, "step": 218400 }, { "epoch": 25.772770174610667, "grad_norm": 0.14316894114017487, "learning_rate": 4.775304700655249e-05, "loss": 0.0027, "step": 218450 }, { "epoch": 25.778669183577158, "grad_norm": 0.008201136253774166, "learning_rate": 4.773448981344004e-05, "loss": 0.0027, "step": 218500 }, { "epoch": 25.784568192543652, "grad_norm": 0.033317115157842636, "learning_rate": 4.771593293303329e-05, "loss": 0.0042, "step": 218550 }, { "epoch": 25.790467201510147, "grad_norm": 0.0855126604437828, "learning_rate": 4.7697376367893646e-05, "loss": 0.0032, "step": 218600 }, { "epoch": 25.79636621047664, "grad_norm": 0.040869612246751785, "learning_rate": 4.7678820120582434e-05, "loss": 0.003, "step": 218650 }, { "epoch": 25.802265219443132, "grad_norm": 0.08830059319734573, "learning_rate": 4.7660264193660954e-05, "loss": 0.0028, "step": 218700 }, { "epoch": 25.808164228409627, "grad_norm": 0.01327885128557682, "learning_rate": 4.764170858969046e-05, "loss": 0.0022, "step": 218750 }, { "epoch": 25.81406323737612, "grad_norm": 0.1319090574979782, "learning_rate": 4.762315331123216e-05, "loss": 0.0028, "step": 218800 }, { "epoch": 25.819962246342616, "grad_norm": 0.23876923322677612, "learning_rate": 4.760459836084722e-05, "loss": 0.0028, "step": 218850 }, { "epoch": 25.825861255309107, "grad_norm": 0.1190933957695961, "learning_rate": 4.758604374109674e-05, "loss": 0.0036, "step": 218900 }, { "epoch": 25.8317602642756, "grad_norm": 0.09317845851182938, "learning_rate": 4.756748945454181e-05, "loss": 0.0036, "step": 218950 }, { "epoch": 25.837659273242096, "grad_norm": 0.1541723757982254, "learning_rate": 4.754893550374346e-05, "loss": 0.0024, "step": 219000 }, { "epoch": 25.837659273242096, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004147661093156785, "eval_runtime": 2.0235, "eval_samples_per_second": 49.419, "eval_steps_per_second": 1.977, "eval_wer": 0.26, "step": 219000 }, { "epoch": 25.84355828220859, "grad_norm": 0.1268303245306015, "learning_rate": 4.753038189126267e-05, "loss": 0.0023, "step": 219050 }, { "epoch": 25.84945729117508, "grad_norm": 0.017972545698285103, "learning_rate": 4.751182861966036e-05, "loss": 0.003, "step": 219100 }, { "epoch": 25.855356300141576, "grad_norm": 0.04830751568078995, "learning_rate": 4.7493275691497417e-05, "loss": 0.0034, "step": 219150 }, { "epoch": 25.86125530910807, "grad_norm": 0.042617689818143845, "learning_rate": 4.7474723109334685e-05, "loss": 0.0033, "step": 219200 }, { "epoch": 25.867154318074565, "grad_norm": 0.17979796230793, "learning_rate": 4.745617087573296e-05, "loss": 0.0033, "step": 219250 }, { "epoch": 25.873053327041056, "grad_norm": 0.0951690599322319, "learning_rate": 4.743761899325298e-05, "loss": 0.0019, "step": 219300 }, { "epoch": 25.87895233600755, "grad_norm": 0.019677771255373955, "learning_rate": 4.7419067464455444e-05, "loss": 0.0021, "step": 219350 }, { "epoch": 25.884851344974045, "grad_norm": 0.13000093400478363, "learning_rate": 4.740051629190099e-05, "loss": 0.0025, "step": 219400 }, { "epoch": 25.89075035394054, "grad_norm": 0.17496483027935028, "learning_rate": 4.7381965478150226e-05, "loss": 0.0025, "step": 219450 }, { "epoch": 25.89664936290703, "grad_norm": 0.1400487869977951, "learning_rate": 4.7363415025763675e-05, "loss": 0.0026, "step": 219500 }, { "epoch": 25.902548371873525, "grad_norm": 0.026284152641892433, "learning_rate": 4.734486493730186e-05, "loss": 0.0022, "step": 219550 }, { "epoch": 25.90844738084002, "grad_norm": 0.16566318273544312, "learning_rate": 4.7326315215325224e-05, "loss": 0.0019, "step": 219600 }, { "epoch": 25.91434638980651, "grad_norm": 0.16369466483592987, "learning_rate": 4.730776586239415e-05, "loss": 0.0027, "step": 219650 }, { "epoch": 25.920245398773005, "grad_norm": 0.0727231502532959, "learning_rate": 4.7289216881069e-05, "loss": 0.0029, "step": 219700 }, { "epoch": 25.9261444077395, "grad_norm": 0.10543547570705414, "learning_rate": 4.727066827391006e-05, "loss": 0.0024, "step": 219750 }, { "epoch": 25.932043416705994, "grad_norm": 0.0013768606586381793, "learning_rate": 4.7252120043477546e-05, "loss": 0.0028, "step": 219800 }, { "epoch": 25.937942425672485, "grad_norm": 0.05443676933646202, "learning_rate": 4.723357219233169e-05, "loss": 0.0026, "step": 219850 }, { "epoch": 25.94384143463898, "grad_norm": 0.2589868903160095, "learning_rate": 4.721502472303261e-05, "loss": 0.0026, "step": 219900 }, { "epoch": 25.949740443605474, "grad_norm": 0.3668622076511383, "learning_rate": 4.7196477638140404e-05, "loss": 0.0029, "step": 219950 }, { "epoch": 25.95563945257197, "grad_norm": 0.009285508655011654, "learning_rate": 4.71779309402151e-05, "loss": 0.003, "step": 220000 }, { "epoch": 25.95563945257197, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005875176284462214, "eval_runtime": 2.012, "eval_samples_per_second": 49.702, "eval_steps_per_second": 1.988, "eval_wer": 0.27, "step": 220000 }, { "epoch": 25.96153846153846, "grad_norm": 0.06502509862184525, "learning_rate": 4.715938463181667e-05, "loss": 0.0035, "step": 220050 }, { "epoch": 25.967437470504954, "grad_norm": 0.1398748755455017, "learning_rate": 4.714083871550504e-05, "loss": 0.0027, "step": 220100 }, { "epoch": 25.97333647947145, "grad_norm": 0.1642150729894638, "learning_rate": 4.712229319384009e-05, "loss": 0.0025, "step": 220150 }, { "epoch": 25.979235488437944, "grad_norm": 0.03875922039151192, "learning_rate": 4.7103748069381626e-05, "loss": 0.0029, "step": 220200 }, { "epoch": 25.985134497404434, "grad_norm": 0.029104549437761307, "learning_rate": 4.70852033446894e-05, "loss": 0.002, "step": 220250 }, { "epoch": 25.99103350637093, "grad_norm": 0.13916058838367462, "learning_rate": 4.706665902232315e-05, "loss": 0.0025, "step": 220300 }, { "epoch": 25.996932515337424, "grad_norm": 0.09224116802215576, "learning_rate": 4.704811510484251e-05, "loss": 0.0024, "step": 220350 }, { "epoch": 26.002831524303918, "grad_norm": 0.20346419513225555, "learning_rate": 4.702957159480707e-05, "loss": 0.0025, "step": 220400 }, { "epoch": 26.00873053327041, "grad_norm": 0.007135641295462847, "learning_rate": 4.701102849477637e-05, "loss": 0.0032, "step": 220450 }, { "epoch": 26.014629542236904, "grad_norm": 0.058157894760370255, "learning_rate": 4.699248580730991e-05, "loss": 0.0019, "step": 220500 }, { "epoch": 26.020528551203398, "grad_norm": 0.13698574900627136, "learning_rate": 4.6973943534967075e-05, "loss": 0.0023, "step": 220550 }, { "epoch": 26.026427560169893, "grad_norm": 0.011353322304785252, "learning_rate": 4.695540168030727e-05, "loss": 0.0021, "step": 220600 }, { "epoch": 26.032326569136384, "grad_norm": 0.08313658833503723, "learning_rate": 4.69368602458898e-05, "loss": 0.0026, "step": 220650 }, { "epoch": 26.038225578102878, "grad_norm": 0.014071330428123474, "learning_rate": 4.691831923427391e-05, "loss": 0.0033, "step": 220700 }, { "epoch": 26.044124587069373, "grad_norm": 0.12208976596593857, "learning_rate": 4.6899778648018796e-05, "loss": 0.0014, "step": 220750 }, { "epoch": 26.050023596035867, "grad_norm": 0.032701052725315094, "learning_rate": 4.6881238489683596e-05, "loss": 0.0022, "step": 220800 }, { "epoch": 26.05592260500236, "grad_norm": 0.017640087753534317, "learning_rate": 4.6862698761827364e-05, "loss": 0.0027, "step": 220850 }, { "epoch": 26.061821613968853, "grad_norm": 0.037321511656045914, "learning_rate": 4.684415946700916e-05, "loss": 0.0033, "step": 220900 }, { "epoch": 26.067720622935347, "grad_norm": 0.007154776714742184, "learning_rate": 4.682562060778791e-05, "loss": 0.002, "step": 220950 }, { "epoch": 26.073619631901842, "grad_norm": 0.01070210337638855, "learning_rate": 4.6807082186722516e-05, "loss": 0.0024, "step": 221000 }, { "epoch": 26.073619631901842, "eval_cer": 0.08347676419965576, "eval_loss": 0.0003517703153192997, "eval_runtime": 2.0861, "eval_samples_per_second": 47.935, "eval_steps_per_second": 1.917, "eval_wer": 0.26, "step": 221000 }, { "epoch": 26.079518640868333, "grad_norm": 0.29344967007637024, "learning_rate": 4.678854420637183e-05, "loss": 0.0018, "step": 221050 }, { "epoch": 26.085417649834827, "grad_norm": 0.09184079617261887, "learning_rate": 4.67700066692946e-05, "loss": 0.002, "step": 221100 }, { "epoch": 26.091316658801322, "grad_norm": 0.024261845275759697, "learning_rate": 4.675146957804955e-05, "loss": 0.0021, "step": 221150 }, { "epoch": 26.097215667767816, "grad_norm": 0.021538952365517616, "learning_rate": 4.673293293519535e-05, "loss": 0.0021, "step": 221200 }, { "epoch": 26.103114676734307, "grad_norm": 0.09714313596487045, "learning_rate": 4.671439674329057e-05, "loss": 0.0022, "step": 221250 }, { "epoch": 26.109013685700802, "grad_norm": 0.024744346737861633, "learning_rate": 4.669586100489376e-05, "loss": 0.0019, "step": 221300 }, { "epoch": 26.114912694667296, "grad_norm": 0.015508783049881458, "learning_rate": 4.667732572256337e-05, "loss": 0.0028, "step": 221350 }, { "epoch": 26.12081170363379, "grad_norm": 0.004334617871791124, "learning_rate": 4.6658790898857805e-05, "loss": 0.0023, "step": 221400 }, { "epoch": 26.126710712600282, "grad_norm": 0.11596301198005676, "learning_rate": 4.6640256536335395e-05, "loss": 0.0027, "step": 221450 }, { "epoch": 26.132609721566777, "grad_norm": 0.05420698970556259, "learning_rate": 4.662172263755443e-05, "loss": 0.0021, "step": 221500 }, { "epoch": 26.13850873053327, "grad_norm": 0.10146130621433258, "learning_rate": 4.6603189205073125e-05, "loss": 0.0024, "step": 221550 }, { "epoch": 26.144407739499766, "grad_norm": 0.15671993792057037, "learning_rate": 4.658465624144963e-05, "loss": 0.0026, "step": 221600 }, { "epoch": 26.150306748466257, "grad_norm": 0.02369624376296997, "learning_rate": 4.656612374924202e-05, "loss": 0.0025, "step": 221650 }, { "epoch": 26.15620575743275, "grad_norm": 0.018373653292655945, "learning_rate": 4.654759173100832e-05, "loss": 0.0016, "step": 221700 }, { "epoch": 26.162104766399246, "grad_norm": 0.008484087884426117, "learning_rate": 4.6529060189306475e-05, "loss": 0.003, "step": 221750 }, { "epoch": 26.16800377536574, "grad_norm": 0.35108810663223267, "learning_rate": 4.651052912669438e-05, "loss": 0.003, "step": 221800 }, { "epoch": 26.17390278433223, "grad_norm": 0.0224415585398674, "learning_rate": 4.649199854572984e-05, "loss": 0.0028, "step": 221850 }, { "epoch": 26.179801793298726, "grad_norm": 0.12293001264333725, "learning_rate": 4.647346844897062e-05, "loss": 0.0018, "step": 221900 }, { "epoch": 26.18570080226522, "grad_norm": 0.16488812863826752, "learning_rate": 4.645493883897441e-05, "loss": 0.0017, "step": 221950 }, { "epoch": 26.191599811231715, "grad_norm": 0.2428424209356308, "learning_rate": 4.643640971829883e-05, "loss": 0.0025, "step": 222000 }, { "epoch": 26.191599811231715, "eval_cer": 0.08605851979345955, "eval_loss": 0.000772296916693449, "eval_runtime": 2.0104, "eval_samples_per_second": 49.741, "eval_steps_per_second": 1.99, "eval_wer": 0.27, "step": 222000 }, { "epoch": 26.197498820198206, "grad_norm": 0.09457575529813766, "learning_rate": 4.641788108950143e-05, "loss": 0.0023, "step": 222050 }, { "epoch": 26.2033978291647, "grad_norm": 0.15636104345321655, "learning_rate": 4.63993529551397e-05, "loss": 0.0037, "step": 222100 }, { "epoch": 26.209296838131195, "grad_norm": 0.08998030424118042, "learning_rate": 4.638082531777105e-05, "loss": 0.0028, "step": 222150 }, { "epoch": 26.21519584709769, "grad_norm": 0.007676107808947563, "learning_rate": 4.636229817995281e-05, "loss": 0.002, "step": 222200 }, { "epoch": 26.22109485606418, "grad_norm": 0.1848790943622589, "learning_rate": 4.63437715442423e-05, "loss": 0.002, "step": 222250 }, { "epoch": 26.226993865030675, "grad_norm": 0.04155666381120682, "learning_rate": 4.63252454131967e-05, "loss": 0.0017, "step": 222300 }, { "epoch": 26.23289287399717, "grad_norm": 0.0835743173956871, "learning_rate": 4.6306719789373165e-05, "loss": 0.0021, "step": 222350 }, { "epoch": 26.23879188296366, "grad_norm": 0.08749467134475708, "learning_rate": 4.628819467532876e-05, "loss": 0.0022, "step": 222400 }, { "epoch": 26.244690891930155, "grad_norm": 0.016153208911418915, "learning_rate": 4.626967007362048e-05, "loss": 0.0018, "step": 222450 }, { "epoch": 26.25058990089665, "grad_norm": 0.029498204588890076, "learning_rate": 4.625114598680525e-05, "loss": 0.0016, "step": 222500 }, { "epoch": 26.256488909863144, "grad_norm": 0.11572325229644775, "learning_rate": 4.623262241743994e-05, "loss": 0.0022, "step": 222550 }, { "epoch": 26.262387918829635, "grad_norm": 0.0022231275215744972, "learning_rate": 4.621409936808134e-05, "loss": 0.0031, "step": 222600 }, { "epoch": 26.26828692779613, "grad_norm": 0.02411244437098503, "learning_rate": 4.619557684128615e-05, "loss": 0.0028, "step": 222650 }, { "epoch": 26.274185936762624, "grad_norm": 0.11810406297445297, "learning_rate": 4.6177054839611024e-05, "loss": 0.0017, "step": 222700 }, { "epoch": 26.28008494572912, "grad_norm": 0.14079992473125458, "learning_rate": 4.615853336561252e-05, "loss": 0.0029, "step": 222750 }, { "epoch": 26.28598395469561, "grad_norm": 0.2624160945415497, "learning_rate": 4.614001242184714e-05, "loss": 0.0034, "step": 222800 }, { "epoch": 26.291882963662104, "grad_norm": 0.01746271178126335, "learning_rate": 4.612149201087132e-05, "loss": 0.0019, "step": 222850 }, { "epoch": 26.2977819726286, "grad_norm": 0.00723037077113986, "learning_rate": 4.61029721352414e-05, "loss": 0.0017, "step": 222900 }, { "epoch": 26.303680981595093, "grad_norm": 0.11639261990785599, "learning_rate": 4.6084452797513664e-05, "loss": 0.0017, "step": 222950 }, { "epoch": 26.309579990561584, "grad_norm": 0.05044050142168999, "learning_rate": 4.6065934000244307e-05, "loss": 0.0018, "step": 223000 }, { "epoch": 26.309579990561584, "eval_cer": 0.08347676419965576, "eval_loss": 0.0004250677302479744, "eval_runtime": 2.0178, "eval_samples_per_second": 49.56, "eval_steps_per_second": 1.982, "eval_wer": 0.26, "step": 223000 }, { "epoch": 26.31547899952808, "grad_norm": 0.019527986645698547, "learning_rate": 4.604741574598945e-05, "loss": 0.0019, "step": 223050 }, { "epoch": 26.321378008494573, "grad_norm": 0.19253869354724884, "learning_rate": 4.6028898037305144e-05, "loss": 0.0018, "step": 223100 }, { "epoch": 26.327277017461068, "grad_norm": 0.0012504345504567027, "learning_rate": 4.6010380876747386e-05, "loss": 0.0019, "step": 223150 }, { "epoch": 26.33317602642756, "grad_norm": 0.008273559622466564, "learning_rate": 4.599186426687207e-05, "loss": 0.002, "step": 223200 }, { "epoch": 26.339075035394053, "grad_norm": 0.0027208938263356686, "learning_rate": 4.5973348210235014e-05, "loss": 0.0021, "step": 223250 }, { "epoch": 26.344974044360548, "grad_norm": 0.11420384049415588, "learning_rate": 4.5954832709391994e-05, "loss": 0.0024, "step": 223300 }, { "epoch": 26.350873053327042, "grad_norm": 0.023884767666459084, "learning_rate": 4.593631776689865e-05, "loss": 0.0024, "step": 223350 }, { "epoch": 26.356772062293533, "grad_norm": 0.004837725777179003, "learning_rate": 4.5917803385310596e-05, "loss": 0.0032, "step": 223400 }, { "epoch": 26.362671071260028, "grad_norm": 0.2116387039422989, "learning_rate": 4.589928956718335e-05, "loss": 0.0031, "step": 223450 }, { "epoch": 26.368570080226522, "grad_norm": 0.14986968040466309, "learning_rate": 4.588077631507235e-05, "loss": 0.002, "step": 223500 }, { "epoch": 26.374469089193017, "grad_norm": 0.13738521933555603, "learning_rate": 4.5862263631532934e-05, "loss": 0.0029, "step": 223550 }, { "epoch": 26.380368098159508, "grad_norm": 0.0011826810659840703, "learning_rate": 4.584375151912042e-05, "loss": 0.0018, "step": 223600 }, { "epoch": 26.386267107126002, "grad_norm": 0.09837210923433304, "learning_rate": 4.582523998039001e-05, "loss": 0.0015, "step": 223650 }, { "epoch": 26.392166116092497, "grad_norm": 0.293526828289032, "learning_rate": 4.580672901789681e-05, "loss": 0.0027, "step": 223700 }, { "epoch": 26.39806512505899, "grad_norm": 0.20667658746242523, "learning_rate": 4.578821863419588e-05, "loss": 0.0023, "step": 223750 }, { "epoch": 26.403964134025482, "grad_norm": 0.027077479287981987, "learning_rate": 4.5769708831842196e-05, "loss": 0.0018, "step": 223800 }, { "epoch": 26.409863142991977, "grad_norm": 0.11106430739164352, "learning_rate": 4.575119961339061e-05, "loss": 0.0027, "step": 223850 }, { "epoch": 26.41576215195847, "grad_norm": 0.004322267137467861, "learning_rate": 4.5732690981395964e-05, "loss": 0.0018, "step": 223900 }, { "epoch": 26.421661160924966, "grad_norm": 0.06829892843961716, "learning_rate": 4.571418293841297e-05, "loss": 0.0026, "step": 223950 }, { "epoch": 26.427560169891457, "grad_norm": 0.0841703712940216, "learning_rate": 4.569567548699627e-05, "loss": 0.0027, "step": 224000 }, { "epoch": 26.427560169891457, "eval_cer": 0.08777969018932874, "eval_loss": 0.003339051268994808, "eval_runtime": 2.0531, "eval_samples_per_second": 48.706, "eval_steps_per_second": 1.948, "eval_wer": 0.27, "step": 224000 }, { "epoch": 26.43345917885795, "grad_norm": 0.010970455594360828, "learning_rate": 4.567716862970043e-05, "loss": 0.0033, "step": 224050 }, { "epoch": 26.439358187824446, "grad_norm": 0.01770102046430111, "learning_rate": 4.5658662369079916e-05, "loss": 0.0021, "step": 224100 }, { "epoch": 26.44525719679094, "grad_norm": 0.12285184115171432, "learning_rate": 4.564015670768913e-05, "loss": 0.0025, "step": 224150 }, { "epoch": 26.45115620575743, "grad_norm": 0.09390042722225189, "learning_rate": 4.562165164808241e-05, "loss": 0.0027, "step": 224200 }, { "epoch": 26.457055214723926, "grad_norm": 0.03929469361901283, "learning_rate": 4.560314719281396e-05, "loss": 0.0018, "step": 224250 }, { "epoch": 26.46295422369042, "grad_norm": 0.13774222135543823, "learning_rate": 4.558464334443794e-05, "loss": 0.0023, "step": 224300 }, { "epoch": 26.468853232656915, "grad_norm": 0.2732272148132324, "learning_rate": 4.556614010550841e-05, "loss": 0.0034, "step": 224350 }, { "epoch": 26.474752241623406, "grad_norm": 0.003538960125297308, "learning_rate": 4.554763747857936e-05, "loss": 0.0031, "step": 224400 }, { "epoch": 26.4806512505899, "grad_norm": 0.0755028948187828, "learning_rate": 4.552913546620466e-05, "loss": 0.0026, "step": 224450 }, { "epoch": 26.486550259556395, "grad_norm": 0.08917535096406937, "learning_rate": 4.551063407093815e-05, "loss": 0.0024, "step": 224500 }, { "epoch": 26.49244926852289, "grad_norm": 0.042976923286914825, "learning_rate": 4.549213329533355e-05, "loss": 0.0017, "step": 224550 }, { "epoch": 26.49834827748938, "grad_norm": 0.010916617698967457, "learning_rate": 4.5473633141944495e-05, "loss": 0.0028, "step": 224600 }, { "epoch": 26.504247286455875, "grad_norm": 0.02047913335263729, "learning_rate": 4.545513361332454e-05, "loss": 0.0022, "step": 224650 }, { "epoch": 26.51014629542237, "grad_norm": 0.028303150087594986, "learning_rate": 4.543663471202715e-05, "loss": 0.0027, "step": 224700 }, { "epoch": 26.51604530438886, "grad_norm": 0.07189327478408813, "learning_rate": 4.54181364406057e-05, "loss": 0.0027, "step": 224750 }, { "epoch": 26.521944313355355, "grad_norm": 0.0490654818713665, "learning_rate": 4.53996388016135e-05, "loss": 0.0032, "step": 224800 }, { "epoch": 26.52784332232185, "grad_norm": 0.0049583325162529945, "learning_rate": 4.538114179760376e-05, "loss": 0.0034, "step": 224850 }, { "epoch": 26.533742331288344, "grad_norm": 0.019416263327002525, "learning_rate": 4.536264543112958e-05, "loss": 0.0022, "step": 224900 }, { "epoch": 26.53964134025484, "grad_norm": 0.21827244758605957, "learning_rate": 4.534414970474401e-05, "loss": 0.003, "step": 224950 }, { "epoch": 26.54554034922133, "grad_norm": 0.020775822922587395, "learning_rate": 4.532565462099999e-05, "loss": 0.0026, "step": 225000 }, { "epoch": 26.54554034922133, "eval_cer": 0.08777969018932874, "eval_loss": 0.0022942409850656986, "eval_runtime": 2.0189, "eval_samples_per_second": 49.532, "eval_steps_per_second": 1.981, "eval_wer": 0.27, "step": 225000 }, { "epoch": 26.551439358187825, "grad_norm": 0.02578655630350113, "learning_rate": 4.530716018245038e-05, "loss": 0.0024, "step": 225050 }, { "epoch": 26.55733836715432, "grad_norm": 0.05833723023533821, "learning_rate": 4.528866639164793e-05, "loss": 0.0021, "step": 225100 }, { "epoch": 26.56323737612081, "grad_norm": 0.10288926959037781, "learning_rate": 4.527017325114532e-05, "loss": 0.0026, "step": 225150 }, { "epoch": 26.569136385087305, "grad_norm": 0.14990286529064178, "learning_rate": 4.525168076349513e-05, "loss": 0.0024, "step": 225200 }, { "epoch": 26.5750353940538, "grad_norm": 0.08787843585014343, "learning_rate": 4.523318893124988e-05, "loss": 0.0026, "step": 225250 }, { "epoch": 26.580934403020294, "grad_norm": 0.07117478549480438, "learning_rate": 4.521469775696196e-05, "loss": 0.0024, "step": 225300 }, { "epoch": 26.586833411986785, "grad_norm": 0.12244179099798203, "learning_rate": 4.519620724318369e-05, "loss": 0.003, "step": 225350 }, { "epoch": 26.59273242095328, "grad_norm": 0.14156660437583923, "learning_rate": 4.517771739246729e-05, "loss": 0.0022, "step": 225400 }, { "epoch": 26.598631429919774, "grad_norm": 0.21610644459724426, "learning_rate": 4.515922820736489e-05, "loss": 0.0025, "step": 225450 }, { "epoch": 26.60453043888627, "grad_norm": 0.09031306207180023, "learning_rate": 4.514073969042853e-05, "loss": 0.0022, "step": 225500 }, { "epoch": 26.61042944785276, "grad_norm": 0.23297332227230072, "learning_rate": 4.5122251844210165e-05, "loss": 0.0022, "step": 225550 }, { "epoch": 26.616328456819254, "grad_norm": 0.36072927713394165, "learning_rate": 4.510376467126165e-05, "loss": 0.0026, "step": 225600 }, { "epoch": 26.62222746578575, "grad_norm": 0.24555902183055878, "learning_rate": 4.508527817413475e-05, "loss": 0.0029, "step": 225650 }, { "epoch": 26.628126474752243, "grad_norm": 0.12281373888254166, "learning_rate": 4.506679235538113e-05, "loss": 0.0022, "step": 225700 }, { "epoch": 26.634025483718734, "grad_norm": 0.05928319692611694, "learning_rate": 4.504830721755236e-05, "loss": 0.0024, "step": 225750 }, { "epoch": 26.63992449268523, "grad_norm": 0.1426519900560379, "learning_rate": 4.502982276319992e-05, "loss": 0.0028, "step": 225800 }, { "epoch": 26.645823501651723, "grad_norm": 0.04399280250072479, "learning_rate": 4.501133899487522e-05, "loss": 0.0036, "step": 225850 }, { "epoch": 26.651722510618217, "grad_norm": 0.16064898669719696, "learning_rate": 4.4992855915129535e-05, "loss": 0.0032, "step": 225900 }, { "epoch": 26.65762151958471, "grad_norm": 0.1420368254184723, "learning_rate": 4.497437352651406e-05, "loss": 0.004, "step": 225950 }, { "epoch": 26.663520528551203, "grad_norm": 0.0903027132153511, "learning_rate": 4.495589183157991e-05, "loss": 0.0019, "step": 226000 }, { "epoch": 26.663520528551203, "eval_cer": 0.08347676419965576, "eval_loss": 7.781814929330721e-05, "eval_runtime": 2.0058, "eval_samples_per_second": 49.856, "eval_steps_per_second": 1.994, "eval_wer": 0.26, "step": 226000 }, { "epoch": 26.669419537517697, "grad_norm": 0.043174583464860916, "learning_rate": 4.4937410832878087e-05, "loss": 0.0023, "step": 226050 }, { "epoch": 26.675318546484192, "grad_norm": 0.08255447447299957, "learning_rate": 4.491893053295948e-05, "loss": 0.0025, "step": 226100 }, { "epoch": 26.681217555450683, "grad_norm": 0.1422061175107956, "learning_rate": 4.4900450934374944e-05, "loss": 0.0026, "step": 226150 }, { "epoch": 26.687116564417177, "grad_norm": 0.09774792939424515, "learning_rate": 4.488197203967517e-05, "loss": 0.0023, "step": 226200 }, { "epoch": 26.693015573383672, "grad_norm": 0.12366823107004166, "learning_rate": 4.486349385141078e-05, "loss": 0.0029, "step": 226250 }, { "epoch": 26.698914582350167, "grad_norm": 0.022855281829833984, "learning_rate": 4.48450163721323e-05, "loss": 0.0023, "step": 226300 }, { "epoch": 26.704813591316658, "grad_norm": 0.060978975147008896, "learning_rate": 4.482653960439016e-05, "loss": 0.0027, "step": 226350 }, { "epoch": 26.710712600283152, "grad_norm": 0.07343766838312149, "learning_rate": 4.480806355073467e-05, "loss": 0.0018, "step": 226400 }, { "epoch": 26.716611609249647, "grad_norm": 0.0027839376125484705, "learning_rate": 4.4789588213716065e-05, "loss": 0.0019, "step": 226450 }, { "epoch": 26.72251061821614, "grad_norm": 0.021975303068757057, "learning_rate": 4.477111359588449e-05, "loss": 0.0025, "step": 226500 }, { "epoch": 26.728409627182632, "grad_norm": 0.06520712375640869, "learning_rate": 4.475263969978995e-05, "loss": 0.0029, "step": 226550 }, { "epoch": 26.734308636149127, "grad_norm": 0.04295315220952034, "learning_rate": 4.47341665279824e-05, "loss": 0.0021, "step": 226600 }, { "epoch": 26.74020764511562, "grad_norm": 0.1902911216020584, "learning_rate": 4.471569408301166e-05, "loss": 0.0025, "step": 226650 }, { "epoch": 26.746106654082116, "grad_norm": 0.010978609323501587, "learning_rate": 4.469722236742745e-05, "loss": 0.0026, "step": 226700 }, { "epoch": 26.752005663048607, "grad_norm": 0.013944494538009167, "learning_rate": 4.467875138377942e-05, "loss": 0.0028, "step": 226750 }, { "epoch": 26.7579046720151, "grad_norm": 0.007553169969469309, "learning_rate": 4.466028113461708e-05, "loss": 0.0013, "step": 226800 }, { "epoch": 26.763803680981596, "grad_norm": 0.004640303086489439, "learning_rate": 4.4641811622489846e-05, "loss": 0.0028, "step": 226850 }, { "epoch": 26.76970268994809, "grad_norm": 0.01993761584162712, "learning_rate": 4.462334284994708e-05, "loss": 0.0025, "step": 226900 }, { "epoch": 26.77560169891458, "grad_norm": 0.03762796148657799, "learning_rate": 4.460487481953798e-05, "loss": 0.0022, "step": 226950 }, { "epoch": 26.781500707881076, "grad_norm": 0.022100863978266716, "learning_rate": 4.458640753381167e-05, "loss": 0.0026, "step": 227000 }, { "epoch": 26.781500707881076, "eval_cer": 0.08347676419965576, "eval_loss": 9.704372496344149e-05, "eval_runtime": 2.0461, "eval_samples_per_second": 48.873, "eval_steps_per_second": 1.955, "eval_wer": 0.26, "step": 227000 }, { "epoch": 26.78739971684757, "grad_norm": 0.009939328767359257, "learning_rate": 4.456794099531717e-05, "loss": 0.0018, "step": 227050 }, { "epoch": 26.793298725814065, "grad_norm": 0.006452625617384911, "learning_rate": 4.454947520660339e-05, "loss": 0.003, "step": 227100 }, { "epoch": 26.799197734780556, "grad_norm": 0.0869249701499939, "learning_rate": 4.4531010170219136e-05, "loss": 0.0027, "step": 227150 }, { "epoch": 26.80509674374705, "grad_norm": 0.05573615804314613, "learning_rate": 4.451254588871313e-05, "loss": 0.0038, "step": 227200 }, { "epoch": 26.810995752713545, "grad_norm": 0.04412493854761124, "learning_rate": 4.449408236463397e-05, "loss": 0.003, "step": 227250 }, { "epoch": 26.81689476168004, "grad_norm": 0.14197583496570587, "learning_rate": 4.447561960053016e-05, "loss": 0.0024, "step": 227300 }, { "epoch": 26.82279377064653, "grad_norm": 0.08372156322002411, "learning_rate": 4.445715759895008e-05, "loss": 0.0032, "step": 227350 }, { "epoch": 26.828692779613025, "grad_norm": 0.051899343729019165, "learning_rate": 4.443869636244203e-05, "loss": 0.0031, "step": 227400 }, { "epoch": 26.83459178857952, "grad_norm": 0.035579800605773926, "learning_rate": 4.442023589355417e-05, "loss": 0.0022, "step": 227450 }, { "epoch": 26.84049079754601, "grad_norm": 0.144632026553154, "learning_rate": 4.4401776194834613e-05, "loss": 0.0026, "step": 227500 }, { "epoch": 26.846389806512505, "grad_norm": 0.046326059848070145, "learning_rate": 4.438331726883131e-05, "loss": 0.0022, "step": 227550 }, { "epoch": 26.852288815479, "grad_norm": 0.006084148772060871, "learning_rate": 4.436485911809212e-05, "loss": 0.0018, "step": 227600 }, { "epoch": 26.858187824445494, "grad_norm": 0.2224143147468567, "learning_rate": 4.434640174516481e-05, "loss": 0.003, "step": 227650 }, { "epoch": 26.864086833411985, "grad_norm": 0.008004355244338512, "learning_rate": 4.432794515259703e-05, "loss": 0.002, "step": 227700 }, { "epoch": 26.86998584237848, "grad_norm": 0.0657331719994545, "learning_rate": 4.430948934293631e-05, "loss": 0.0022, "step": 227750 }, { "epoch": 26.875884851344974, "grad_norm": 0.16018815338611603, "learning_rate": 4.429103431873009e-05, "loss": 0.0019, "step": 227800 }, { "epoch": 26.88178386031147, "grad_norm": 0.015558620914816856, "learning_rate": 4.42725800825257e-05, "loss": 0.0024, "step": 227850 }, { "epoch": 26.88768286927796, "grad_norm": 0.01814964786171913, "learning_rate": 4.425412663687035e-05, "loss": 0.0019, "step": 227900 }, { "epoch": 26.893581878244454, "grad_norm": 0.005705071613192558, "learning_rate": 4.423567398431116e-05, "loss": 0.0028, "step": 227950 }, { "epoch": 26.89948088721095, "grad_norm": 0.017632335424423218, "learning_rate": 4.421722212739511e-05, "loss": 0.0026, "step": 228000 }, { "epoch": 26.89948088721095, "eval_cer": 0.08347676419965576, "eval_loss": 5.45218754268717e-05, "eval_runtime": 2.0507, "eval_samples_per_second": 48.763, "eval_steps_per_second": 1.951, "eval_wer": 0.26, "step": 228000 }, { "epoch": 26.905379896177443, "grad_norm": 0.13823841512203217, "learning_rate": 4.419877106866909e-05, "loss": 0.0022, "step": 228050 }, { "epoch": 26.911278905143934, "grad_norm": 0.0755995586514473, "learning_rate": 4.418032081067989e-05, "loss": 0.0029, "step": 228100 }, { "epoch": 26.91717791411043, "grad_norm": 0.08642518520355225, "learning_rate": 4.416187135597417e-05, "loss": 0.0025, "step": 228150 }, { "epoch": 26.923076923076923, "grad_norm": 0.15304699540138245, "learning_rate": 4.414342270709848e-05, "loss": 0.0026, "step": 228200 }, { "epoch": 26.928975932043418, "grad_norm": 0.21879836916923523, "learning_rate": 4.412497486659929e-05, "loss": 0.0026, "step": 228250 }, { "epoch": 26.93487494100991, "grad_norm": 0.27022695541381836, "learning_rate": 4.410652783702292e-05, "loss": 0.0023, "step": 228300 }, { "epoch": 26.940773949976403, "grad_norm": 0.009332247078418732, "learning_rate": 4.408808162091558e-05, "loss": 0.0026, "step": 228350 }, { "epoch": 26.946672958942898, "grad_norm": 0.10012940317392349, "learning_rate": 4.4069636220823394e-05, "loss": 0.0024, "step": 228400 }, { "epoch": 26.952571967909392, "grad_norm": 0.001787978340871632, "learning_rate": 4.4051191639292355e-05, "loss": 0.0026, "step": 228450 }, { "epoch": 26.958470976875883, "grad_norm": 0.03450939059257507, "learning_rate": 4.403274787886833e-05, "loss": 0.0027, "step": 228500 }, { "epoch": 26.964369985842378, "grad_norm": 0.052823156118392944, "learning_rate": 4.401430494209713e-05, "loss": 0.0031, "step": 228550 }, { "epoch": 26.970268994808873, "grad_norm": 0.04474564269185066, "learning_rate": 4.399586283152437e-05, "loss": 0.003, "step": 228600 }, { "epoch": 26.976168003775367, "grad_norm": 0.128171905875206, "learning_rate": 4.397742154969562e-05, "loss": 0.0019, "step": 228650 }, { "epoch": 26.982067012741858, "grad_norm": 0.31202268600463867, "learning_rate": 4.395898109915629e-05, "loss": 0.0026, "step": 228700 }, { "epoch": 26.987966021708353, "grad_norm": 0.048667702823877335, "learning_rate": 4.3940541482451714e-05, "loss": 0.0031, "step": 228750 }, { "epoch": 26.993865030674847, "grad_norm": 0.011085529811680317, "learning_rate": 4.392210270212706e-05, "loss": 0.0032, "step": 228800 }, { "epoch": 26.99976403964134, "grad_norm": 0.13777713477611542, "learning_rate": 4.3903664760727445e-05, "loss": 0.0023, "step": 228850 }, { "epoch": 27.005663048607833, "grad_norm": 0.13473817706108093, "learning_rate": 4.388522766079782e-05, "loss": 0.0024, "step": 228900 }, { "epoch": 27.011562057574327, "grad_norm": 0.016332020983099937, "learning_rate": 4.3866791404883037e-05, "loss": 0.0022, "step": 228950 }, { "epoch": 27.01746106654082, "grad_norm": 0.0595366545021534, "learning_rate": 4.384835599552783e-05, "loss": 0.0027, "step": 229000 }, { "epoch": 27.01746106654082, "eval_cer": 0.08347676419965576, "eval_loss": 5.3248517360771075e-05, "eval_runtime": 2.0196, "eval_samples_per_second": 49.514, "eval_steps_per_second": 1.981, "eval_wer": 0.26, "step": 229000 }, { "epoch": 27.023360075507316, "grad_norm": 0.0488116554915905, "learning_rate": 4.3829921435276824e-05, "loss": 0.0021, "step": 229050 }, { "epoch": 27.029259084473807, "grad_norm": 0.0006278500077314675, "learning_rate": 4.3811487726674495e-05, "loss": 0.0016, "step": 229100 }, { "epoch": 27.0351580934403, "grad_norm": 0.07988902926445007, "learning_rate": 4.379305487226526e-05, "loss": 0.0022, "step": 229150 }, { "epoch": 27.041057102406796, "grad_norm": 0.09466777741909027, "learning_rate": 4.377462287459337e-05, "loss": 0.0014, "step": 229200 }, { "epoch": 27.04695611137329, "grad_norm": 0.016203822568058968, "learning_rate": 4.375619173620297e-05, "loss": 0.0017, "step": 229250 }, { "epoch": 27.052855120339782, "grad_norm": 0.0034310079645365477, "learning_rate": 4.373776145963809e-05, "loss": 0.0017, "step": 229300 }, { "epoch": 27.058754129306276, "grad_norm": 0.17395882308483124, "learning_rate": 4.371933204744264e-05, "loss": 0.0018, "step": 229350 }, { "epoch": 27.06465313827277, "grad_norm": 0.030394962057471275, "learning_rate": 4.37009035021604e-05, "loss": 0.0025, "step": 229400 }, { "epoch": 27.070552147239265, "grad_norm": 0.1376882642507553, "learning_rate": 4.368247582633506e-05, "loss": 0.0017, "step": 229450 }, { "epoch": 27.076451156205756, "grad_norm": 0.008759966120123863, "learning_rate": 4.366404902251016e-05, "loss": 0.0029, "step": 229500 }, { "epoch": 27.08235016517225, "grad_norm": 0.08632684499025345, "learning_rate": 4.364562309322913e-05, "loss": 0.0018, "step": 229550 }, { "epoch": 27.088249174138745, "grad_norm": 0.10790404677391052, "learning_rate": 4.362719804103528e-05, "loss": 0.0021, "step": 229600 }, { "epoch": 27.09414818310524, "grad_norm": 0.04962154105305672, "learning_rate": 4.36087738684718e-05, "loss": 0.0015, "step": 229650 }, { "epoch": 27.10004719207173, "grad_norm": 0.0008494952926412225, "learning_rate": 4.359035057808174e-05, "loss": 0.0023, "step": 229700 }, { "epoch": 27.105946201038225, "grad_norm": 0.0031228309962898493, "learning_rate": 4.357192817240806e-05, "loss": 0.0022, "step": 229750 }, { "epoch": 27.11184521000472, "grad_norm": 0.09525559097528458, "learning_rate": 4.3553506653993595e-05, "loss": 0.0018, "step": 229800 }, { "epoch": 27.117744218971215, "grad_norm": 0.06647945940494537, "learning_rate": 4.353508602538102e-05, "loss": 0.002, "step": 229850 }, { "epoch": 27.123643227937706, "grad_norm": 0.05640225112438202, "learning_rate": 4.351666628911295e-05, "loss": 0.0018, "step": 229900 }, { "epoch": 27.1295422369042, "grad_norm": 0.025609014555811882, "learning_rate": 4.34982474477318e-05, "loss": 0.0015, "step": 229950 }, { "epoch": 27.135441245870695, "grad_norm": 0.1541026085615158, "learning_rate": 4.347982950377992e-05, "loss": 0.0024, "step": 230000 }, { "epoch": 27.135441245870695, "eval_cer": 0.08347676419965576, "eval_loss": 0.00015598340542055666, "eval_runtime": 2.0273, "eval_samples_per_second": 49.326, "eval_steps_per_second": 1.973, "eval_wer": 0.26, "step": 230000 }, { "epoch": 27.141340254837186, "grad_norm": 0.24144026637077332, "learning_rate": 4.346141245979952e-05, "loss": 0.0032, "step": 230050 }, { "epoch": 27.14723926380368, "grad_norm": 0.05648206174373627, "learning_rate": 4.3442996318332674e-05, "loss": 0.0014, "step": 230100 }, { "epoch": 27.153138272770175, "grad_norm": 0.046327367424964905, "learning_rate": 4.342458108192133e-05, "loss": 0.002, "step": 230150 }, { "epoch": 27.15903728173667, "grad_norm": 0.01470743864774704, "learning_rate": 4.340616675310735e-05, "loss": 0.002, "step": 230200 }, { "epoch": 27.16493629070316, "grad_norm": 0.25207751989364624, "learning_rate": 4.338775333443243e-05, "loss": 0.0024, "step": 230250 }, { "epoch": 27.170835299669655, "grad_norm": 0.015593297779560089, "learning_rate": 4.336934082843814e-05, "loss": 0.0023, "step": 230300 }, { "epoch": 27.17673430863615, "grad_norm": 0.07323654741048813, "learning_rate": 4.335092923766596e-05, "loss": 0.0029, "step": 230350 }, { "epoch": 27.182633317602644, "grad_norm": 0.004644942469894886, "learning_rate": 4.3332518564657196e-05, "loss": 0.0024, "step": 230400 }, { "epoch": 27.188532326569135, "grad_norm": 0.05201044678688049, "learning_rate": 4.331410881195305e-05, "loss": 0.0021, "step": 230450 }, { "epoch": 27.19443133553563, "grad_norm": 0.16143254935741425, "learning_rate": 4.3295699982094624e-05, "loss": 0.0031, "step": 230500 }, { "epoch": 27.200330344502124, "grad_norm": 0.05713813751935959, "learning_rate": 4.3277292077622856e-05, "loss": 0.0026, "step": 230550 }, { "epoch": 27.20622935346862, "grad_norm": 0.03483269736170769, "learning_rate": 4.325888510107856e-05, "loss": 0.0021, "step": 230600 }, { "epoch": 27.21212836243511, "grad_norm": 0.014085503295063972, "learning_rate": 4.324047905500243e-05, "loss": 0.0023, "step": 230650 }, { "epoch": 27.218027371401604, "grad_norm": 0.0715804174542427, "learning_rate": 4.3222073941935054e-05, "loss": 0.002, "step": 230700 }, { "epoch": 27.2239263803681, "grad_norm": 0.09350229054689407, "learning_rate": 4.3203669764416826e-05, "loss": 0.0021, "step": 230750 }, { "epoch": 27.229825389334593, "grad_norm": 0.06844506412744522, "learning_rate": 4.318526652498809e-05, "loss": 0.002, "step": 230800 }, { "epoch": 27.235724398301084, "grad_norm": 0.003745409194380045, "learning_rate": 4.3166864226189005e-05, "loss": 0.0027, "step": 230850 }, { "epoch": 27.24162340726758, "grad_norm": 0.006382802035659552, "learning_rate": 4.314846287055963e-05, "loss": 0.0021, "step": 230900 }, { "epoch": 27.247522416234073, "grad_norm": 0.027194904163479805, "learning_rate": 4.313006246063987e-05, "loss": 0.0019, "step": 230950 }, { "epoch": 27.253421425200568, "grad_norm": 0.09864602237939835, "learning_rate": 4.311166299896953e-05, "loss": 0.0018, "step": 231000 }, { "epoch": 27.253421425200568, "eval_cer": 0.08347676419965576, "eval_loss": 7.64078795327805e-05, "eval_runtime": 2.0715, "eval_samples_per_second": 48.275, "eval_steps_per_second": 1.931, "eval_wer": 0.26, "step": 231000 }, { "epoch": 27.25932043416706, "grad_norm": 0.0055065457709133625, "learning_rate": 4.309326448808823e-05, "loss": 0.0023, "step": 231050 }, { "epoch": 27.265219443133553, "grad_norm": 0.11235018819570541, "learning_rate": 4.307486693053553e-05, "loss": 0.0019, "step": 231100 }, { "epoch": 27.271118452100048, "grad_norm": 0.11381274461746216, "learning_rate": 4.3056470328850826e-05, "loss": 0.0024, "step": 231150 }, { "epoch": 27.277017461066542, "grad_norm": 0.25339990854263306, "learning_rate": 4.3038074685573356e-05, "loss": 0.0022, "step": 231200 }, { "epoch": 27.282916470033033, "grad_norm": 0.019223695620894432, "learning_rate": 4.3019680003242255e-05, "loss": 0.002, "step": 231250 }, { "epoch": 27.288815478999528, "grad_norm": 0.1118415892124176, "learning_rate": 4.300128628439653e-05, "loss": 0.0021, "step": 231300 }, { "epoch": 27.294714487966022, "grad_norm": 0.02481871098279953, "learning_rate": 4.298289353157503e-05, "loss": 0.002, "step": 231350 }, { "epoch": 27.300613496932517, "grad_norm": 0.2520940899848938, "learning_rate": 4.296450174731648e-05, "loss": 0.0019, "step": 231400 }, { "epoch": 27.306512505899008, "grad_norm": 0.051947277039289474, "learning_rate": 4.2946110934159515e-05, "loss": 0.002, "step": 231450 }, { "epoch": 27.312411514865502, "grad_norm": 0.010494095273315907, "learning_rate": 4.2927721094642545e-05, "loss": 0.0023, "step": 231500 }, { "epoch": 27.318310523831997, "grad_norm": 0.034099530428647995, "learning_rate": 4.290933223130394e-05, "loss": 0.0021, "step": 231550 }, { "epoch": 27.32420953279849, "grad_norm": 0.0021534443367272615, "learning_rate": 4.2890944346681885e-05, "loss": 0.0025, "step": 231600 }, { "epoch": 27.330108541764982, "grad_norm": 0.005630101542919874, "learning_rate": 4.287255744331443e-05, "loss": 0.0024, "step": 231650 }, { "epoch": 27.336007550731477, "grad_norm": 0.03658708557486534, "learning_rate": 4.2854171523739487e-05, "loss": 0.0029, "step": 231700 }, { "epoch": 27.34190655969797, "grad_norm": 0.11959823966026306, "learning_rate": 4.2835786590494866e-05, "loss": 0.002, "step": 231750 }, { "epoch": 27.347805568664466, "grad_norm": 0.08748908340930939, "learning_rate": 4.281740264611819e-05, "loss": 0.0038, "step": 231800 }, { "epoch": 27.353704577630957, "grad_norm": 0.001984336180612445, "learning_rate": 4.2799019693146997e-05, "loss": 0.0025, "step": 231850 }, { "epoch": 27.35960358659745, "grad_norm": 0.05304768308997154, "learning_rate": 4.278063773411866e-05, "loss": 0.0026, "step": 231900 }, { "epoch": 27.365502595563946, "grad_norm": 0.012435125187039375, "learning_rate": 4.276225677157041e-05, "loss": 0.0027, "step": 231950 }, { "epoch": 27.37140160453044, "grad_norm": 0.022778362035751343, "learning_rate": 4.274387680803936e-05, "loss": 0.0017, "step": 232000 }, { "epoch": 27.37140160453044, "eval_cer": 0.08347676419965576, "eval_loss": 0.0001133705663960427, "eval_runtime": 2.0781, "eval_samples_per_second": 48.122, "eval_steps_per_second": 1.925, "eval_wer": 0.26, "step": 232000 }, { "epoch": 27.37730061349693, "grad_norm": 0.053728025406599045, "learning_rate": 4.272549784606248e-05, "loss": 0.0023, "step": 232050 }, { "epoch": 27.383199622463426, "grad_norm": 0.19045287370681763, "learning_rate": 4.270711988817656e-05, "loss": 0.0023, "step": 232100 }, { "epoch": 27.38909863142992, "grad_norm": 0.012936616316437721, "learning_rate": 4.2688742936918334e-05, "loss": 0.002, "step": 232150 }, { "epoch": 27.394997640396415, "grad_norm": 0.3091238737106323, "learning_rate": 4.267036699482433e-05, "loss": 0.0032, "step": 232200 }, { "epoch": 27.400896649362906, "grad_norm": 0.03797324001789093, "learning_rate": 4.265199206443096e-05, "loss": 0.0017, "step": 232250 }, { "epoch": 27.4067956583294, "grad_norm": 0.11714563518762589, "learning_rate": 4.2633618148274493e-05, "loss": 0.0019, "step": 232300 }, { "epoch": 27.412694667295895, "grad_norm": 0.001484959851950407, "learning_rate": 4.261524524889106e-05, "loss": 0.0034, "step": 232350 }, { "epoch": 27.41859367626239, "grad_norm": 0.06451276689767838, "learning_rate": 4.259687336881663e-05, "loss": 0.0028, "step": 232400 }, { "epoch": 27.42449268522888, "grad_norm": 0.15884624421596527, "learning_rate": 4.2578502510587094e-05, "loss": 0.003, "step": 232450 }, { "epoch": 27.430391694195375, "grad_norm": 0.07100200653076172, "learning_rate": 4.256013267673813e-05, "loss": 0.0021, "step": 232500 }, { "epoch": 27.43629070316187, "grad_norm": 0.08638747781515121, "learning_rate": 4.254176386980532e-05, "loss": 0.002, "step": 232550 }, { "epoch": 27.44218971212836, "grad_norm": 0.07632289826869965, "learning_rate": 4.252339609232408e-05, "loss": 0.0023, "step": 232600 }, { "epoch": 27.448088721094855, "grad_norm": 0.013787735253572464, "learning_rate": 4.2505029346829695e-05, "loss": 0.0023, "step": 232650 }, { "epoch": 27.45398773006135, "grad_norm": 0.31319478154182434, "learning_rate": 4.2486663635857286e-05, "loss": 0.0025, "step": 232700 }, { "epoch": 27.459886739027844, "grad_norm": 0.14678668975830078, "learning_rate": 4.246829896194189e-05, "loss": 0.003, "step": 232750 }, { "epoch": 27.465785747994335, "grad_norm": 0.2287614941596985, "learning_rate": 4.244993532761834e-05, "loss": 0.0023, "step": 232800 }, { "epoch": 27.47168475696083, "grad_norm": 0.49487587809562683, "learning_rate": 4.2431572735421346e-05, "loss": 0.0022, "step": 232850 }, { "epoch": 27.477583765927324, "grad_norm": 0.1822696179151535, "learning_rate": 4.241321118788547e-05, "loss": 0.0027, "step": 232900 }, { "epoch": 27.48348277489382, "grad_norm": 0.008909459225833416, "learning_rate": 4.239485068754515e-05, "loss": 0.0026, "step": 232950 }, { "epoch": 27.48938178386031, "grad_norm": 0.3656896948814392, "learning_rate": 4.2376491236934636e-05, "loss": 0.0031, "step": 233000 }, { "epoch": 27.48938178386031, "eval_cer": 0.08347676419965576, "eval_loss": 0.00020943516574334353, "eval_runtime": 2.025, "eval_samples_per_second": 49.383, "eval_steps_per_second": 1.975, "eval_wer": 0.26, "step": 233000 }, { "epoch": 27.495280792826804, "grad_norm": 0.2851526439189911, "learning_rate": 4.235813283858808e-05, "loss": 0.0018, "step": 233050 }, { "epoch": 27.5011798017933, "grad_norm": 0.00903993472456932, "learning_rate": 4.233977549503949e-05, "loss": 0.0025, "step": 233100 }, { "epoch": 27.507078810759793, "grad_norm": 0.058574650436639786, "learning_rate": 4.2321419208822674e-05, "loss": 0.0025, "step": 233150 }, { "epoch": 27.512977819726284, "grad_norm": 0.4344273805618286, "learning_rate": 4.230306398247136e-05, "loss": 0.0023, "step": 233200 }, { "epoch": 27.51887682869278, "grad_norm": 0.00812013354152441, "learning_rate": 4.228470981851908e-05, "loss": 0.0032, "step": 233250 }, { "epoch": 27.524775837659273, "grad_norm": 0.007491777651011944, "learning_rate": 4.226635671949924e-05, "loss": 0.0017, "step": 233300 }, { "epoch": 27.530674846625768, "grad_norm": 0.20644670724868774, "learning_rate": 4.22480046879451e-05, "loss": 0.003, "step": 233350 }, { "epoch": 27.53657385559226, "grad_norm": 0.025374028831720352, "learning_rate": 4.2229653726389765e-05, "loss": 0.0023, "step": 233400 }, { "epoch": 27.542472864558754, "grad_norm": 0.03295325115323067, "learning_rate": 4.2211303837366175e-05, "loss": 0.0023, "step": 233450 }, { "epoch": 27.548371873525248, "grad_norm": 0.19156667590141296, "learning_rate": 4.219295502340719e-05, "loss": 0.0021, "step": 233500 }, { "epoch": 27.554270882491743, "grad_norm": 0.024741994217038155, "learning_rate": 4.217460728704545e-05, "loss": 0.0021, "step": 233550 }, { "epoch": 27.560169891458234, "grad_norm": 0.06510189920663834, "learning_rate": 4.2156260630813475e-05, "loss": 0.0022, "step": 233600 }, { "epoch": 27.566068900424728, "grad_norm": 0.0037396629340946674, "learning_rate": 4.213791505724363e-05, "loss": 0.0018, "step": 233650 }, { "epoch": 27.571967909391223, "grad_norm": 0.1402258276939392, "learning_rate": 4.2119570568868134e-05, "loss": 0.0024, "step": 233700 }, { "epoch": 27.577866918357717, "grad_norm": 0.04606292024254799, "learning_rate": 4.210122716821904e-05, "loss": 0.0022, "step": 233750 }, { "epoch": 27.583765927324208, "grad_norm": 0.0738987848162651, "learning_rate": 4.20828848578283e-05, "loss": 0.0025, "step": 233800 }, { "epoch": 27.589664936290703, "grad_norm": 0.1413513869047165, "learning_rate": 4.206454364022765e-05, "loss": 0.0019, "step": 233850 }, { "epoch": 27.595563945257197, "grad_norm": 0.0025695536751300097, "learning_rate": 4.204620351794873e-05, "loss": 0.0027, "step": 233900 }, { "epoch": 27.601462954223692, "grad_norm": 0.00148765929043293, "learning_rate": 4.202786449352299e-05, "loss": 0.0013, "step": 233950 }, { "epoch": 27.607361963190183, "grad_norm": 0.011891220696270466, "learning_rate": 4.200952656948175e-05, "loss": 0.0029, "step": 234000 }, { "epoch": 27.607361963190183, "eval_cer": 0.08347676419965576, "eval_loss": 7.122338138287887e-05, "eval_runtime": 2.0469, "eval_samples_per_second": 48.854, "eval_steps_per_second": 1.954, "eval_wer": 0.26, "step": 234000 }, { "epoch": 27.613260972156677, "grad_norm": 0.021160593256354332, "learning_rate": 4.199118974835616e-05, "loss": 0.0026, "step": 234050 }, { "epoch": 27.619159981123172, "grad_norm": 0.012322074733674526, "learning_rate": 4.197285403267725e-05, "loss": 0.0023, "step": 234100 }, { "epoch": 27.625058990089666, "grad_norm": 0.020095480605959892, "learning_rate": 4.195451942497587e-05, "loss": 0.0018, "step": 234150 }, { "epoch": 27.630957999056157, "grad_norm": 0.14515681564807892, "learning_rate": 4.1936185927782725e-05, "loss": 0.0026, "step": 234200 }, { "epoch": 27.636857008022652, "grad_norm": 0.005356815177947283, "learning_rate": 4.191785354362836e-05, "loss": 0.0017, "step": 234250 }, { "epoch": 27.642756016989146, "grad_norm": 0.006652765441685915, "learning_rate": 4.189952227504317e-05, "loss": 0.0026, "step": 234300 }, { "epoch": 27.64865502595564, "grad_norm": 0.15732553601264954, "learning_rate": 4.188119212455741e-05, "loss": 0.0021, "step": 234350 }, { "epoch": 27.654554034922132, "grad_norm": 0.2592015266418457, "learning_rate": 4.186286309470116e-05, "loss": 0.0022, "step": 234400 }, { "epoch": 27.660453043888626, "grad_norm": 0.04441823437809944, "learning_rate": 4.184453518800436e-05, "loss": 0.0023, "step": 234450 }, { "epoch": 27.66635205285512, "grad_norm": 0.024584587663412094, "learning_rate": 4.1826208406996795e-05, "loss": 0.0026, "step": 234500 }, { "epoch": 27.672251061821616, "grad_norm": 0.03528805077075958, "learning_rate": 4.180788275420807e-05, "loss": 0.0021, "step": 234550 }, { "epoch": 27.678150070788107, "grad_norm": 0.07665152102708817, "learning_rate": 4.1789558232167674e-05, "loss": 0.0025, "step": 234600 }, { "epoch": 27.6840490797546, "grad_norm": 0.15961326658725739, "learning_rate": 4.177123484340489e-05, "loss": 0.0029, "step": 234650 }, { "epoch": 27.689948088721096, "grad_norm": 0.056355059146881104, "learning_rate": 4.1752912590448904e-05, "loss": 0.0026, "step": 234700 }, { "epoch": 27.69584709768759, "grad_norm": 0.14193613827228546, "learning_rate": 4.1734591475828714e-05, "loss": 0.0019, "step": 234750 }, { "epoch": 27.70174610665408, "grad_norm": 0.17940953373908997, "learning_rate": 4.171627150207314e-05, "loss": 0.0026, "step": 234800 }, { "epoch": 27.707645115620576, "grad_norm": 0.17856909334659576, "learning_rate": 4.169795267171089e-05, "loss": 0.0027, "step": 234850 }, { "epoch": 27.71354412458707, "grad_norm": 0.1982128769159317, "learning_rate": 4.167963498727048e-05, "loss": 0.003, "step": 234900 }, { "epoch": 27.719443133553565, "grad_norm": 0.13189327716827393, "learning_rate": 4.166131845128028e-05, "loss": 0.0028, "step": 234950 }, { "epoch": 27.725342142520056, "grad_norm": 0.03841298818588257, "learning_rate": 4.16430030662685e-05, "loss": 0.0028, "step": 235000 }, { "epoch": 27.725342142520056, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010528301208978519, "eval_runtime": 2.0543, "eval_samples_per_second": 48.679, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 235000 }, { "epoch": 27.73124115148655, "grad_norm": 0.17404930293560028, "learning_rate": 4.162468883476319e-05, "loss": 0.0027, "step": 235050 }, { "epoch": 27.737140160453045, "grad_norm": 0.06672939658164978, "learning_rate": 4.160637575929222e-05, "loss": 0.002, "step": 235100 }, { "epoch": 27.74303916941954, "grad_norm": 0.2201675921678543, "learning_rate": 4.158806384238336e-05, "loss": 0.0028, "step": 235150 }, { "epoch": 27.74893817838603, "grad_norm": 0.0011056415969505906, "learning_rate": 4.156975308656417e-05, "loss": 0.0019, "step": 235200 }, { "epoch": 27.754837187352525, "grad_norm": 0.14520271122455597, "learning_rate": 4.155144349436206e-05, "loss": 0.0016, "step": 235250 }, { "epoch": 27.76073619631902, "grad_norm": 0.07044749706983566, "learning_rate": 4.1533135068304276e-05, "loss": 0.0024, "step": 235300 }, { "epoch": 27.76663520528551, "grad_norm": 0.007979517802596092, "learning_rate": 4.1514827810917914e-05, "loss": 0.0016, "step": 235350 }, { "epoch": 27.772534214252005, "grad_norm": 0.07304585725069046, "learning_rate": 4.149652172472988e-05, "loss": 0.0032, "step": 235400 }, { "epoch": 27.7784332232185, "grad_norm": 0.10726705938577652, "learning_rate": 4.1478216812266984e-05, "loss": 0.0019, "step": 235450 }, { "epoch": 27.784332232184994, "grad_norm": 0.18805032968521118, "learning_rate": 4.1459913076055815e-05, "loss": 0.0029, "step": 235500 }, { "epoch": 27.790231241151485, "grad_norm": 0.08030678331851959, "learning_rate": 4.14416105186228e-05, "loss": 0.0023, "step": 235550 }, { "epoch": 27.79613025011798, "grad_norm": 0.026401204988360405, "learning_rate": 4.142330914249424e-05, "loss": 0.0029, "step": 235600 }, { "epoch": 27.802029259084474, "grad_norm": 0.1682775616645813, "learning_rate": 4.140500895019624e-05, "loss": 0.0021, "step": 235650 }, { "epoch": 27.80792826805097, "grad_norm": 0.43750137090682983, "learning_rate": 4.1386709944254744e-05, "loss": 0.0025, "step": 235700 }, { "epoch": 27.81382727701746, "grad_norm": 0.15876401960849762, "learning_rate": 4.1368412127195564e-05, "loss": 0.0025, "step": 235750 }, { "epoch": 27.819726285983954, "grad_norm": 0.014952256344258785, "learning_rate": 4.1350115501544325e-05, "loss": 0.0022, "step": 235800 }, { "epoch": 27.82562529495045, "grad_norm": 0.02509828470647335, "learning_rate": 4.133182006982648e-05, "loss": 0.0015, "step": 235850 }, { "epoch": 27.831524303916943, "grad_norm": 0.07465842366218567, "learning_rate": 4.131352583456734e-05, "loss": 0.0027, "step": 235900 }, { "epoch": 27.837423312883434, "grad_norm": 0.007112913299351931, "learning_rate": 4.1295232798292007e-05, "loss": 0.002, "step": 235950 }, { "epoch": 27.84332232184993, "grad_norm": 0.07406897097826004, "learning_rate": 4.1276940963525464e-05, "loss": 0.0018, "step": 236000 }, { "epoch": 27.84332232184993, "eval_cer": 0.08347676419965576, "eval_loss": 0.0001683271984802559, "eval_runtime": 2.0878, "eval_samples_per_second": 47.898, "eval_steps_per_second": 1.916, "eval_wer": 0.26, "step": 236000 }, { "epoch": 27.849221330816423, "grad_norm": 0.26591670513153076, "learning_rate": 4.125865033279251e-05, "loss": 0.0028, "step": 236050 }, { "epoch": 27.855120339782918, "grad_norm": 0.116222083568573, "learning_rate": 4.1240360908617795e-05, "loss": 0.0024, "step": 236100 }, { "epoch": 27.86101934874941, "grad_norm": 0.03439486399292946, "learning_rate": 4.122207269352577e-05, "loss": 0.0024, "step": 236150 }, { "epoch": 27.866918357715903, "grad_norm": 0.03585357964038849, "learning_rate": 4.120378569004074e-05, "loss": 0.002, "step": 236200 }, { "epoch": 27.872817366682398, "grad_norm": 0.18461620807647705, "learning_rate": 4.1185499900686844e-05, "loss": 0.0024, "step": 236250 }, { "epoch": 27.878716375648892, "grad_norm": 0.040599822998046875, "learning_rate": 4.1167215327988026e-05, "loss": 0.0039, "step": 236300 }, { "epoch": 27.884615384615383, "grad_norm": 0.001499248668551445, "learning_rate": 4.11489319744681e-05, "loss": 0.0021, "step": 236350 }, { "epoch": 27.890514393581878, "grad_norm": 0.038084253668785095, "learning_rate": 4.11306498426507e-05, "loss": 0.0028, "step": 236400 }, { "epoch": 27.896413402548372, "grad_norm": 0.06318676471710205, "learning_rate": 4.1112368935059276e-05, "loss": 0.0021, "step": 236450 }, { "epoch": 27.902312411514867, "grad_norm": 0.05158468708395958, "learning_rate": 4.109408925421714e-05, "loss": 0.0027, "step": 236500 }, { "epoch": 27.908211420481358, "grad_norm": 0.0010279687121510506, "learning_rate": 4.10758108026474e-05, "loss": 0.0022, "step": 236550 }, { "epoch": 27.914110429447852, "grad_norm": 0.0335095152258873, "learning_rate": 4.105753358287301e-05, "loss": 0.0021, "step": 236600 }, { "epoch": 27.920009438414347, "grad_norm": 0.08284736424684525, "learning_rate": 4.1039257597416764e-05, "loss": 0.0024, "step": 236650 }, { "epoch": 27.92590844738084, "grad_norm": 0.08671164512634277, "learning_rate": 4.102098284880126e-05, "loss": 0.0023, "step": 236700 }, { "epoch": 27.931807456347332, "grad_norm": 0.02125386893749237, "learning_rate": 4.1002709339548936e-05, "loss": 0.0019, "step": 236750 }, { "epoch": 27.937706465313827, "grad_norm": 0.08116744458675385, "learning_rate": 4.098443707218208e-05, "loss": 0.0024, "step": 236800 }, { "epoch": 27.94360547428032, "grad_norm": 0.01771799847483635, "learning_rate": 4.096616604922278e-05, "loss": 0.002, "step": 236850 }, { "epoch": 27.949504483246816, "grad_norm": 0.02577323652803898, "learning_rate": 4.094789627319298e-05, "loss": 0.0025, "step": 236900 }, { "epoch": 27.955403492213307, "grad_norm": 0.009338987059891224, "learning_rate": 4.0929627746614416e-05, "loss": 0.0025, "step": 236950 }, { "epoch": 27.9613025011798, "grad_norm": 0.23178009688854218, "learning_rate": 4.0911360472008676e-05, "loss": 0.0018, "step": 237000 }, { "epoch": 27.9613025011798, "eval_cer": 0.08347676419965576, "eval_loss": 0.00021250206918921322, "eval_runtime": 2.0814, "eval_samples_per_second": 48.045, "eval_steps_per_second": 1.922, "eval_wer": 0.26, "step": 237000 }, { "epoch": 27.967201510146296, "grad_norm": 0.0038947032298892736, "learning_rate": 4.089309445189716e-05, "loss": 0.0022, "step": 237050 }, { "epoch": 27.97310051911279, "grad_norm": 0.0366801880300045, "learning_rate": 4.087482968880114e-05, "loss": 0.0025, "step": 237100 }, { "epoch": 27.97899952807928, "grad_norm": 0.00411924859508872, "learning_rate": 4.085656618524165e-05, "loss": 0.0025, "step": 237150 }, { "epoch": 27.984898537045776, "grad_norm": 0.1369561105966568, "learning_rate": 4.083830394373959e-05, "loss": 0.0026, "step": 237200 }, { "epoch": 27.99079754601227, "grad_norm": 0.10072275251150131, "learning_rate": 4.082004296681567e-05, "loss": 0.0021, "step": 237250 }, { "epoch": 27.996696554978765, "grad_norm": 0.043377701193094254, "learning_rate": 4.080178325699045e-05, "loss": 0.0019, "step": 237300 }, { "epoch": 28.002595563945256, "grad_norm": 0.03156775236129761, "learning_rate": 4.0783524816784255e-05, "loss": 0.0025, "step": 237350 }, { "epoch": 28.00849457291175, "grad_norm": 0.01795014552772045, "learning_rate": 4.0765267648717324e-05, "loss": 0.0014, "step": 237400 }, { "epoch": 28.014393581878245, "grad_norm": 0.10819718986749649, "learning_rate": 4.074701175530965e-05, "loss": 0.0028, "step": 237450 }, { "epoch": 28.02029259084474, "grad_norm": 0.07842670381069183, "learning_rate": 4.072875713908107e-05, "loss": 0.0023, "step": 237500 }, { "epoch": 28.02619159981123, "grad_norm": 0.0024086919147521257, "learning_rate": 4.071050380255125e-05, "loss": 0.0017, "step": 237550 }, { "epoch": 28.032090608777725, "grad_norm": 0.01074095256626606, "learning_rate": 4.069225174823968e-05, "loss": 0.0021, "step": 237600 }, { "epoch": 28.03798961774422, "grad_norm": 0.09150384366512299, "learning_rate": 4.067400097866565e-05, "loss": 0.0021, "step": 237650 }, { "epoch": 28.043888626710714, "grad_norm": 0.009458918124437332, "learning_rate": 4.065575149634833e-05, "loss": 0.0019, "step": 237700 }, { "epoch": 28.049787635677205, "grad_norm": 0.0022803752217441797, "learning_rate": 4.063750330380664e-05, "loss": 0.0023, "step": 237750 }, { "epoch": 28.0556866446437, "grad_norm": 0.07682385295629501, "learning_rate": 4.061925640355939e-05, "loss": 0.0029, "step": 237800 }, { "epoch": 28.061585653610194, "grad_norm": 0.16541032493114471, "learning_rate": 4.060101079812514e-05, "loss": 0.0027, "step": 237850 }, { "epoch": 28.067484662576685, "grad_norm": 0.09842169284820557, "learning_rate": 4.058276649002234e-05, "loss": 0.0015, "step": 237900 }, { "epoch": 28.07338367154318, "grad_norm": 0.11580811440944672, "learning_rate": 4.0564523481769203e-05, "loss": 0.0021, "step": 237950 }, { "epoch": 28.079282680509674, "grad_norm": 0.011516952887177467, "learning_rate": 4.05462817758838e-05, "loss": 0.0023, "step": 238000 }, { "epoch": 28.079282680509674, "eval_cer": 0.08347676419965576, "eval_loss": 7.78354296926409e-05, "eval_runtime": 2.1593, "eval_samples_per_second": 46.311, "eval_steps_per_second": 1.852, "eval_wer": 0.26, "step": 238000 }, { "epoch": 28.08518168947617, "grad_norm": 0.013018646277487278, "learning_rate": 4.052804137488404e-05, "loss": 0.0023, "step": 238050 }, { "epoch": 28.09108069844266, "grad_norm": 0.15001246333122253, "learning_rate": 4.050980228128759e-05, "loss": 0.0015, "step": 238100 }, { "epoch": 28.096979707409155, "grad_norm": 0.16772933304309845, "learning_rate": 4.049156449761199e-05, "loss": 0.0014, "step": 238150 }, { "epoch": 28.10287871637565, "grad_norm": 0.12058596312999725, "learning_rate": 4.047332802637457e-05, "loss": 0.0015, "step": 238200 }, { "epoch": 28.108777725342144, "grad_norm": 0.17954355478286743, "learning_rate": 4.0455092870092494e-05, "loss": 0.0018, "step": 238250 }, { "epoch": 28.114676734308635, "grad_norm": 0.06192002817988396, "learning_rate": 4.0436859031282736e-05, "loss": 0.0016, "step": 238300 }, { "epoch": 28.12057574327513, "grad_norm": 0.040836479514837265, "learning_rate": 4.0418626512462086e-05, "loss": 0.0014, "step": 238350 }, { "epoch": 28.126474752241624, "grad_norm": 0.030458033084869385, "learning_rate": 4.040039531614716e-05, "loss": 0.0023, "step": 238400 }, { "epoch": 28.132373761208118, "grad_norm": 0.018498364835977554, "learning_rate": 4.0382165444854404e-05, "loss": 0.0016, "step": 238450 }, { "epoch": 28.13827277017461, "grad_norm": 0.007952048443257809, "learning_rate": 4.036393690110005e-05, "loss": 0.0017, "step": 238500 }, { "epoch": 28.144171779141104, "grad_norm": 0.017416667193174362, "learning_rate": 4.0345709687400156e-05, "loss": 0.0025, "step": 238550 }, { "epoch": 28.150070788107598, "grad_norm": 0.06318720430135727, "learning_rate": 4.032748380627063e-05, "loss": 0.002, "step": 238600 }, { "epoch": 28.155969797074093, "grad_norm": 0.026002254337072372, "learning_rate": 4.030925926022714e-05, "loss": 0.002, "step": 238650 }, { "epoch": 28.161868806040584, "grad_norm": 0.1382753700017929, "learning_rate": 4.02910360517852e-05, "loss": 0.0025, "step": 238700 }, { "epoch": 28.16776781500708, "grad_norm": 0.04269801080226898, "learning_rate": 4.0272814183460164e-05, "loss": 0.0017, "step": 238750 }, { "epoch": 28.173666823973573, "grad_norm": 0.01741490140557289, "learning_rate": 4.0254593657767155e-05, "loss": 0.0025, "step": 238800 }, { "epoch": 28.179565832940067, "grad_norm": 0.010420724749565125, "learning_rate": 4.0236374477221136e-05, "loss": 0.0018, "step": 238850 }, { "epoch": 28.18546484190656, "grad_norm": 0.8565635681152344, "learning_rate": 4.021815664433688e-05, "loss": 0.0019, "step": 238900 }, { "epoch": 28.191363850873053, "grad_norm": 0.03209180757403374, "learning_rate": 4.019994016162897e-05, "loss": 0.0019, "step": 238950 }, { "epoch": 28.197262859839547, "grad_norm": 0.0006416713586077094, "learning_rate": 4.0181725031611795e-05, "loss": 0.002, "step": 239000 }, { "epoch": 28.197262859839547, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010199008829658851, "eval_runtime": 2.0337, "eval_samples_per_second": 49.171, "eval_steps_per_second": 1.967, "eval_wer": 0.26, "step": 239000 }, { "epoch": 28.203161868806042, "grad_norm": 0.14683721959590912, "learning_rate": 4.016351125679959e-05, "loss": 0.0022, "step": 239050 }, { "epoch": 28.209060877772533, "grad_norm": 0.07871229201555252, "learning_rate": 4.0145298839706374e-05, "loss": 0.0017, "step": 239100 }, { "epoch": 28.214959886739027, "grad_norm": 0.06779392063617706, "learning_rate": 4.012708778284598e-05, "loss": 0.0021, "step": 239150 }, { "epoch": 28.220858895705522, "grad_norm": 0.003359236754477024, "learning_rate": 4.010887808873206e-05, "loss": 0.0015, "step": 239200 }, { "epoch": 28.226757904672017, "grad_norm": 0.13893567025661469, "learning_rate": 4.009066975987808e-05, "loss": 0.0024, "step": 239250 }, { "epoch": 28.232656913638507, "grad_norm": 0.35963860154151917, "learning_rate": 4.00724627987973e-05, "loss": 0.002, "step": 239300 }, { "epoch": 28.238555922605002, "grad_norm": 0.054162800312042236, "learning_rate": 4.0054257208002834e-05, "loss": 0.0023, "step": 239350 }, { "epoch": 28.244454931571497, "grad_norm": 0.013112357817590237, "learning_rate": 4.003605299000756e-05, "loss": 0.0017, "step": 239400 }, { "epoch": 28.25035394053799, "grad_norm": 0.03468150272965431, "learning_rate": 4.001785014732418e-05, "loss": 0.0017, "step": 239450 }, { "epoch": 28.256252949504482, "grad_norm": 0.04341653361916542, "learning_rate": 3.999964868246523e-05, "loss": 0.0015, "step": 239500 }, { "epoch": 28.262151958470977, "grad_norm": 0.04591629281640053, "learning_rate": 3.9981448597943015e-05, "loss": 0.0021, "step": 239550 }, { "epoch": 28.26805096743747, "grad_norm": 0.02311486378312111, "learning_rate": 3.996324989626967e-05, "loss": 0.0018, "step": 239600 }, { "epoch": 28.273949976403966, "grad_norm": 0.01949838548898697, "learning_rate": 3.9945052579957164e-05, "loss": 0.002, "step": 239650 }, { "epoch": 28.279848985370457, "grad_norm": 0.11427615582942963, "learning_rate": 3.992685665151723e-05, "loss": 0.0017, "step": 239700 }, { "epoch": 28.28574799433695, "grad_norm": 0.29336994886398315, "learning_rate": 3.9908662113461434e-05, "loss": 0.0016, "step": 239750 }, { "epoch": 28.291647003303446, "grad_norm": 0.23621444404125214, "learning_rate": 3.989046896830116e-05, "loss": 0.0013, "step": 239800 }, { "epoch": 28.29754601226994, "grad_norm": 0.003661473048850894, "learning_rate": 3.987227721854759e-05, "loss": 0.0019, "step": 239850 }, { "epoch": 28.30344502123643, "grad_norm": 0.0005899943062104285, "learning_rate": 3.9854086866711695e-05, "loss": 0.0026, "step": 239900 }, { "epoch": 28.309344030202926, "grad_norm": 0.0414397194981575, "learning_rate": 3.9835897915304265e-05, "loss": 0.0025, "step": 239950 }, { "epoch": 28.31524303916942, "grad_norm": 0.07365800440311432, "learning_rate": 3.9817710366835906e-05, "loss": 0.0021, "step": 240000 }, { "epoch": 28.31524303916942, "eval_cer": 0.08347676419965576, "eval_loss": 4.344038825365715e-05, "eval_runtime": 2.0224, "eval_samples_per_second": 49.446, "eval_steps_per_second": 1.978, "eval_wer": 0.26, "step": 240000 }, { "epoch": 28.321142048135915, "grad_norm": 0.005086483899503946, "learning_rate": 3.979952422381702e-05, "loss": 0.0021, "step": 240050 }, { "epoch": 28.327041057102406, "grad_norm": 0.16788350045681, "learning_rate": 3.978133948875783e-05, "loss": 0.0027, "step": 240100 }, { "epoch": 28.3329400660689, "grad_norm": 0.009305277839303017, "learning_rate": 3.976315616416834e-05, "loss": 0.0015, "step": 240150 }, { "epoch": 28.338839075035395, "grad_norm": 0.030625320971012115, "learning_rate": 3.9744974252558386e-05, "loss": 0.0015, "step": 240200 }, { "epoch": 28.34473808400189, "grad_norm": 0.04565588757395744, "learning_rate": 3.9726793756437584e-05, "loss": 0.0022, "step": 240250 }, { "epoch": 28.35063709296838, "grad_norm": 0.1612064093351364, "learning_rate": 3.9708614678315375e-05, "loss": 0.002, "step": 240300 }, { "epoch": 28.356536101934875, "grad_norm": 0.008490882813930511, "learning_rate": 3.9690437020700974e-05, "loss": 0.0026, "step": 240350 }, { "epoch": 28.36243511090137, "grad_norm": 0.08349458128213882, "learning_rate": 3.967226078610347e-05, "loss": 0.0025, "step": 240400 }, { "epoch": 28.36833411986786, "grad_norm": 0.15841931104660034, "learning_rate": 3.965408597703166e-05, "loss": 0.0032, "step": 240450 }, { "epoch": 28.374233128834355, "grad_norm": 0.1189507246017456, "learning_rate": 3.963591259599421e-05, "loss": 0.0023, "step": 240500 }, { "epoch": 28.38013213780085, "grad_norm": 0.017545610666275024, "learning_rate": 3.961774064549957e-05, "loss": 0.0021, "step": 240550 }, { "epoch": 28.386031146767344, "grad_norm": 0.018003104254603386, "learning_rate": 3.9599570128056e-05, "loss": 0.0017, "step": 240600 }, { "epoch": 28.391930155733835, "grad_norm": 0.011295764707028866, "learning_rate": 3.958140104617153e-05, "loss": 0.0026, "step": 240650 }, { "epoch": 28.39782916470033, "grad_norm": 0.015286233276128769, "learning_rate": 3.956323340235405e-05, "loss": 0.0023, "step": 240700 }, { "epoch": 28.403728173666824, "grad_norm": 0.0027588086668401957, "learning_rate": 3.95450671991112e-05, "loss": 0.0017, "step": 240750 }, { "epoch": 28.40962718263332, "grad_norm": 0.024992819875478745, "learning_rate": 3.952690243895044e-05, "loss": 0.0015, "step": 240800 }, { "epoch": 28.41552619159981, "grad_norm": 0.23905619978904724, "learning_rate": 3.950873912437903e-05, "loss": 0.0023, "step": 240850 }, { "epoch": 28.421425200566304, "grad_norm": 0.01260635070502758, "learning_rate": 3.9490577257904044e-05, "loss": 0.0025, "step": 240900 }, { "epoch": 28.4273242095328, "grad_norm": 0.0013007899979129434, "learning_rate": 3.947241684203232e-05, "loss": 0.0017, "step": 240950 }, { "epoch": 28.433223218499293, "grad_norm": 0.23494936525821686, "learning_rate": 3.945425787927054e-05, "loss": 0.003, "step": 241000 }, { "epoch": 28.433223218499293, "eval_cer": 0.08347676419965576, "eval_loss": 4.0842609450919554e-05, "eval_runtime": 2.0757, "eval_samples_per_second": 48.176, "eval_steps_per_second": 1.927, "eval_wer": 0.26, "step": 241000 }, { "epoch": 28.439122227465784, "grad_norm": 0.0049308123998343945, "learning_rate": 3.9436100372125155e-05, "loss": 0.0023, "step": 241050 }, { "epoch": 28.44502123643228, "grad_norm": 0.03311822935938835, "learning_rate": 3.9417944323102427e-05, "loss": 0.0019, "step": 241100 }, { "epoch": 28.450920245398773, "grad_norm": 0.006339756306260824, "learning_rate": 3.9399789734708415e-05, "loss": 0.0015, "step": 241150 }, { "epoch": 28.456819254365268, "grad_norm": 0.2026570588350296, "learning_rate": 3.9381636609448975e-05, "loss": 0.0014, "step": 241200 }, { "epoch": 28.46271826333176, "grad_norm": 0.10004009306430817, "learning_rate": 3.9363484949829746e-05, "loss": 0.0022, "step": 241250 }, { "epoch": 28.468617272298253, "grad_norm": 0.16723474860191345, "learning_rate": 3.9345334758356205e-05, "loss": 0.0025, "step": 241300 }, { "epoch": 28.474516281264748, "grad_norm": 0.04465387016534805, "learning_rate": 3.932718603753358e-05, "loss": 0.0026, "step": 241350 }, { "epoch": 28.480415290231242, "grad_norm": 0.023162052035331726, "learning_rate": 3.9309038789866934e-05, "loss": 0.0024, "step": 241400 }, { "epoch": 28.486314299197733, "grad_norm": 0.042144544422626495, "learning_rate": 3.929089301786111e-05, "loss": 0.0022, "step": 241450 }, { "epoch": 28.492213308164228, "grad_norm": 0.011983189731836319, "learning_rate": 3.927274872402074e-05, "loss": 0.0024, "step": 241500 }, { "epoch": 28.498112317130722, "grad_norm": 0.03300602361559868, "learning_rate": 3.925460591085027e-05, "loss": 0.0019, "step": 241550 }, { "epoch": 28.504011326097217, "grad_norm": 0.21373066306114197, "learning_rate": 3.923646458085392e-05, "loss": 0.0028, "step": 241600 }, { "epoch": 28.509910335063708, "grad_norm": 0.015038222074508667, "learning_rate": 3.921832473653571e-05, "loss": 0.0021, "step": 241650 }, { "epoch": 28.515809344030203, "grad_norm": 0.10170397907495499, "learning_rate": 3.9200186380399474e-05, "loss": 0.002, "step": 241700 }, { "epoch": 28.521708352996697, "grad_norm": 0.14477279782295227, "learning_rate": 3.918204951494883e-05, "loss": 0.0018, "step": 241750 }, { "epoch": 28.52760736196319, "grad_norm": 0.06992167234420776, "learning_rate": 3.9163914142687184e-05, "loss": 0.0019, "step": 241800 }, { "epoch": 28.533506370929683, "grad_norm": 0.03678375855088234, "learning_rate": 3.9145780266117734e-05, "loss": 0.0019, "step": 241850 }, { "epoch": 28.539405379896177, "grad_norm": 0.09824997931718826, "learning_rate": 3.912764788774348e-05, "loss": 0.0015, "step": 241900 }, { "epoch": 28.54530438886267, "grad_norm": 0.08334910124540329, "learning_rate": 3.910951701006723e-05, "loss": 0.002, "step": 241950 }, { "epoch": 28.551203397829166, "grad_norm": 0.2757181227207184, "learning_rate": 3.9091387635591536e-05, "loss": 0.0026, "step": 242000 }, { "epoch": 28.551203397829166, "eval_cer": 0.08347676419965576, "eval_loss": 8.861565584084019e-05, "eval_runtime": 2.1194, "eval_samples_per_second": 47.184, "eval_steps_per_second": 1.887, "eval_wer": 0.26, "step": 242000 }, { "epoch": 28.557102406795657, "grad_norm": 0.053718384355306625, "learning_rate": 3.9073259766818804e-05, "loss": 0.0027, "step": 242050 }, { "epoch": 28.56300141576215, "grad_norm": 0.0567488856613636, "learning_rate": 3.9055133406251186e-05, "loss": 0.002, "step": 242100 }, { "epoch": 28.568900424728646, "grad_norm": 0.2651081383228302, "learning_rate": 3.9037008556390655e-05, "loss": 0.0025, "step": 242150 }, { "epoch": 28.57479943369514, "grad_norm": 0.007244314067065716, "learning_rate": 3.9018885219738946e-05, "loss": 0.0021, "step": 242200 }, { "epoch": 28.58069844266163, "grad_norm": 0.008616024628281593, "learning_rate": 3.900076339879761e-05, "loss": 0.0017, "step": 242250 }, { "epoch": 28.586597451628126, "grad_norm": 0.02108408324420452, "learning_rate": 3.898264309606797e-05, "loss": 0.0015, "step": 242300 }, { "epoch": 28.59249646059462, "grad_norm": 0.01931465044617653, "learning_rate": 3.8964524314051175e-05, "loss": 0.0027, "step": 242350 }, { "epoch": 28.598395469561115, "grad_norm": 0.007855447009205818, "learning_rate": 3.894640705524813e-05, "loss": 0.0024, "step": 242400 }, { "epoch": 28.604294478527606, "grad_norm": 0.19146943092346191, "learning_rate": 3.892829132215953e-05, "loss": 0.0015, "step": 242450 }, { "epoch": 28.6101934874941, "grad_norm": 0.012813497334718704, "learning_rate": 3.891017711728587e-05, "loss": 0.0022, "step": 242500 }, { "epoch": 28.616092496460595, "grad_norm": 0.03880859911441803, "learning_rate": 3.8892064443127436e-05, "loss": 0.0023, "step": 242550 }, { "epoch": 28.62199150542709, "grad_norm": 0.006689796689897776, "learning_rate": 3.887395330218429e-05, "loss": 0.0018, "step": 242600 }, { "epoch": 28.62789051439358, "grad_norm": 0.14034704864025116, "learning_rate": 3.88558436969563e-05, "loss": 0.002, "step": 242650 }, { "epoch": 28.633789523360075, "grad_norm": 0.004006070550531149, "learning_rate": 3.8837735629943124e-05, "loss": 0.0017, "step": 242700 }, { "epoch": 28.63968853232657, "grad_norm": 0.02475045435130596, "learning_rate": 3.881962910364418e-05, "loss": 0.0022, "step": 242750 }, { "epoch": 28.64558754129306, "grad_norm": 0.010034327395260334, "learning_rate": 3.8801524120558704e-05, "loss": 0.0025, "step": 242800 }, { "epoch": 28.651486550259555, "grad_norm": 0.05856252834200859, "learning_rate": 3.878342068318569e-05, "loss": 0.0025, "step": 242850 }, { "epoch": 28.65738555922605, "grad_norm": 0.01905248314142227, "learning_rate": 3.8765318794023934e-05, "loss": 0.0019, "step": 242900 }, { "epoch": 28.663284568192545, "grad_norm": 0.007796768099069595, "learning_rate": 3.8747218455572046e-05, "loss": 0.0021, "step": 242950 }, { "epoch": 28.66918357715904, "grad_norm": 0.09389907866716385, "learning_rate": 3.8729119670328356e-05, "loss": 0.0018, "step": 243000 }, { "epoch": 28.66918357715904, "eval_cer": 0.08605851979345955, "eval_loss": 0.0009670069557614625, "eval_runtime": 2.0441, "eval_samples_per_second": 48.921, "eval_steps_per_second": 1.957, "eval_wer": 0.27, "step": 243000 }, { "epoch": 28.67508258612553, "grad_norm": 0.08434885740280151, "learning_rate": 3.871102244079104e-05, "loss": 0.0018, "step": 243050 }, { "epoch": 28.680981595092025, "grad_norm": 0.029817329719662666, "learning_rate": 3.8692926769458046e-05, "loss": 0.0032, "step": 243100 }, { "epoch": 28.68688060405852, "grad_norm": 0.12337357550859451, "learning_rate": 3.867483265882709e-05, "loss": 0.0024, "step": 243150 }, { "epoch": 28.69277961302501, "grad_norm": 0.0015367080923169851, "learning_rate": 3.865674011139567e-05, "loss": 0.0014, "step": 243200 }, { "epoch": 28.698678621991505, "grad_norm": 0.15841853618621826, "learning_rate": 3.8638649129661093e-05, "loss": 0.002, "step": 243250 }, { "epoch": 28.704577630958, "grad_norm": 0.008364873938262463, "learning_rate": 3.862055971612042e-05, "loss": 0.0024, "step": 243300 }, { "epoch": 28.710476639924494, "grad_norm": 0.013664190657436848, "learning_rate": 3.860247187327052e-05, "loss": 0.0028, "step": 243350 }, { "epoch": 28.716375648890985, "grad_norm": 0.26356732845306396, "learning_rate": 3.8584385603608053e-05, "loss": 0.0021, "step": 243400 }, { "epoch": 28.72227465785748, "grad_norm": 0.16807416081428528, "learning_rate": 3.8566300909629424e-05, "loss": 0.0019, "step": 243450 }, { "epoch": 28.728173666823974, "grad_norm": 0.01563834398984909, "learning_rate": 3.854821779383085e-05, "loss": 0.0021, "step": 243500 }, { "epoch": 28.73407267579047, "grad_norm": 0.18226511776447296, "learning_rate": 3.853013625870833e-05, "loss": 0.0021, "step": 243550 }, { "epoch": 28.73997168475696, "grad_norm": 0.11246916651725769, "learning_rate": 3.8512056306757617e-05, "loss": 0.0017, "step": 243600 }, { "epoch": 28.745870693723454, "grad_norm": 0.00748802162706852, "learning_rate": 3.8493977940474266e-05, "loss": 0.0022, "step": 243650 }, { "epoch": 28.75176970268995, "grad_norm": 0.12528307735919952, "learning_rate": 3.847590116235364e-05, "loss": 0.0026, "step": 243700 }, { "epoch": 28.757668711656443, "grad_norm": 0.00854555144906044, "learning_rate": 3.845782597489084e-05, "loss": 0.0021, "step": 243750 }, { "epoch": 28.763567720622934, "grad_norm": 0.08430474996566772, "learning_rate": 3.843975238058075e-05, "loss": 0.0021, "step": 243800 }, { "epoch": 28.76946672958943, "grad_norm": 0.0023412557784467936, "learning_rate": 3.8421680381918066e-05, "loss": 0.0015, "step": 243850 }, { "epoch": 28.775365738555923, "grad_norm": 0.19983339309692383, "learning_rate": 3.840360998139724e-05, "loss": 0.0027, "step": 243900 }, { "epoch": 28.781264747522417, "grad_norm": 0.08212260901927948, "learning_rate": 3.838554118151249e-05, "loss": 0.0022, "step": 243950 }, { "epoch": 28.78716375648891, "grad_norm": 0.3392542898654938, "learning_rate": 3.836747398475786e-05, "loss": 0.002, "step": 244000 }, { "epoch": 28.78716375648891, "eval_cer": 0.08605851979345955, "eval_loss": 0.000909956987015903, "eval_runtime": 2.1119, "eval_samples_per_second": 47.35, "eval_steps_per_second": 1.894, "eval_wer": 0.27, "step": 244000 }, { "epoch": 28.793062765455403, "grad_norm": 0.001230847556143999, "learning_rate": 3.834940839362713e-05, "loss": 0.002, "step": 244050 }, { "epoch": 28.798961774421898, "grad_norm": 0.003308985149487853, "learning_rate": 3.8331344410613875e-05, "loss": 0.003, "step": 244100 }, { "epoch": 28.804860783388392, "grad_norm": 0.022280490025877953, "learning_rate": 3.831328203821144e-05, "loss": 0.0024, "step": 244150 }, { "epoch": 28.810759792354883, "grad_norm": 0.2416997104883194, "learning_rate": 3.8295221278912963e-05, "loss": 0.0018, "step": 244200 }, { "epoch": 28.816658801321378, "grad_norm": 0.027137378230690956, "learning_rate": 3.827716213521132e-05, "loss": 0.0024, "step": 244250 }, { "epoch": 28.822557810287872, "grad_norm": 0.12145145982503891, "learning_rate": 3.8259104609599236e-05, "loss": 0.0019, "step": 244300 }, { "epoch": 28.828456819254367, "grad_norm": 0.01976604387164116, "learning_rate": 3.824104870456915e-05, "loss": 0.0024, "step": 244350 }, { "epoch": 28.834355828220858, "grad_norm": 0.04054408520460129, "learning_rate": 3.822299442261329e-05, "loss": 0.002, "step": 244400 }, { "epoch": 28.840254837187352, "grad_norm": 0.009776607155799866, "learning_rate": 3.820494176622369e-05, "loss": 0.0026, "step": 244450 }, { "epoch": 28.846153846153847, "grad_norm": 0.015767080709338188, "learning_rate": 3.818689073789211e-05, "loss": 0.0016, "step": 244500 }, { "epoch": 28.85205285512034, "grad_norm": 0.0020487697329372168, "learning_rate": 3.816884134011012e-05, "loss": 0.0027, "step": 244550 }, { "epoch": 28.857951864086832, "grad_norm": 0.005693455692380667, "learning_rate": 3.8150793575369066e-05, "loss": 0.0016, "step": 244600 }, { "epoch": 28.863850873053327, "grad_norm": 0.05922147259116173, "learning_rate": 3.8132747446160046e-05, "loss": 0.001, "step": 244650 }, { "epoch": 28.86974988201982, "grad_norm": 0.024837413802742958, "learning_rate": 3.8114702954973956e-05, "loss": 0.0014, "step": 244700 }, { "epoch": 28.875648890986316, "grad_norm": 0.03594668582081795, "learning_rate": 3.809666010430146e-05, "loss": 0.0029, "step": 244750 }, { "epoch": 28.881547899952807, "grad_norm": 0.06145823001861572, "learning_rate": 3.8078618896632995e-05, "loss": 0.0018, "step": 244800 }, { "epoch": 28.8874469089193, "grad_norm": 0.20488891005516052, "learning_rate": 3.8060579334458754e-05, "loss": 0.0024, "step": 244850 }, { "epoch": 28.893345917885796, "grad_norm": 0.057509876787662506, "learning_rate": 3.804254142026873e-05, "loss": 0.002, "step": 244900 }, { "epoch": 28.89924492685229, "grad_norm": 0.0017647914355620742, "learning_rate": 3.8024505156552656e-05, "loss": 0.0022, "step": 244950 }, { "epoch": 28.90514393581878, "grad_norm": 0.1774846613407135, "learning_rate": 3.800647054580006e-05, "loss": 0.002, "step": 245000 }, { "epoch": 28.90514393581878, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005283982609398663, "eval_runtime": 2.049, "eval_samples_per_second": 48.805, "eval_steps_per_second": 1.952, "eval_wer": 0.27, "step": 245000 }, { "epoch": 28.911042944785276, "grad_norm": 1.2467808723449707, "learning_rate": 3.7988437590500256e-05, "loss": 0.0033, "step": 245050 }, { "epoch": 28.91694195375177, "grad_norm": 0.04224267229437828, "learning_rate": 3.79704062931423e-05, "loss": 0.002, "step": 245100 }, { "epoch": 28.922840962718265, "grad_norm": 0.009015565738081932, "learning_rate": 3.795237665621504e-05, "loss": 0.003, "step": 245150 }, { "epoch": 28.928739971684756, "grad_norm": 0.057109810411930084, "learning_rate": 3.793434868220706e-05, "loss": 0.0019, "step": 245200 }, { "epoch": 28.93463898065125, "grad_norm": 0.24693496525287628, "learning_rate": 3.791632237360678e-05, "loss": 0.0027, "step": 245250 }, { "epoch": 28.940537989617745, "grad_norm": 0.42240914702415466, "learning_rate": 3.7898297732902306e-05, "loss": 0.0026, "step": 245300 }, { "epoch": 28.94643699858424, "grad_norm": 0.021964402869343758, "learning_rate": 3.7880274762581587e-05, "loss": 0.0021, "step": 245350 }, { "epoch": 28.95233600755073, "grad_norm": 0.3272850513458252, "learning_rate": 3.786225346513231e-05, "loss": 0.0031, "step": 245400 }, { "epoch": 28.958235016517225, "grad_norm": 0.035706840455532074, "learning_rate": 3.784423384304192e-05, "loss": 0.0031, "step": 245450 }, { "epoch": 28.96413402548372, "grad_norm": 0.022377274930477142, "learning_rate": 3.782621589879766e-05, "loss": 0.0023, "step": 245500 }, { "epoch": 28.97003303445021, "grad_norm": 0.06102662533521652, "learning_rate": 3.780819963488652e-05, "loss": 0.002, "step": 245550 }, { "epoch": 28.975932043416705, "grad_norm": 0.08421658724546432, "learning_rate": 3.779018505379525e-05, "loss": 0.0028, "step": 245600 }, { "epoch": 28.9818310523832, "grad_norm": 0.09045426547527313, "learning_rate": 3.77721721580104e-05, "loss": 0.0023, "step": 245650 }, { "epoch": 28.987730061349694, "grad_norm": 0.10336629301309586, "learning_rate": 3.775416095001827e-05, "loss": 0.0017, "step": 245700 }, { "epoch": 28.993629070316185, "grad_norm": 0.06593772023916245, "learning_rate": 3.7736151432304914e-05, "loss": 0.0022, "step": 245750 }, { "epoch": 28.99952807928268, "grad_norm": 0.16427701711654663, "learning_rate": 3.7718143607356164e-05, "loss": 0.0021, "step": 245800 }, { "epoch": 29.005427088249174, "grad_norm": 0.01963692717254162, "learning_rate": 3.770013747765763e-05, "loss": 0.0016, "step": 245850 }, { "epoch": 29.01132609721567, "grad_norm": 0.11214175075292587, "learning_rate": 3.768213304569466e-05, "loss": 0.0017, "step": 245900 }, { "epoch": 29.01722510618216, "grad_norm": 0.0004401639453135431, "learning_rate": 3.766413031395241e-05, "loss": 0.0012, "step": 245950 }, { "epoch": 29.023124115148654, "grad_norm": 0.062030620872974396, "learning_rate": 3.764612928491575e-05, "loss": 0.0016, "step": 246000 }, { "epoch": 29.023124115148654, "eval_cer": 0.08605851979345955, "eval_loss": 0.000451875472208485, "eval_runtime": 2.0308, "eval_samples_per_second": 49.242, "eval_steps_per_second": 1.97, "eval_wer": 0.27, "step": 246000 }, { "epoch": 29.02902312411515, "grad_norm": 0.01408471167087555, "learning_rate": 3.762812996106936e-05, "loss": 0.0019, "step": 246050 }, { "epoch": 29.034922133081643, "grad_norm": 0.06176822632551193, "learning_rate": 3.7610132344897656e-05, "loss": 0.0015, "step": 246100 }, { "epoch": 29.040821142048134, "grad_norm": 0.1680014282464981, "learning_rate": 3.759213643888483e-05, "loss": 0.0021, "step": 246150 }, { "epoch": 29.04672015101463, "grad_norm": 0.053616996854543686, "learning_rate": 3.757414224551483e-05, "loss": 0.0019, "step": 246200 }, { "epoch": 29.052619159981123, "grad_norm": 0.01467670127749443, "learning_rate": 3.7556149767271384e-05, "loss": 0.002, "step": 246250 }, { "epoch": 29.058518168947618, "grad_norm": 0.019663752987980843, "learning_rate": 3.7538159006637954e-05, "loss": 0.0018, "step": 246300 }, { "epoch": 29.06441717791411, "grad_norm": 0.00021489922073669732, "learning_rate": 3.7520169966097796e-05, "loss": 0.0019, "step": 246350 }, { "epoch": 29.070316186880603, "grad_norm": 0.1984279453754425, "learning_rate": 3.750218264813393e-05, "loss": 0.0014, "step": 246400 }, { "epoch": 29.076215195847098, "grad_norm": 0.03559650108218193, "learning_rate": 3.748419705522912e-05, "loss": 0.0017, "step": 246450 }, { "epoch": 29.082114204813593, "grad_norm": 0.02536315843462944, "learning_rate": 3.746621318986587e-05, "loss": 0.0017, "step": 246500 }, { "epoch": 29.088013213780084, "grad_norm": 0.3186301290988922, "learning_rate": 3.744823105452651e-05, "loss": 0.0034, "step": 246550 }, { "epoch": 29.093912222746578, "grad_norm": 0.22818154096603394, "learning_rate": 3.743025065169306e-05, "loss": 0.0024, "step": 246600 }, { "epoch": 29.099811231713073, "grad_norm": 0.01801191456615925, "learning_rate": 3.741227198384734e-05, "loss": 0.0026, "step": 246650 }, { "epoch": 29.105710240679567, "grad_norm": 0.08200447261333466, "learning_rate": 3.7394295053470935e-05, "loss": 0.0016, "step": 246700 }, { "epoch": 29.111609249646058, "grad_norm": 0.034455977380275726, "learning_rate": 3.7376319863045185e-05, "loss": 0.0019, "step": 246750 }, { "epoch": 29.117508258612553, "grad_norm": 0.08181019872426987, "learning_rate": 3.735834641505116e-05, "loss": 0.0015, "step": 246800 }, { "epoch": 29.123407267579047, "grad_norm": 0.047175683081150055, "learning_rate": 3.7340374711969744e-05, "loss": 0.002, "step": 246850 }, { "epoch": 29.12930627654554, "grad_norm": 0.008047109469771385, "learning_rate": 3.7322404756281523e-05, "loss": 0.0016, "step": 246900 }, { "epoch": 29.135205285512033, "grad_norm": 0.015556152909994125, "learning_rate": 3.730443655046688e-05, "loss": 0.0014, "step": 246950 }, { "epoch": 29.141104294478527, "grad_norm": 0.10888701677322388, "learning_rate": 3.7286470097005954e-05, "loss": 0.0015, "step": 247000 }, { "epoch": 29.141104294478527, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005170487565919757, "eval_runtime": 2.037, "eval_samples_per_second": 49.092, "eval_steps_per_second": 1.964, "eval_wer": 0.27, "step": 247000 }, { "epoch": 29.14700330344502, "grad_norm": 0.013888522982597351, "learning_rate": 3.726850539837862e-05, "loss": 0.002, "step": 247050 }, { "epoch": 29.152902312411516, "grad_norm": 0.021012721583247185, "learning_rate": 3.725054245706454e-05, "loss": 0.0016, "step": 247100 }, { "epoch": 29.158801321378007, "grad_norm": 0.043595027178525925, "learning_rate": 3.72325812755431e-05, "loss": 0.0017, "step": 247150 }, { "epoch": 29.164700330344502, "grad_norm": 0.0006156627205200493, "learning_rate": 3.721462185629347e-05, "loss": 0.0018, "step": 247200 }, { "epoch": 29.170599339310996, "grad_norm": 0.0069056907668709755, "learning_rate": 3.719666420179455e-05, "loss": 0.0017, "step": 247250 }, { "epoch": 29.17649834827749, "grad_norm": 0.016919834539294243, "learning_rate": 3.717870831452504e-05, "loss": 0.0022, "step": 247300 }, { "epoch": 29.182397357243982, "grad_norm": 0.020220989361405373, "learning_rate": 3.716075419696337e-05, "loss": 0.0023, "step": 247350 }, { "epoch": 29.188296366210476, "grad_norm": 0.0018516427371650934, "learning_rate": 3.714280185158771e-05, "loss": 0.0023, "step": 247400 }, { "epoch": 29.19419537517697, "grad_norm": 0.010523169301450253, "learning_rate": 3.7124851280876004e-05, "loss": 0.0018, "step": 247450 }, { "epoch": 29.200094384143465, "grad_norm": 0.009607580490410328, "learning_rate": 3.7106902487305957e-05, "loss": 0.0022, "step": 247500 }, { "epoch": 29.205993393109956, "grad_norm": 0.056109458208084106, "learning_rate": 3.7088955473355e-05, "loss": 0.002, "step": 247550 }, { "epoch": 29.21189240207645, "grad_norm": 0.10704290866851807, "learning_rate": 3.707101024150036e-05, "loss": 0.0022, "step": 247600 }, { "epoch": 29.217791411042946, "grad_norm": 0.10231085866689682, "learning_rate": 3.7053066794218986e-05, "loss": 0.0018, "step": 247650 }, { "epoch": 29.22369042000944, "grad_norm": 0.19587278366088867, "learning_rate": 3.7035125133987604e-05, "loss": 0.0018, "step": 247700 }, { "epoch": 29.22958942897593, "grad_norm": 0.016419244930148125, "learning_rate": 3.7017185263282656e-05, "loss": 0.0024, "step": 247750 }, { "epoch": 29.235488437942426, "grad_norm": 0.05358327180147171, "learning_rate": 3.6999247184580383e-05, "loss": 0.0027, "step": 247800 }, { "epoch": 29.24138744690892, "grad_norm": 0.18313179910182953, "learning_rate": 3.698131090035673e-05, "loss": 0.0019, "step": 247850 }, { "epoch": 29.247286455875415, "grad_norm": 0.051146261394023895, "learning_rate": 3.696337641308746e-05, "loss": 0.002, "step": 247900 }, { "epoch": 29.253185464841906, "grad_norm": 0.06863778084516525, "learning_rate": 3.694544372524802e-05, "loss": 0.0018, "step": 247950 }, { "epoch": 29.2590844738084, "grad_norm": 0.023787904530763626, "learning_rate": 3.692751283931364e-05, "loss": 0.0021, "step": 248000 }, { "epoch": 29.2590844738084, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002845553099177778, "eval_runtime": 2.0807, "eval_samples_per_second": 48.061, "eval_steps_per_second": 1.922, "eval_wer": 0.26, "step": 248000 }, { "epoch": 29.264983482774895, "grad_norm": 0.1744101494550705, "learning_rate": 3.690958375775932e-05, "loss": 0.002, "step": 248050 }, { "epoch": 29.27088249174139, "grad_norm": 0.2038707435131073, "learning_rate": 3.6891656483059763e-05, "loss": 0.0017, "step": 248100 }, { "epoch": 29.27678150070788, "grad_norm": 0.02485639788210392, "learning_rate": 3.687373101768948e-05, "loss": 0.0022, "step": 248150 }, { "epoch": 29.282680509674375, "grad_norm": 0.0028817018028348684, "learning_rate": 3.685580736412268e-05, "loss": 0.0017, "step": 248200 }, { "epoch": 29.28857951864087, "grad_norm": 0.07477408647537231, "learning_rate": 3.683788552483335e-05, "loss": 0.0023, "step": 248250 }, { "epoch": 29.29447852760736, "grad_norm": 0.06174653768539429, "learning_rate": 3.68199655022952e-05, "loss": 0.0019, "step": 248300 }, { "epoch": 29.300377536573855, "grad_norm": 0.09583503752946854, "learning_rate": 3.6802047298981746e-05, "loss": 0.0027, "step": 248350 }, { "epoch": 29.30627654554035, "grad_norm": 0.2071661800146103, "learning_rate": 3.67841309173662e-05, "loss": 0.002, "step": 248400 }, { "epoch": 29.312175554506844, "grad_norm": 0.015691278502345085, "learning_rate": 3.6766216359921536e-05, "loss": 0.0036, "step": 248450 }, { "epoch": 29.318074563473335, "grad_norm": 0.08900006115436554, "learning_rate": 3.6748303629120484e-05, "loss": 0.0017, "step": 248500 }, { "epoch": 29.32397357243983, "grad_norm": 0.2783171534538269, "learning_rate": 3.673039272743551e-05, "loss": 0.0022, "step": 248550 }, { "epoch": 29.329872581406324, "grad_norm": 0.18284106254577637, "learning_rate": 3.671248365733883e-05, "loss": 0.0017, "step": 248600 }, { "epoch": 29.33577159037282, "grad_norm": 0.006950962357223034, "learning_rate": 3.669457642130243e-05, "loss": 0.0014, "step": 248650 }, { "epoch": 29.34167059933931, "grad_norm": 0.01333619561046362, "learning_rate": 3.667667102179803e-05, "loss": 0.0018, "step": 248700 }, { "epoch": 29.347569608305804, "grad_norm": 0.07797438651323318, "learning_rate": 3.665876746129706e-05, "loss": 0.0027, "step": 248750 }, { "epoch": 29.3534686172723, "grad_norm": 0.0831209272146225, "learning_rate": 3.6640865742270755e-05, "loss": 0.0019, "step": 248800 }, { "epoch": 29.359367626238793, "grad_norm": 0.02036745846271515, "learning_rate": 3.662296586719005e-05, "loss": 0.0017, "step": 248850 }, { "epoch": 29.365266635205284, "grad_norm": 0.031228182837367058, "learning_rate": 3.6605067838525656e-05, "loss": 0.0024, "step": 248900 }, { "epoch": 29.37116564417178, "grad_norm": 0.2260349690914154, "learning_rate": 3.658717165874801e-05, "loss": 0.0019, "step": 248950 }, { "epoch": 29.377064653138273, "grad_norm": 0.13654962182044983, "learning_rate": 3.65692773303273e-05, "loss": 0.0022, "step": 249000 }, { "epoch": 29.377064653138273, "eval_cer": 0.08347676419965576, "eval_loss": 0.00027723744278773665, "eval_runtime": 2.09, "eval_samples_per_second": 47.848, "eval_steps_per_second": 1.914, "eval_wer": 0.26, "step": 249000 }, { "epoch": 29.382963662104768, "grad_norm": 0.010004093870520592, "learning_rate": 3.655138485573347e-05, "loss": 0.002, "step": 249050 }, { "epoch": 29.38886267107126, "grad_norm": 0.08127211779356003, "learning_rate": 3.653349423743619e-05, "loss": 0.0015, "step": 249100 }, { "epoch": 29.394761680037753, "grad_norm": 0.006867399904876947, "learning_rate": 3.651560547790488e-05, "loss": 0.0018, "step": 249150 }, { "epoch": 29.400660689004248, "grad_norm": 0.0760272741317749, "learning_rate": 3.64977185796087e-05, "loss": 0.0013, "step": 249200 }, { "epoch": 29.406559697970742, "grad_norm": 0.010597019456326962, "learning_rate": 3.647983354501656e-05, "loss": 0.0028, "step": 249250 }, { "epoch": 29.412458706937233, "grad_norm": 0.007091972976922989, "learning_rate": 3.6461950376597124e-05, "loss": 0.0017, "step": 249300 }, { "epoch": 29.418357715903728, "grad_norm": 0.18921059370040894, "learning_rate": 3.6444069076818774e-05, "loss": 0.0015, "step": 249350 }, { "epoch": 29.424256724870222, "grad_norm": 0.003703768365085125, "learning_rate": 3.642618964814964e-05, "loss": 0.0029, "step": 249400 }, { "epoch": 29.430155733836717, "grad_norm": 0.12006266415119171, "learning_rate": 3.640831209305761e-05, "loss": 0.0026, "step": 249450 }, { "epoch": 29.436054742803208, "grad_norm": 0.09447730332612991, "learning_rate": 3.639043641401028e-05, "loss": 0.0025, "step": 249500 }, { "epoch": 29.441953751769702, "grad_norm": 0.20731604099273682, "learning_rate": 3.6372562613475024e-05, "loss": 0.0022, "step": 249550 }, { "epoch": 29.447852760736197, "grad_norm": 0.012774365954101086, "learning_rate": 3.635469069391895e-05, "loss": 0.0019, "step": 249600 }, { "epoch": 29.45375176970269, "grad_norm": 0.05236812308430672, "learning_rate": 3.633682065780887e-05, "loss": 0.0019, "step": 249650 }, { "epoch": 29.459650778669182, "grad_norm": 0.19938494265079498, "learning_rate": 3.631895250761141e-05, "loss": 0.0015, "step": 249700 }, { "epoch": 29.465549787635677, "grad_norm": 0.06107853353023529, "learning_rate": 3.630108624579286e-05, "loss": 0.0023, "step": 249750 }, { "epoch": 29.47144879660217, "grad_norm": 0.4280133545398712, "learning_rate": 3.628322187481928e-05, "loss": 0.0018, "step": 249800 }, { "epoch": 29.477347805568666, "grad_norm": 0.012295511551201344, "learning_rate": 3.626535939715647e-05, "loss": 0.0014, "step": 249850 }, { "epoch": 29.483246814535157, "grad_norm": 0.1736133098602295, "learning_rate": 3.6247498815269973e-05, "loss": 0.0025, "step": 249900 }, { "epoch": 29.48914582350165, "grad_norm": 0.0820813924074173, "learning_rate": 3.622964013162505e-05, "loss": 0.0023, "step": 249950 }, { "epoch": 29.495044832468146, "grad_norm": 0.05702553689479828, "learning_rate": 3.621178334868672e-05, "loss": 0.0019, "step": 250000 }, { "epoch": 29.495044832468146, "eval_cer": 0.08347676419965576, "eval_loss": 0.00020213729294482619, "eval_runtime": 2.0261, "eval_samples_per_second": 49.357, "eval_steps_per_second": 1.974, "eval_wer": 0.26, "step": 250000 }, { "epoch": 29.50094384143464, "grad_norm": 0.0023688997607678175, "learning_rate": 3.619392846891975e-05, "loss": 0.0017, "step": 250050 }, { "epoch": 29.50684285040113, "grad_norm": 0.007795293815433979, "learning_rate": 3.617607549478862e-05, "loss": 0.0022, "step": 250100 }, { "epoch": 29.512741859367626, "grad_norm": 0.1543450802564621, "learning_rate": 3.6158224428757535e-05, "loss": 0.0019, "step": 250150 }, { "epoch": 29.51864086833412, "grad_norm": 0.054969288408756256, "learning_rate": 3.614037527329048e-05, "loss": 0.0024, "step": 250200 }, { "epoch": 29.524539877300615, "grad_norm": 0.09126213192939758, "learning_rate": 3.612252803085113e-05, "loss": 0.0025, "step": 250250 }, { "epoch": 29.530438886267106, "grad_norm": 0.003579816548153758, "learning_rate": 3.610468270390295e-05, "loss": 0.0015, "step": 250300 }, { "epoch": 29.5363378952336, "grad_norm": 0.13488511741161346, "learning_rate": 3.6086839294909084e-05, "loss": 0.0017, "step": 250350 }, { "epoch": 29.542236904200095, "grad_norm": 0.10048523545265198, "learning_rate": 3.6068997806332446e-05, "loss": 0.0021, "step": 250400 }, { "epoch": 29.54813591316659, "grad_norm": 0.10704481601715088, "learning_rate": 3.605115824063569e-05, "loss": 0.0015, "step": 250450 }, { "epoch": 29.55403492213308, "grad_norm": 0.0021297980565577745, "learning_rate": 3.6033320600281165e-05, "loss": 0.0025, "step": 250500 }, { "epoch": 29.559933931099575, "grad_norm": 0.08917209506034851, "learning_rate": 3.601548488773098e-05, "loss": 0.0015, "step": 250550 }, { "epoch": 29.56583294006607, "grad_norm": 0.19348670542240143, "learning_rate": 3.599765110544699e-05, "loss": 0.0022, "step": 250600 }, { "epoch": 29.57173194903256, "grad_norm": 0.10753419995307922, "learning_rate": 3.5979819255890784e-05, "loss": 0.0016, "step": 250650 }, { "epoch": 29.577630957999055, "grad_norm": 0.013296464458107948, "learning_rate": 3.596198934152365e-05, "loss": 0.0016, "step": 250700 }, { "epoch": 29.58352996696555, "grad_norm": 0.09881056100130081, "learning_rate": 3.594416136480664e-05, "loss": 0.0027, "step": 250750 }, { "epoch": 29.589428975932044, "grad_norm": 0.1598733812570572, "learning_rate": 3.592633532820052e-05, "loss": 0.0022, "step": 250800 }, { "epoch": 29.595327984898535, "grad_norm": 0.019204482436180115, "learning_rate": 3.590851123416579e-05, "loss": 0.0027, "step": 250850 }, { "epoch": 29.60122699386503, "grad_norm": 0.151561439037323, "learning_rate": 3.589068908516271e-05, "loss": 0.0023, "step": 250900 }, { "epoch": 29.607126002831524, "grad_norm": 0.13962262868881226, "learning_rate": 3.5872868883651245e-05, "loss": 0.0022, "step": 250950 }, { "epoch": 29.61302501179802, "grad_norm": 0.006199653726071119, "learning_rate": 3.585505063209109e-05, "loss": 0.0014, "step": 251000 }, { "epoch": 29.61302501179802, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002941841084975749, "eval_runtime": 2.0993, "eval_samples_per_second": 47.636, "eval_steps_per_second": 1.905, "eval_wer": 0.26, "step": 251000 }, { "epoch": 29.61892402076451, "grad_norm": 0.018961843103170395, "learning_rate": 3.5837234332941685e-05, "loss": 0.0012, "step": 251050 }, { "epoch": 29.624823029731004, "grad_norm": 0.0352482944726944, "learning_rate": 3.5819419988662186e-05, "loss": 0.0015, "step": 251100 }, { "epoch": 29.6307220386975, "grad_norm": 0.5555554032325745, "learning_rate": 3.5801607601711476e-05, "loss": 0.0021, "step": 251150 }, { "epoch": 29.636621047663994, "grad_norm": 0.375885546207428, "learning_rate": 3.5783797174548194e-05, "loss": 0.0021, "step": 251200 }, { "epoch": 29.642520056630485, "grad_norm": 0.0040763490833342075, "learning_rate": 3.576598870963068e-05, "loss": 0.0019, "step": 251250 }, { "epoch": 29.64841906559698, "grad_norm": 0.14116616547107697, "learning_rate": 3.5748182209417026e-05, "loss": 0.0018, "step": 251300 }, { "epoch": 29.654318074563474, "grad_norm": 0.11408039927482605, "learning_rate": 3.573037767636505e-05, "loss": 0.0023, "step": 251350 }, { "epoch": 29.660217083529968, "grad_norm": 0.04742005094885826, "learning_rate": 3.571257511293228e-05, "loss": 0.0024, "step": 251400 }, { "epoch": 29.66611609249646, "grad_norm": 0.1464805006980896, "learning_rate": 3.569477452157598e-05, "loss": 0.0019, "step": 251450 }, { "epoch": 29.672015101462954, "grad_norm": 0.0016258707037195563, "learning_rate": 3.567697590475315e-05, "loss": 0.0016, "step": 251500 }, { "epoch": 29.677914110429448, "grad_norm": 0.0681070014834404, "learning_rate": 3.5659179264920506e-05, "loss": 0.0019, "step": 251550 }, { "epoch": 29.683813119395943, "grad_norm": 0.037749920040369034, "learning_rate": 3.56413846045345e-05, "loss": 0.0024, "step": 251600 }, { "epoch": 29.689712128362434, "grad_norm": 0.033508893102407455, "learning_rate": 3.5623591926051314e-05, "loss": 0.0028, "step": 251650 }, { "epoch": 29.695611137328928, "grad_norm": 0.01427391730248928, "learning_rate": 3.560580123192684e-05, "loss": 0.002, "step": 251700 }, { "epoch": 29.701510146295423, "grad_norm": 0.01902081072330475, "learning_rate": 3.558801252461672e-05, "loss": 0.002, "step": 251750 }, { "epoch": 29.707409155261917, "grad_norm": 0.0807056799530983, "learning_rate": 3.5570225806576304e-05, "loss": 0.0014, "step": 251800 }, { "epoch": 29.71330816422841, "grad_norm": 0.008047386072576046, "learning_rate": 3.555244108026066e-05, "loss": 0.0018, "step": 251850 }, { "epoch": 29.719207173194903, "grad_norm": 0.05395558476448059, "learning_rate": 3.5534658348124594e-05, "loss": 0.0017, "step": 251900 }, { "epoch": 29.725106182161397, "grad_norm": 0.029502740129828453, "learning_rate": 3.551687761262266e-05, "loss": 0.002, "step": 251950 }, { "epoch": 29.731005191127892, "grad_norm": 0.003908132202923298, "learning_rate": 3.5499098876209094e-05, "loss": 0.0018, "step": 252000 }, { "epoch": 29.731005191127892, "eval_cer": 0.08347676419965576, "eval_loss": 0.00028082524659112096, "eval_runtime": 2.032, "eval_samples_per_second": 49.212, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 252000 }, { "epoch": 29.736904200094383, "grad_norm": 0.0351092703640461, "learning_rate": 3.548132214133787e-05, "loss": 0.0022, "step": 252050 }, { "epoch": 29.742803209060877, "grad_norm": 0.017903735861182213, "learning_rate": 3.546354741046269e-05, "loss": 0.0015, "step": 252100 }, { "epoch": 29.748702218027372, "grad_norm": 0.26073363423347473, "learning_rate": 3.5445774686036996e-05, "loss": 0.0018, "step": 252150 }, { "epoch": 29.754601226993866, "grad_norm": 0.0019672962371259928, "learning_rate": 3.5428003970513914e-05, "loss": 0.002, "step": 252200 }, { "epoch": 29.760500235960357, "grad_norm": 0.08808755874633789, "learning_rate": 3.541023526634632e-05, "loss": 0.0024, "step": 252250 }, { "epoch": 29.766399244926852, "grad_norm": 0.03525533154606819, "learning_rate": 3.539246857598683e-05, "loss": 0.002, "step": 252300 }, { "epoch": 29.772298253893346, "grad_norm": 0.048088185489177704, "learning_rate": 3.537470390188774e-05, "loss": 0.0029, "step": 252350 }, { "epoch": 29.77819726285984, "grad_norm": 0.2842418849468231, "learning_rate": 3.535694124650109e-05, "loss": 0.0032, "step": 252400 }, { "epoch": 29.784096271826332, "grad_norm": 0.02466459386050701, "learning_rate": 3.5339180612278634e-05, "loss": 0.002, "step": 252450 }, { "epoch": 29.789995280792827, "grad_norm": 0.21531522274017334, "learning_rate": 3.5321422001671844e-05, "loss": 0.0019, "step": 252500 }, { "epoch": 29.79589428975932, "grad_norm": 0.2253609001636505, "learning_rate": 3.530366541713194e-05, "loss": 0.002, "step": 252550 }, { "epoch": 29.801793298725816, "grad_norm": 0.18583440780639648, "learning_rate": 3.5285910861109836e-05, "loss": 0.0014, "step": 252600 }, { "epoch": 29.807692307692307, "grad_norm": 0.07325971126556396, "learning_rate": 3.5268158336056175e-05, "loss": 0.0018, "step": 252650 }, { "epoch": 29.8135913166588, "grad_norm": 0.005999128334224224, "learning_rate": 3.5250407844421304e-05, "loss": 0.0017, "step": 252700 }, { "epoch": 29.819490325625296, "grad_norm": 0.022158686071634293, "learning_rate": 3.523265938865533e-05, "loss": 0.002, "step": 252750 }, { "epoch": 29.82538933459179, "grad_norm": 0.12744538486003876, "learning_rate": 3.5214912971208004e-05, "loss": 0.0022, "step": 252800 }, { "epoch": 29.83128834355828, "grad_norm": 0.025569848716259003, "learning_rate": 3.519716859452888e-05, "loss": 0.0021, "step": 252850 }, { "epoch": 29.837187352524776, "grad_norm": 0.09949634969234467, "learning_rate": 3.517942626106719e-05, "loss": 0.0021, "step": 252900 }, { "epoch": 29.84308636149127, "grad_norm": 0.1289680302143097, "learning_rate": 3.5161685973271876e-05, "loss": 0.0016, "step": 252950 }, { "epoch": 29.848985370457765, "grad_norm": 0.0008628061041235924, "learning_rate": 3.514394773359163e-05, "loss": 0.0018, "step": 253000 }, { "epoch": 29.848985370457765, "eval_cer": 0.08347676419965576, "eval_loss": 0.00024953283718787134, "eval_runtime": 2.041, "eval_samples_per_second": 48.995, "eval_steps_per_second": 1.96, "eval_wer": 0.26, "step": 253000 }, { "epoch": 29.854884379424256, "grad_norm": 0.022400304675102234, "learning_rate": 3.512621154447482e-05, "loss": 0.0016, "step": 253050 }, { "epoch": 29.86078338839075, "grad_norm": 0.30429694056510925, "learning_rate": 3.510847740836956e-05, "loss": 0.0023, "step": 253100 }, { "epoch": 29.866682397357245, "grad_norm": 0.08829358965158463, "learning_rate": 3.509074532772367e-05, "loss": 0.002, "step": 253150 }, { "epoch": 29.87258140632374, "grad_norm": 0.011365777812898159, "learning_rate": 3.507301530498469e-05, "loss": 0.0023, "step": 253200 }, { "epoch": 29.87848041529023, "grad_norm": 0.09642383456230164, "learning_rate": 3.5055287342599864e-05, "loss": 0.0027, "step": 253250 }, { "epoch": 29.884379424256725, "grad_norm": 0.1381470113992691, "learning_rate": 3.503756144301617e-05, "loss": 0.0022, "step": 253300 }, { "epoch": 29.89027843322322, "grad_norm": 0.06648480147123337, "learning_rate": 3.50198376086803e-05, "loss": 0.0024, "step": 253350 }, { "epoch": 29.89617744218971, "grad_norm": 0.1526830941438675, "learning_rate": 3.500211584203865e-05, "loss": 0.0021, "step": 253400 }, { "epoch": 29.902076451156205, "grad_norm": 0.0925348624587059, "learning_rate": 3.498439614553732e-05, "loss": 0.0017, "step": 253450 }, { "epoch": 29.9079754601227, "grad_norm": 0.03392761945724487, "learning_rate": 3.4966678521622166e-05, "loss": 0.0024, "step": 253500 }, { "epoch": 29.913874469089194, "grad_norm": 0.06843653321266174, "learning_rate": 3.49489629727387e-05, "loss": 0.0019, "step": 253550 }, { "epoch": 29.919773478055685, "grad_norm": 0.0831701010465622, "learning_rate": 3.49312495013322e-05, "loss": 0.0017, "step": 253600 }, { "epoch": 29.92567248702218, "grad_norm": 0.13293980062007904, "learning_rate": 3.4913538109847625e-05, "loss": 0.0024, "step": 253650 }, { "epoch": 29.931571495988674, "grad_norm": 0.0845353975892067, "learning_rate": 3.489582880072967e-05, "loss": 0.0022, "step": 253700 }, { "epoch": 29.93747050495517, "grad_norm": 0.11118384450674057, "learning_rate": 3.487812157642272e-05, "loss": 0.0025, "step": 253750 }, { "epoch": 29.94336951392166, "grad_norm": 0.07907608151435852, "learning_rate": 3.4860416439370884e-05, "loss": 0.0021, "step": 253800 }, { "epoch": 29.949268522888154, "grad_norm": 0.05719055235385895, "learning_rate": 3.484271339201798e-05, "loss": 0.0019, "step": 253850 }, { "epoch": 29.95516753185465, "grad_norm": 0.11535614728927612, "learning_rate": 3.482501243680754e-05, "loss": 0.0018, "step": 253900 }, { "epoch": 29.961066540821143, "grad_norm": 0.02351788803935051, "learning_rate": 3.480731357618281e-05, "loss": 0.0015, "step": 253950 }, { "epoch": 29.966965549787634, "grad_norm": 0.007274084724485874, "learning_rate": 3.478961681258674e-05, "loss": 0.0027, "step": 254000 }, { "epoch": 29.966965549787634, "eval_cer": 0.08347676419965576, "eval_loss": 3.374650623300113e-05, "eval_runtime": 2.0943, "eval_samples_per_second": 47.75, "eval_steps_per_second": 1.91, "eval_wer": 0.26, "step": 254000 }, { "epoch": 29.97286455875413, "grad_norm": 0.14592953026294708, "learning_rate": 3.477192214846199e-05, "loss": 0.0017, "step": 254050 }, { "epoch": 29.978763567720623, "grad_norm": 0.0004134593182243407, "learning_rate": 3.475422958625094e-05, "loss": 0.0016, "step": 254100 }, { "epoch": 29.984662576687118, "grad_norm": 0.04107988253235817, "learning_rate": 3.473653912839566e-05, "loss": 0.0012, "step": 254150 }, { "epoch": 29.99056158565361, "grad_norm": 0.13188579678535461, "learning_rate": 3.4718850777337966e-05, "loss": 0.002, "step": 254200 }, { "epoch": 29.996460594620103, "grad_norm": 0.018647203221917152, "learning_rate": 3.470116453551934e-05, "loss": 0.002, "step": 254250 }, { "epoch": 30.002359603586598, "grad_norm": 0.02413887158036232, "learning_rate": 3.468348040538101e-05, "loss": 0.0019, "step": 254300 }, { "epoch": 30.008258612553092, "grad_norm": 0.014815163798630238, "learning_rate": 3.466579838936388e-05, "loss": 0.0017, "step": 254350 }, { "epoch": 30.014157621519583, "grad_norm": 0.2520253658294678, "learning_rate": 3.464811848990859e-05, "loss": 0.002, "step": 254400 }, { "epoch": 30.020056630486078, "grad_norm": 0.06020649895071983, "learning_rate": 3.463044070945546e-05, "loss": 0.0015, "step": 254450 }, { "epoch": 30.025955639452572, "grad_norm": 0.16138964891433716, "learning_rate": 3.461276505044455e-05, "loss": 0.0016, "step": 254500 }, { "epoch": 30.031854648419067, "grad_norm": 0.17341263592243195, "learning_rate": 3.459509151531559e-05, "loss": 0.0017, "step": 254550 }, { "epoch": 30.037753657385558, "grad_norm": 0.00244996789842844, "learning_rate": 3.4577420106508064e-05, "loss": 0.002, "step": 254600 }, { "epoch": 30.043652666352052, "grad_norm": 0.017168212682008743, "learning_rate": 3.455975082646112e-05, "loss": 0.0016, "step": 254650 }, { "epoch": 30.049551675318547, "grad_norm": 0.01294935867190361, "learning_rate": 3.454208367761365e-05, "loss": 0.0017, "step": 254700 }, { "epoch": 30.05545068428504, "grad_norm": 0.014510059729218483, "learning_rate": 3.452441866240419e-05, "loss": 0.0015, "step": 254750 }, { "epoch": 30.061349693251532, "grad_norm": 0.10642451047897339, "learning_rate": 3.450675578327105e-05, "loss": 0.0012, "step": 254800 }, { "epoch": 30.067248702218027, "grad_norm": 0.2232711762189865, "learning_rate": 3.448909504265221e-05, "loss": 0.0016, "step": 254850 }, { "epoch": 30.07314771118452, "grad_norm": 0.036545418202877045, "learning_rate": 3.447143644298535e-05, "loss": 0.0014, "step": 254900 }, { "epoch": 30.079046720151016, "grad_norm": 0.14258623123168945, "learning_rate": 3.445377998670788e-05, "loss": 0.0019, "step": 254950 }, { "epoch": 30.084945729117507, "grad_norm": 0.03435451164841652, "learning_rate": 3.4436125676256904e-05, "loss": 0.0014, "step": 255000 }, { "epoch": 30.084945729117507, "eval_cer": 0.08347676419965576, "eval_loss": 8.074516517808661e-05, "eval_runtime": 2.0153, "eval_samples_per_second": 49.621, "eval_steps_per_second": 1.985, "eval_wer": 0.26, "step": 255000 }, { "epoch": 30.090844738084, "grad_norm": 0.1041162982583046, "learning_rate": 3.441847351406922e-05, "loss": 0.0016, "step": 255050 }, { "epoch": 30.096743747050496, "grad_norm": 0.021289777010679245, "learning_rate": 3.440082350258133e-05, "loss": 0.0015, "step": 255100 }, { "epoch": 30.10264275601699, "grad_norm": 0.12726448476314545, "learning_rate": 3.438317564422944e-05, "loss": 0.0018, "step": 255150 }, { "epoch": 30.10854176498348, "grad_norm": 0.13782817125320435, "learning_rate": 3.436552994144946e-05, "loss": 0.0019, "step": 255200 }, { "epoch": 30.114440773949976, "grad_norm": 0.10622716695070267, "learning_rate": 3.4347886396677034e-05, "loss": 0.0019, "step": 255250 }, { "epoch": 30.12033978291647, "grad_norm": 0.15505202114582062, "learning_rate": 3.433024501234745e-05, "loss": 0.0021, "step": 255300 }, { "epoch": 30.126238791882965, "grad_norm": 0.028246557340025902, "learning_rate": 3.431260579089575e-05, "loss": 0.0016, "step": 255350 }, { "epoch": 30.132137800849456, "grad_norm": 0.09172134101390839, "learning_rate": 3.429496873475664e-05, "loss": 0.0014, "step": 255400 }, { "epoch": 30.13803680981595, "grad_norm": 0.11803460121154785, "learning_rate": 3.4277333846364545e-05, "loss": 0.0019, "step": 255450 }, { "epoch": 30.143935818782445, "grad_norm": 0.027249939739704132, "learning_rate": 3.425970112815359e-05, "loss": 0.0011, "step": 255500 }, { "epoch": 30.14983482774894, "grad_norm": 0.10035375505685806, "learning_rate": 3.4242070582557596e-05, "loss": 0.0014, "step": 255550 }, { "epoch": 30.15573383671543, "grad_norm": 0.03329979628324509, "learning_rate": 3.422444221201009e-05, "loss": 0.0021, "step": 255600 }, { "epoch": 30.161632845681925, "grad_norm": 0.0022193032782524824, "learning_rate": 3.420681601894429e-05, "loss": 0.0014, "step": 255650 }, { "epoch": 30.16753185464842, "grad_norm": 0.00748221343383193, "learning_rate": 3.418919200579312e-05, "loss": 0.0019, "step": 255700 }, { "epoch": 30.173430863614914, "grad_norm": 0.2588389217853546, "learning_rate": 3.4171570174989215e-05, "loss": 0.0015, "step": 255750 }, { "epoch": 30.179329872581405, "grad_norm": 0.0015542226610705256, "learning_rate": 3.415395052896487e-05, "loss": 0.0021, "step": 255800 }, { "epoch": 30.1852288815479, "grad_norm": 0.007989196106791496, "learning_rate": 3.4136333070152127e-05, "loss": 0.0017, "step": 255850 }, { "epoch": 30.191127890514394, "grad_norm": 0.010612974874675274, "learning_rate": 3.411871780098269e-05, "loss": 0.0014, "step": 255900 }, { "epoch": 30.197026899480885, "grad_norm": 0.23253761231899261, "learning_rate": 3.4101104723887986e-05, "loss": 0.0022, "step": 255950 }, { "epoch": 30.20292590844738, "grad_norm": 0.1019466370344162, "learning_rate": 3.408349384129912e-05, "loss": 0.0016, "step": 256000 }, { "epoch": 30.20292590844738, "eval_cer": 0.08347676419965576, "eval_loss": 9.70220789895393e-05, "eval_runtime": 2.0941, "eval_samples_per_second": 47.754, "eval_steps_per_second": 1.91, "eval_wer": 0.26, "step": 256000 }, { "epoch": 30.208824917413875, "grad_norm": 0.03783956542611122, "learning_rate": 3.40658851556469e-05, "loss": 0.0017, "step": 256050 }, { "epoch": 30.21472392638037, "grad_norm": 0.06844331324100494, "learning_rate": 3.4048278669361834e-05, "loss": 0.0014, "step": 256100 }, { "epoch": 30.22062293534686, "grad_norm": 0.04749719053506851, "learning_rate": 3.403067438487411e-05, "loss": 0.0013, "step": 256150 }, { "epoch": 30.226521944313355, "grad_norm": 0.0020081703551113605, "learning_rate": 3.401307230461364e-05, "loss": 0.0011, "step": 256200 }, { "epoch": 30.23242095327985, "grad_norm": 0.09794116765260696, "learning_rate": 3.3995472431010025e-05, "loss": 0.0026, "step": 256250 }, { "epoch": 30.238319962246344, "grad_norm": 0.005962054710835218, "learning_rate": 3.3977874766492556e-05, "loss": 0.0016, "step": 256300 }, { "epoch": 30.244218971212835, "grad_norm": 0.24771076440811157, "learning_rate": 3.396027931349021e-05, "loss": 0.0021, "step": 256350 }, { "epoch": 30.25011798017933, "grad_norm": 0.004341702442616224, "learning_rate": 3.3942686074431676e-05, "loss": 0.0025, "step": 256400 }, { "epoch": 30.256016989145824, "grad_norm": 0.0011487611336633563, "learning_rate": 3.392509505174531e-05, "loss": 0.0013, "step": 256450 }, { "epoch": 30.26191599811232, "grad_norm": 0.026803534477949142, "learning_rate": 3.390750624785919e-05, "loss": 0.0008, "step": 256500 }, { "epoch": 30.26781500707881, "grad_norm": 0.12252206355333328, "learning_rate": 3.3889919665201077e-05, "loss": 0.0015, "step": 256550 }, { "epoch": 30.273714016045304, "grad_norm": 0.0015371254412457347, "learning_rate": 3.387233530619843e-05, "loss": 0.0014, "step": 256600 }, { "epoch": 30.2796130250118, "grad_norm": 0.0005314638838171959, "learning_rate": 3.3854753173278395e-05, "loss": 0.0016, "step": 256650 }, { "epoch": 30.285512033978293, "grad_norm": 0.04660923033952713, "learning_rate": 3.3837173268867815e-05, "loss": 0.0018, "step": 256700 }, { "epoch": 30.291411042944784, "grad_norm": 0.18126457929611206, "learning_rate": 3.3819595595393225e-05, "loss": 0.0021, "step": 256750 }, { "epoch": 30.29731005191128, "grad_norm": 0.00777422683313489, "learning_rate": 3.3802020155280845e-05, "loss": 0.0019, "step": 256800 }, { "epoch": 30.303209060877773, "grad_norm": 0.0017590157222002745, "learning_rate": 3.3784446950956586e-05, "loss": 0.0023, "step": 256850 }, { "epoch": 30.309108069844267, "grad_norm": 0.038803476840257645, "learning_rate": 3.376687598484607e-05, "loss": 0.0015, "step": 256900 }, { "epoch": 30.31500707881076, "grad_norm": 0.008176974020898342, "learning_rate": 3.37493072593746e-05, "loss": 0.0017, "step": 256950 }, { "epoch": 30.320906087777253, "grad_norm": 0.020677268505096436, "learning_rate": 3.373174077696715e-05, "loss": 0.0015, "step": 257000 }, { "epoch": 30.320906087777253, "eval_cer": 0.08347676419965576, "eval_loss": 0.00013224522990640253, "eval_runtime": 2.0381, "eval_samples_per_second": 49.066, "eval_steps_per_second": 1.963, "eval_wer": 0.26, "step": 257000 }, { "epoch": 30.326805096743747, "grad_norm": 0.031878262758255005, "learning_rate": 3.371417654004841e-05, "loss": 0.0023, "step": 257050 }, { "epoch": 30.332704105710242, "grad_norm": 0.09554962068796158, "learning_rate": 3.369661455104276e-05, "loss": 0.0014, "step": 257100 }, { "epoch": 30.338603114676733, "grad_norm": 0.7613540291786194, "learning_rate": 3.367905481237423e-05, "loss": 0.0016, "step": 257150 }, { "epoch": 30.344502123643228, "grad_norm": 0.12712462246418, "learning_rate": 3.366149732646661e-05, "loss": 0.0019, "step": 257200 }, { "epoch": 30.350401132609722, "grad_norm": 0.017411762848496437, "learning_rate": 3.364394209574332e-05, "loss": 0.0013, "step": 257250 }, { "epoch": 30.356300141576217, "grad_norm": 0.2852792739868164, "learning_rate": 3.362638912262749e-05, "loss": 0.0019, "step": 257300 }, { "epoch": 30.362199150542708, "grad_norm": 0.19066296517848969, "learning_rate": 3.360883840954193e-05, "loss": 0.002, "step": 257350 }, { "epoch": 30.368098159509202, "grad_norm": 0.029214246198534966, "learning_rate": 3.359128995890914e-05, "loss": 0.0014, "step": 257400 }, { "epoch": 30.373997168475697, "grad_norm": 0.004296797327697277, "learning_rate": 3.357374377315132e-05, "loss": 0.0015, "step": 257450 }, { "epoch": 30.37989617744219, "grad_norm": 0.0018088058568537235, "learning_rate": 3.355619985469036e-05, "loss": 0.0021, "step": 257500 }, { "epoch": 30.385795186408682, "grad_norm": 0.005305868573486805, "learning_rate": 3.3538658205947824e-05, "loss": 0.0024, "step": 257550 }, { "epoch": 30.391694195375177, "grad_norm": 0.07764217257499695, "learning_rate": 3.352111882934496e-05, "loss": 0.0015, "step": 257600 }, { "epoch": 30.39759320434167, "grad_norm": 0.0033622211776673794, "learning_rate": 3.350358172730269e-05, "loss": 0.0017, "step": 257650 }, { "epoch": 30.403492213308166, "grad_norm": 0.23981596529483795, "learning_rate": 3.3486046902241664e-05, "loss": 0.002, "step": 257700 }, { "epoch": 30.409391222274657, "grad_norm": 0.046796366572380066, "learning_rate": 3.346851435658218e-05, "loss": 0.002, "step": 257750 }, { "epoch": 30.41529023124115, "grad_norm": 0.8606444001197815, "learning_rate": 3.345098409274423e-05, "loss": 0.0018, "step": 257800 }, { "epoch": 30.421189240207646, "grad_norm": 0.06778185069561005, "learning_rate": 3.343345611314751e-05, "loss": 0.0017, "step": 257850 }, { "epoch": 30.42708824917414, "grad_norm": 0.11826595664024353, "learning_rate": 3.341593042021138e-05, "loss": 0.0013, "step": 257900 }, { "epoch": 30.43298725814063, "grad_norm": 0.11991255730390549, "learning_rate": 3.33984070163549e-05, "loss": 0.0014, "step": 257950 }, { "epoch": 30.438886267107126, "grad_norm": 0.04395994916558266, "learning_rate": 3.33808859039968e-05, "loss": 0.0018, "step": 258000 }, { "epoch": 30.438886267107126, "eval_cer": 0.08347676419965576, "eval_loss": 0.00011085960431955755, "eval_runtime": 2.0612, "eval_samples_per_second": 48.515, "eval_steps_per_second": 1.941, "eval_wer": 0.26, "step": 258000 }, { "epoch": 30.44478527607362, "grad_norm": 0.03232729434967041, "learning_rate": 3.336336708555549e-05, "loss": 0.0011, "step": 258050 }, { "epoch": 30.450684285040115, "grad_norm": 0.01248866505920887, "learning_rate": 3.334585056344908e-05, "loss": 0.001, "step": 258100 }, { "epoch": 30.456583294006606, "grad_norm": 0.0686786025762558, "learning_rate": 3.332833634009535e-05, "loss": 0.0014, "step": 258150 }, { "epoch": 30.4624823029731, "grad_norm": 0.13553790748119354, "learning_rate": 3.331082441791177e-05, "loss": 0.0014, "step": 258200 }, { "epoch": 30.468381311939595, "grad_norm": 0.3154861330986023, "learning_rate": 3.329331479931549e-05, "loss": 0.0019, "step": 258250 }, { "epoch": 30.47428032090609, "grad_norm": 0.006639566738158464, "learning_rate": 3.327580748672334e-05, "loss": 0.0023, "step": 258300 }, { "epoch": 30.48017932987258, "grad_norm": 0.00875797588378191, "learning_rate": 3.325830248255184e-05, "loss": 0.0016, "step": 258350 }, { "epoch": 30.486078338839075, "grad_norm": 0.032431475818157196, "learning_rate": 3.324079978921719e-05, "loss": 0.0023, "step": 258400 }, { "epoch": 30.49197734780557, "grad_norm": 0.008043432608246803, "learning_rate": 3.3223299409135245e-05, "loss": 0.0015, "step": 258450 }, { "epoch": 30.49787635677206, "grad_norm": 0.06387821584939957, "learning_rate": 3.320580134472157e-05, "loss": 0.0015, "step": 258500 }, { "epoch": 30.503775365738555, "grad_norm": 0.24378618597984314, "learning_rate": 3.3188305598391415e-05, "loss": 0.0022, "step": 258550 }, { "epoch": 30.50967437470505, "grad_norm": 0.08260853588581085, "learning_rate": 3.31708121725597e-05, "loss": 0.0026, "step": 258600 }, { "epoch": 30.515573383671544, "grad_norm": 0.010695367120206356, "learning_rate": 3.3153321069641e-05, "loss": 0.0021, "step": 258650 }, { "epoch": 30.521472392638035, "grad_norm": 0.07462257891893387, "learning_rate": 3.31358322920496e-05, "loss": 0.0016, "step": 258700 }, { "epoch": 30.52737140160453, "grad_norm": 0.08747866749763489, "learning_rate": 3.311834584219946e-05, "loss": 0.0015, "step": 258750 }, { "epoch": 30.533270410571024, "grad_norm": 0.22958773374557495, "learning_rate": 3.31008617225042e-05, "loss": 0.0015, "step": 258800 }, { "epoch": 30.53916941953752, "grad_norm": 0.008642977103590965, "learning_rate": 3.3083379935377154e-05, "loss": 0.0021, "step": 258850 }, { "epoch": 30.54506842850401, "grad_norm": 0.013021309860050678, "learning_rate": 3.3065900483231305e-05, "loss": 0.0016, "step": 258900 }, { "epoch": 30.550967437470504, "grad_norm": 0.026624681428074837, "learning_rate": 3.304842336847932e-05, "loss": 0.0008, "step": 258950 }, { "epoch": 30.556866446437, "grad_norm": 0.1589074581861496, "learning_rate": 3.303094859353355e-05, "loss": 0.002, "step": 259000 }, { "epoch": 30.556866446437, "eval_cer": 0.08347676419965576, "eval_loss": 8.453214832115918e-05, "eval_runtime": 2.072, "eval_samples_per_second": 48.264, "eval_steps_per_second": 1.931, "eval_wer": 0.26, "step": 259000 }, { "epoch": 30.562765455403493, "grad_norm": 0.019375013187527657, "learning_rate": 3.3013476160806e-05, "loss": 0.0014, "step": 259050 }, { "epoch": 30.568664464369984, "grad_norm": 0.05623234435915947, "learning_rate": 3.2996006072708384e-05, "loss": 0.002, "step": 259100 }, { "epoch": 30.57456347333648, "grad_norm": 0.004224998876452446, "learning_rate": 3.297853833165207e-05, "loss": 0.0013, "step": 259150 }, { "epoch": 30.580462482302973, "grad_norm": 0.17090488970279694, "learning_rate": 3.2961072940048124e-05, "loss": 0.0026, "step": 259200 }, { "epoch": 30.586361491269468, "grad_norm": 0.011071648448705673, "learning_rate": 3.294360990030726e-05, "loss": 0.0019, "step": 259250 }, { "epoch": 30.59226050023596, "grad_norm": 0.03173306956887245, "learning_rate": 3.292614921483988e-05, "loss": 0.0021, "step": 259300 }, { "epoch": 30.598159509202453, "grad_norm": 0.007230636663734913, "learning_rate": 3.290869088605608e-05, "loss": 0.0017, "step": 259350 }, { "epoch": 30.604058518168948, "grad_norm": 0.14302963018417358, "learning_rate": 3.289123491636559e-05, "loss": 0.0021, "step": 259400 }, { "epoch": 30.609957527135442, "grad_norm": 0.21167051792144775, "learning_rate": 3.287378130817783e-05, "loss": 0.0019, "step": 259450 }, { "epoch": 30.615856536101933, "grad_norm": 0.30240747332572937, "learning_rate": 3.285633006390193e-05, "loss": 0.0023, "step": 259500 }, { "epoch": 30.621755545068428, "grad_norm": 0.12489812076091766, "learning_rate": 3.2838881185946646e-05, "loss": 0.0025, "step": 259550 }, { "epoch": 30.627654554034923, "grad_norm": 0.13841556012630463, "learning_rate": 3.282143467672044e-05, "loss": 0.0024, "step": 259600 }, { "epoch": 30.633553563001417, "grad_norm": 0.03975000977516174, "learning_rate": 3.280399053863142e-05, "loss": 0.0026, "step": 259650 }, { "epoch": 30.639452571967908, "grad_norm": 0.07291978597640991, "learning_rate": 3.27865487740874e-05, "loss": 0.0018, "step": 259700 }, { "epoch": 30.645351580934403, "grad_norm": 0.07404589653015137, "learning_rate": 3.276910938549582e-05, "loss": 0.0015, "step": 259750 }, { "epoch": 30.651250589900897, "grad_norm": 0.19599446654319763, "learning_rate": 3.2751672375263836e-05, "loss": 0.0017, "step": 259800 }, { "epoch": 30.65714959886739, "grad_norm": 0.002437532413750887, "learning_rate": 3.273423774579824e-05, "loss": 0.0014, "step": 259850 }, { "epoch": 30.663048607833883, "grad_norm": 0.1392175853252411, "learning_rate": 3.271680549950553e-05, "loss": 0.002, "step": 259900 }, { "epoch": 30.668947616800377, "grad_norm": 0.0006032686214894056, "learning_rate": 3.269937563879186e-05, "loss": 0.0015, "step": 259950 }, { "epoch": 30.67484662576687, "grad_norm": 0.1043112501502037, "learning_rate": 3.2681948166063046e-05, "loss": 0.0019, "step": 260000 }, { "epoch": 30.67484662576687, "eval_cer": 0.08347676419965576, "eval_loss": 0.00014044537965673953, "eval_runtime": 2.0769, "eval_samples_per_second": 48.149, "eval_steps_per_second": 1.926, "eval_wer": 0.26, "step": 260000 }, { "epoch": 30.680745634733366, "grad_norm": 0.08171846717596054, "learning_rate": 3.266452308372459e-05, "loss": 0.0016, "step": 260050 }, { "epoch": 30.686644643699857, "grad_norm": 0.01364156510680914, "learning_rate": 3.264710039418164e-05, "loss": 0.0016, "step": 260100 }, { "epoch": 30.69254365266635, "grad_norm": 0.014062111265957355, "learning_rate": 3.2629680099839024e-05, "loss": 0.0022, "step": 260150 }, { "epoch": 30.698442661632846, "grad_norm": 0.00265332218259573, "learning_rate": 3.261226220310127e-05, "loss": 0.002, "step": 260200 }, { "epoch": 30.70434167059934, "grad_norm": 0.0840802788734436, "learning_rate": 3.259484670637254e-05, "loss": 0.0025, "step": 260250 }, { "epoch": 30.710240679565832, "grad_norm": 0.0027207653038203716, "learning_rate": 3.257743361205667e-05, "loss": 0.0016, "step": 260300 }, { "epoch": 30.716139688532326, "grad_norm": 0.013068413361907005, "learning_rate": 3.256002292255717e-05, "loss": 0.0012, "step": 260350 }, { "epoch": 30.72203869749882, "grad_norm": 0.0054330346174538136, "learning_rate": 3.254261464027723e-05, "loss": 0.0011, "step": 260400 }, { "epoch": 30.727937706465315, "grad_norm": 0.07033529877662659, "learning_rate": 3.252520876761966e-05, "loss": 0.0018, "step": 260450 }, { "epoch": 30.733836715431806, "grad_norm": 0.012249494902789593, "learning_rate": 3.250780530698701e-05, "loss": 0.0014, "step": 260500 }, { "epoch": 30.7397357243983, "grad_norm": 0.05096239969134331, "learning_rate": 3.249040426078145e-05, "loss": 0.0019, "step": 260550 }, { "epoch": 30.745634733364795, "grad_norm": 0.16729514300823212, "learning_rate": 3.247300563140481e-05, "loss": 0.0022, "step": 260600 }, { "epoch": 30.75153374233129, "grad_norm": 0.01784825511276722, "learning_rate": 3.245560942125862e-05, "loss": 0.0021, "step": 260650 }, { "epoch": 30.75743275129778, "grad_norm": 0.0005952033097855747, "learning_rate": 3.243821563274405e-05, "loss": 0.0024, "step": 260700 }, { "epoch": 30.763331760264276, "grad_norm": 0.014830751344561577, "learning_rate": 3.2420824268261953e-05, "loss": 0.0018, "step": 260750 }, { "epoch": 30.76923076923077, "grad_norm": 0.07032927125692368, "learning_rate": 3.2403435330212804e-05, "loss": 0.0021, "step": 260800 }, { "epoch": 30.77512977819726, "grad_norm": 0.04248708114027977, "learning_rate": 3.238604882099683e-05, "loss": 0.002, "step": 260850 }, { "epoch": 30.781028787163756, "grad_norm": 0.009644011966884136, "learning_rate": 3.236866474301384e-05, "loss": 0.0023, "step": 260900 }, { "epoch": 30.78692779613025, "grad_norm": 0.04473266378045082, "learning_rate": 3.235128309866334e-05, "loss": 0.0023, "step": 260950 }, { "epoch": 30.792826805096745, "grad_norm": 0.0032029200810939074, "learning_rate": 3.2333903890344514e-05, "loss": 0.0011, "step": 261000 }, { "epoch": 30.792826805096745, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010392277908977121, "eval_runtime": 2.1311, "eval_samples_per_second": 46.925, "eval_steps_per_second": 1.877, "eval_wer": 0.26, "step": 261000 }, { "epoch": 30.79872581406324, "grad_norm": 0.1614217907190323, "learning_rate": 3.231652712045618e-05, "loss": 0.0018, "step": 261050 }, { "epoch": 30.80462482302973, "grad_norm": 0.01147304568439722, "learning_rate": 3.229915279139681e-05, "loss": 0.0012, "step": 261100 }, { "epoch": 30.810523831996225, "grad_norm": 0.02172270603477955, "learning_rate": 3.228178090556459e-05, "loss": 0.002, "step": 261150 }, { "epoch": 30.81642284096272, "grad_norm": 0.138756662607193, "learning_rate": 3.226441146535734e-05, "loss": 0.0013, "step": 261200 }, { "epoch": 30.82232184992921, "grad_norm": 0.12014320492744446, "learning_rate": 3.224704447317255e-05, "loss": 0.0023, "step": 261250 }, { "epoch": 30.828220858895705, "grad_norm": 0.009735900908708572, "learning_rate": 3.222967993140736e-05, "loss": 0.0016, "step": 261300 }, { "epoch": 30.8341198678622, "grad_norm": 0.0029514243360608816, "learning_rate": 3.221231784245857e-05, "loss": 0.0015, "step": 261350 }, { "epoch": 30.840018876828694, "grad_norm": 0.24680890142917633, "learning_rate": 3.219495820872265e-05, "loss": 0.0025, "step": 261400 }, { "epoch": 30.845917885795185, "grad_norm": 0.2571810185909271, "learning_rate": 3.2177601032595736e-05, "loss": 0.0016, "step": 261450 }, { "epoch": 30.85181689476168, "grad_norm": 0.03280363604426384, "learning_rate": 3.216024631647361e-05, "loss": 0.0021, "step": 261500 }, { "epoch": 30.857715903728174, "grad_norm": 0.014926648698747158, "learning_rate": 3.214289406275173e-05, "loss": 0.0013, "step": 261550 }, { "epoch": 30.86361491269467, "grad_norm": 0.10334431380033493, "learning_rate": 3.21255442738252e-05, "loss": 0.0018, "step": 261600 }, { "epoch": 30.86951392166116, "grad_norm": 0.03300032764673233, "learning_rate": 3.210819695208881e-05, "loss": 0.0023, "step": 261650 }, { "epoch": 30.875412930627654, "grad_norm": 0.03564620763063431, "learning_rate": 3.209085209993696e-05, "loss": 0.002, "step": 261700 }, { "epoch": 30.88131193959415, "grad_norm": 0.010660198517143726, "learning_rate": 3.2073509719763775e-05, "loss": 0.0016, "step": 261750 }, { "epoch": 30.887210948560643, "grad_norm": 0.00981306191533804, "learning_rate": 3.205616981396297e-05, "loss": 0.0023, "step": 261800 }, { "epoch": 30.893109957527134, "grad_norm": 0.0018714596517384052, "learning_rate": 3.2038832384927966e-05, "loss": 0.002, "step": 261850 }, { "epoch": 30.89900896649363, "grad_norm": 0.020772520452737808, "learning_rate": 3.2021497435051826e-05, "loss": 0.0017, "step": 261900 }, { "epoch": 30.904907975460123, "grad_norm": 0.06527148932218552, "learning_rate": 3.200416496672729e-05, "loss": 0.0016, "step": 261950 }, { "epoch": 30.910806984426618, "grad_norm": 0.010244376957416534, "learning_rate": 3.1986834982346714e-05, "loss": 0.0022, "step": 262000 }, { "epoch": 30.910806984426618, "eval_cer": 0.08347676419965576, "eval_loss": 9.038661664817482e-05, "eval_runtime": 2.053, "eval_samples_per_second": 48.71, "eval_steps_per_second": 1.948, "eval_wer": 0.26, "step": 262000 }, { "epoch": 30.91670599339311, "grad_norm": 0.0825868621468544, "learning_rate": 3.1969507484302145e-05, "loss": 0.0012, "step": 262050 }, { "epoch": 30.922605002359603, "grad_norm": 0.09066061675548553, "learning_rate": 3.195218247498527e-05, "loss": 0.0011, "step": 262100 }, { "epoch": 30.928504011326098, "grad_norm": 0.030182497575879097, "learning_rate": 3.193485995678746e-05, "loss": 0.0022, "step": 262150 }, { "epoch": 30.934403020292592, "grad_norm": 0.017104076221585274, "learning_rate": 3.1917539932099694e-05, "loss": 0.0019, "step": 262200 }, { "epoch": 30.940302029259083, "grad_norm": 0.02262166328728199, "learning_rate": 3.190022240331266e-05, "loss": 0.0012, "step": 262250 }, { "epoch": 30.946201038225578, "grad_norm": 0.002964618382975459, "learning_rate": 3.1882907372816664e-05, "loss": 0.0018, "step": 262300 }, { "epoch": 30.952100047192072, "grad_norm": 0.06630036979913712, "learning_rate": 3.186559484300168e-05, "loss": 0.0018, "step": 262350 }, { "epoch": 30.957999056158567, "grad_norm": 0.14070843160152435, "learning_rate": 3.1848284816257334e-05, "loss": 0.0019, "step": 262400 }, { "epoch": 30.963898065125058, "grad_norm": 0.058999478816986084, "learning_rate": 3.1830977294972906e-05, "loss": 0.0021, "step": 262450 }, { "epoch": 30.969797074091552, "grad_norm": 0.003258113982155919, "learning_rate": 3.1813672281537346e-05, "loss": 0.0016, "step": 262500 }, { "epoch": 30.975696083058047, "grad_norm": 0.07346509397029877, "learning_rate": 3.1796369778339234e-05, "loss": 0.0021, "step": 262550 }, { "epoch": 30.98159509202454, "grad_norm": 0.08110027015209198, "learning_rate": 3.177906978776682e-05, "loss": 0.0018, "step": 262600 }, { "epoch": 30.987494100991032, "grad_norm": 0.005235690623521805, "learning_rate": 3.176177231220799e-05, "loss": 0.0021, "step": 262650 }, { "epoch": 30.993393109957527, "grad_norm": 0.09911854565143585, "learning_rate": 3.174447735405032e-05, "loss": 0.0022, "step": 262700 }, { "epoch": 30.99929211892402, "grad_norm": 0.12427309900522232, "learning_rate": 3.1727184915680976e-05, "loss": 0.0022, "step": 262750 }, { "epoch": 31.005191127890516, "grad_norm": 0.03896708786487579, "learning_rate": 3.1709894999486826e-05, "loss": 0.0024, "step": 262800 }, { "epoch": 31.011090136857007, "grad_norm": 0.04917483776807785, "learning_rate": 3.169260760785439e-05, "loss": 0.0018, "step": 262850 }, { "epoch": 31.0169891458235, "grad_norm": 0.01390006858855486, "learning_rate": 3.167532274316983e-05, "loss": 0.0022, "step": 262900 }, { "epoch": 31.022888154789996, "grad_norm": 0.01702902652323246, "learning_rate": 3.1658040407818944e-05, "loss": 0.0011, "step": 262950 }, { "epoch": 31.02878716375649, "grad_norm": 0.021656904369592667, "learning_rate": 3.164076060418719e-05, "loss": 0.0016, "step": 263000 }, { "epoch": 31.02878716375649, "eval_cer": 0.08347676419965576, "eval_loss": 4.7202214773278683e-05, "eval_runtime": 2.0557, "eval_samples_per_second": 48.646, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 263000 }, { "epoch": 31.03468617272298, "grad_norm": 0.0918993428349495, "learning_rate": 3.162348333465969e-05, "loss": 0.0013, "step": 263050 }, { "epoch": 31.040585181689476, "grad_norm": 0.03198100998997688, "learning_rate": 3.160620860162119e-05, "loss": 0.0013, "step": 263100 }, { "epoch": 31.04648419065597, "grad_norm": 0.001841932418756187, "learning_rate": 3.15889364074561e-05, "loss": 0.0013, "step": 263150 }, { "epoch": 31.052383199622465, "grad_norm": 0.030297327786684036, "learning_rate": 3.1571666754548505e-05, "loss": 0.0013, "step": 263200 }, { "epoch": 31.058282208588956, "grad_norm": 0.02308879792690277, "learning_rate": 3.1554399645282094e-05, "loss": 0.0012, "step": 263250 }, { "epoch": 31.06418121755545, "grad_norm": 0.0010175785282626748, "learning_rate": 3.153713508204024e-05, "loss": 0.0012, "step": 263300 }, { "epoch": 31.070080226521945, "grad_norm": 0.005722705274820328, "learning_rate": 3.151987306720593e-05, "loss": 0.0022, "step": 263350 }, { "epoch": 31.07597923548844, "grad_norm": 0.052841491997241974, "learning_rate": 3.1502613603161836e-05, "loss": 0.0021, "step": 263400 }, { "epoch": 31.08187824445493, "grad_norm": 0.18781204521656036, "learning_rate": 3.148535669229025e-05, "loss": 0.0016, "step": 263450 }, { "epoch": 31.087777253421425, "grad_norm": 0.020775586366653442, "learning_rate": 3.146810233697313e-05, "loss": 0.001, "step": 263500 }, { "epoch": 31.09367626238792, "grad_norm": 0.13647423684597015, "learning_rate": 3.1450850539592084e-05, "loss": 0.0016, "step": 263550 }, { "epoch": 31.09957527135441, "grad_norm": 0.11182881891727448, "learning_rate": 3.143360130252834e-05, "loss": 0.0015, "step": 263600 }, { "epoch": 31.105474280320905, "grad_norm": 0.1083521917462349, "learning_rate": 3.1416354628162805e-05, "loss": 0.0018, "step": 263650 }, { "epoch": 31.1113732892874, "grad_norm": 0.002687786938622594, "learning_rate": 3.139911051887601e-05, "loss": 0.0016, "step": 263700 }, { "epoch": 31.117272298253894, "grad_norm": 0.06624583154916763, "learning_rate": 3.138186897704813e-05, "loss": 0.0023, "step": 263750 }, { "epoch": 31.123171307220385, "grad_norm": 0.08753699064254761, "learning_rate": 3.1364630005059e-05, "loss": 0.0017, "step": 263800 }, { "epoch": 31.12907031618688, "grad_norm": 0.13463518023490906, "learning_rate": 3.13473936052881e-05, "loss": 0.0015, "step": 263850 }, { "epoch": 31.134969325153374, "grad_norm": 0.07041502743959427, "learning_rate": 3.133015978011457e-05, "loss": 0.002, "step": 263900 }, { "epoch": 31.14086833411987, "grad_norm": 0.12163957953453064, "learning_rate": 3.131292853191713e-05, "loss": 0.0022, "step": 263950 }, { "epoch": 31.14676734308636, "grad_norm": 0.024352561682462692, "learning_rate": 3.129569986307422e-05, "loss": 0.0021, "step": 264000 }, { "epoch": 31.14676734308636, "eval_cer": 0.08347676419965576, "eval_loss": 7.636614463990554e-05, "eval_runtime": 2.0538, "eval_samples_per_second": 48.69, "eval_steps_per_second": 1.948, "eval_wer": 0.26, "step": 264000 }, { "epoch": 31.152666352052854, "grad_norm": 0.2445192039012909, "learning_rate": 3.127847377596389e-05, "loss": 0.0014, "step": 264050 }, { "epoch": 31.15856536101935, "grad_norm": 0.003824081039056182, "learning_rate": 3.1261250272963816e-05, "loss": 0.0013, "step": 264100 }, { "epoch": 31.164464369985843, "grad_norm": 0.09904486685991287, "learning_rate": 3.124402935645137e-05, "loss": 0.0015, "step": 264150 }, { "epoch": 31.170363378952334, "grad_norm": 0.0010771788656711578, "learning_rate": 3.122681102880352e-05, "loss": 0.0021, "step": 264200 }, { "epoch": 31.17626238791883, "grad_norm": 0.22824402153491974, "learning_rate": 3.120959529239688e-05, "loss": 0.0026, "step": 264250 }, { "epoch": 31.182161396885324, "grad_norm": 0.04897244647145271, "learning_rate": 3.119238214960774e-05, "loss": 0.0015, "step": 264300 }, { "epoch": 31.188060405851818, "grad_norm": 0.25933781266212463, "learning_rate": 3.117517160281198e-05, "loss": 0.0021, "step": 264350 }, { "epoch": 31.19395941481831, "grad_norm": 0.0126776322722435, "learning_rate": 3.115796365438518e-05, "loss": 0.0021, "step": 264400 }, { "epoch": 31.199858423784804, "grad_norm": 0.0276829581707716, "learning_rate": 3.114075830670251e-05, "loss": 0.0015, "step": 264450 }, { "epoch": 31.205757432751298, "grad_norm": 0.05273442715406418, "learning_rate": 3.1123555562138814e-05, "loss": 0.0009, "step": 264500 }, { "epoch": 31.211656441717793, "grad_norm": 0.4072744846343994, "learning_rate": 3.110635542306859e-05, "loss": 0.002, "step": 264550 }, { "epoch": 31.217555450684284, "grad_norm": 0.1458774358034134, "learning_rate": 3.108915789186592e-05, "loss": 0.0019, "step": 264600 }, { "epoch": 31.223454459650778, "grad_norm": 0.013056311756372452, "learning_rate": 3.1071962970904576e-05, "loss": 0.0021, "step": 264650 }, { "epoch": 31.229353468617273, "grad_norm": 0.010731369256973267, "learning_rate": 3.105477066255795e-05, "loss": 0.0017, "step": 264700 }, { "epoch": 31.235252477583767, "grad_norm": 0.049873556941747665, "learning_rate": 3.1037580969199074e-05, "loss": 0.0012, "step": 264750 }, { "epoch": 31.241151486550258, "grad_norm": 0.02735595591366291, "learning_rate": 3.1020393893200606e-05, "loss": 0.0017, "step": 264800 }, { "epoch": 31.247050495516753, "grad_norm": 0.0028272843919694424, "learning_rate": 3.100320943693489e-05, "loss": 0.0018, "step": 264850 }, { "epoch": 31.252949504483247, "grad_norm": 0.03042648732662201, "learning_rate": 3.098602760277385e-05, "loss": 0.0014, "step": 264900 }, { "epoch": 31.258848513449742, "grad_norm": 0.09858693927526474, "learning_rate": 3.096884839308909e-05, "loss": 0.0014, "step": 264950 }, { "epoch": 31.264747522416233, "grad_norm": 0.0034860053565353155, "learning_rate": 3.095167181025183e-05, "loss": 0.0013, "step": 265000 }, { "epoch": 31.264747522416233, "eval_cer": 0.08347676419965576, "eval_loss": 6.124399078544229e-05, "eval_runtime": 2.0476, "eval_samples_per_second": 48.837, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 265000 }, { "epoch": 31.270646531382727, "grad_norm": 0.1369752734899521, "learning_rate": 3.0934497856632926e-05, "loss": 0.0013, "step": 265050 }, { "epoch": 31.276545540349222, "grad_norm": 0.0316031314432621, "learning_rate": 3.091732653460289e-05, "loss": 0.0021, "step": 265100 }, { "epoch": 31.282444549315716, "grad_norm": 0.004081346560269594, "learning_rate": 3.090015784653185e-05, "loss": 0.0015, "step": 265150 }, { "epoch": 31.288343558282207, "grad_norm": 0.003618807764723897, "learning_rate": 3.088299179478959e-05, "loss": 0.0022, "step": 265200 }, { "epoch": 31.294242567248702, "grad_norm": 0.062281493097543716, "learning_rate": 3.086582838174551e-05, "loss": 0.0017, "step": 265250 }, { "epoch": 31.300141576215196, "grad_norm": 0.0035715256817638874, "learning_rate": 3.084866760976867e-05, "loss": 0.0019, "step": 265300 }, { "epoch": 31.30604058518169, "grad_norm": 0.0029660670552402735, "learning_rate": 3.0831509481227745e-05, "loss": 0.0012, "step": 265350 }, { "epoch": 31.311939594148182, "grad_norm": 0.031398944556713104, "learning_rate": 3.081435399849104e-05, "loss": 0.0016, "step": 265400 }, { "epoch": 31.317838603114676, "grad_norm": 0.01098361611366272, "learning_rate": 3.07972011639265e-05, "loss": 0.0014, "step": 265450 }, { "epoch": 31.32373761208117, "grad_norm": 0.18761278688907623, "learning_rate": 3.078005097990175e-05, "loss": 0.0015, "step": 265500 }, { "epoch": 31.329636621047666, "grad_norm": 0.11034327745437622, "learning_rate": 3.076290344878397e-05, "loss": 0.0015, "step": 265550 }, { "epoch": 31.335535630014157, "grad_norm": 0.08876916021108627, "learning_rate": 3.074575857294004e-05, "loss": 0.0016, "step": 265600 }, { "epoch": 31.34143463898065, "grad_norm": 0.023544732481241226, "learning_rate": 3.072861635473643e-05, "loss": 0.001, "step": 265650 }, { "epoch": 31.347333647947146, "grad_norm": 0.02439560741186142, "learning_rate": 3.071147679653927e-05, "loss": 0.0011, "step": 265700 }, { "epoch": 31.35323265691364, "grad_norm": 0.004452748689800501, "learning_rate": 3.069433990071429e-05, "loss": 0.0013, "step": 265750 }, { "epoch": 31.35913166588013, "grad_norm": 0.09862426668405533, "learning_rate": 3.067720566962691e-05, "loss": 0.0017, "step": 265800 }, { "epoch": 31.365030674846626, "grad_norm": 0.21016156673431396, "learning_rate": 3.0660074105642136e-05, "loss": 0.0014, "step": 265850 }, { "epoch": 31.37092968381312, "grad_norm": 0.045138828456401825, "learning_rate": 3.064294521112461e-05, "loss": 0.0012, "step": 265900 }, { "epoch": 31.376828692779615, "grad_norm": 0.20087459683418274, "learning_rate": 3.0625818988438616e-05, "loss": 0.002, "step": 265950 }, { "epoch": 31.382727701746106, "grad_norm": 0.0010114662582054734, "learning_rate": 3.060869543994806e-05, "loss": 0.0016, "step": 266000 }, { "epoch": 31.382727701746106, "eval_cer": 0.08347676419965576, "eval_loss": 1.5494393664994277e-05, "eval_runtime": 2.1078, "eval_samples_per_second": 47.444, "eval_steps_per_second": 1.898, "eval_wer": 0.26, "step": 266000 }, { "epoch": 31.3886267107126, "grad_norm": 0.008493182249367237, "learning_rate": 3.0591574568016496e-05, "loss": 0.0011, "step": 266050 }, { "epoch": 31.394525719679095, "grad_norm": 0.09877404570579529, "learning_rate": 3.057445637500709e-05, "loss": 0.0015, "step": 266100 }, { "epoch": 31.40042472864559, "grad_norm": 0.03206152096390724, "learning_rate": 3.0557340863282647e-05, "loss": 0.0018, "step": 266150 }, { "epoch": 31.40632373761208, "grad_norm": 0.01294762548059225, "learning_rate": 3.054022803520562e-05, "loss": 0.0017, "step": 266200 }, { "epoch": 31.412222746578575, "grad_norm": 0.08396371454000473, "learning_rate": 3.0523117893138054e-05, "loss": 0.002, "step": 266250 }, { "epoch": 31.41812175554507, "grad_norm": 0.03396540880203247, "learning_rate": 3.0506010439441645e-05, "loss": 0.0009, "step": 266300 }, { "epoch": 31.42402076451156, "grad_norm": 0.2412591427564621, "learning_rate": 3.0488905676477712e-05, "loss": 0.0011, "step": 266350 }, { "epoch": 31.429919773478055, "grad_norm": 0.03647731617093086, "learning_rate": 3.0471803606607212e-05, "loss": 0.0011, "step": 266400 }, { "epoch": 31.43581878244455, "grad_norm": 0.015743449330329895, "learning_rate": 3.045470423219071e-05, "loss": 0.0016, "step": 266450 }, { "epoch": 31.441717791411044, "grad_norm": 0.17693668603897095, "learning_rate": 3.0437607555588433e-05, "loss": 0.0016, "step": 266500 }, { "epoch": 31.447616800377535, "grad_norm": 0.013917368836700916, "learning_rate": 3.0420513579160204e-05, "loss": 0.0012, "step": 266550 }, { "epoch": 31.45351580934403, "grad_norm": 0.013217419385910034, "learning_rate": 3.0403422305265478e-05, "loss": 0.0014, "step": 266600 }, { "epoch": 31.459414818310524, "grad_norm": 0.03808280825614929, "learning_rate": 3.0386333736263356e-05, "loss": 0.0022, "step": 266650 }, { "epoch": 31.46531382727702, "grad_norm": 0.09192677587270737, "learning_rate": 3.0369247874512545e-05, "loss": 0.0011, "step": 266700 }, { "epoch": 31.47121283624351, "grad_norm": 0.011525549925863743, "learning_rate": 3.035216472237138e-05, "loss": 0.0013, "step": 266750 }, { "epoch": 31.477111845210004, "grad_norm": 0.001311339670792222, "learning_rate": 3.0335084282197852e-05, "loss": 0.0021, "step": 266800 }, { "epoch": 31.4830108541765, "grad_norm": 0.11382254958152771, "learning_rate": 3.0318006556349532e-05, "loss": 0.0017, "step": 266850 }, { "epoch": 31.488909863142993, "grad_norm": 0.07426577806472778, "learning_rate": 3.0300931547183655e-05, "loss": 0.0018, "step": 266900 }, { "epoch": 31.494808872109484, "grad_norm": 0.004731492139399052, "learning_rate": 3.0283859257057045e-05, "loss": 0.0017, "step": 266950 }, { "epoch": 31.50070788107598, "grad_norm": 0.40544193983078003, "learning_rate": 3.0266789688326186e-05, "loss": 0.0016, "step": 267000 }, { "epoch": 31.50070788107598, "eval_cer": 0.08347676419965576, "eval_loss": 4.520138827501796e-05, "eval_runtime": 2.0555, "eval_samples_per_second": 48.649, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 267000 }, { "epoch": 31.506606890042473, "grad_norm": 0.2744661867618561, "learning_rate": 3.024972284334716e-05, "loss": 0.0012, "step": 267050 }, { "epoch": 31.512505899008968, "grad_norm": 0.15270446240901947, "learning_rate": 3.0232658724475684e-05, "loss": 0.0021, "step": 267100 }, { "epoch": 31.51840490797546, "grad_norm": 0.15288682281970978, "learning_rate": 3.0215597334067115e-05, "loss": 0.0021, "step": 267150 }, { "epoch": 31.524303916941953, "grad_norm": 0.007277204655110836, "learning_rate": 3.0198538674476394e-05, "loss": 0.002, "step": 267200 }, { "epoch": 31.530202925908448, "grad_norm": 0.2247561663389206, "learning_rate": 3.0181482748058133e-05, "loss": 0.0016, "step": 267250 }, { "epoch": 31.536101934874942, "grad_norm": 0.0350714772939682, "learning_rate": 3.0164429557166514e-05, "loss": 0.0017, "step": 267300 }, { "epoch": 31.542000943841433, "grad_norm": 0.037215422838926315, "learning_rate": 3.014737910415538e-05, "loss": 0.0013, "step": 267350 }, { "epoch": 31.547899952807928, "grad_norm": 0.31623879075050354, "learning_rate": 3.0130331391378187e-05, "loss": 0.0009, "step": 267400 }, { "epoch": 31.553798961774422, "grad_norm": 0.0033000397961586714, "learning_rate": 3.0113286421188014e-05, "loss": 0.0016, "step": 267450 }, { "epoch": 31.559697970740917, "grad_norm": 0.12160038203001022, "learning_rate": 3.009624419593756e-05, "loss": 0.0018, "step": 267500 }, { "epoch": 31.565596979707408, "grad_norm": 0.018960673362016678, "learning_rate": 3.0079204717979135e-05, "loss": 0.0021, "step": 267550 }, { "epoch": 31.571495988673902, "grad_norm": 0.19014553725719452, "learning_rate": 3.006216798966468e-05, "loss": 0.001, "step": 267600 }, { "epoch": 31.577394997640397, "grad_norm": 0.03984115645289421, "learning_rate": 3.0045134013345754e-05, "loss": 0.0013, "step": 267650 }, { "epoch": 31.58329400660689, "grad_norm": 0.006534831132739782, "learning_rate": 3.0028102791373535e-05, "loss": 0.0013, "step": 267700 }, { "epoch": 31.589193015573382, "grad_norm": 0.0020392367150634527, "learning_rate": 3.001107432609882e-05, "loss": 0.0014, "step": 267750 }, { "epoch": 31.595092024539877, "grad_norm": 0.0853838101029396, "learning_rate": 2.9994048619872035e-05, "loss": 0.0028, "step": 267800 }, { "epoch": 31.60099103350637, "grad_norm": 0.13901153206825256, "learning_rate": 2.9977025675043225e-05, "loss": 0.0018, "step": 267850 }, { "epoch": 31.606890042472866, "grad_norm": 0.08227382600307465, "learning_rate": 2.996000549396204e-05, "loss": 0.0019, "step": 267900 }, { "epoch": 31.612789051439357, "grad_norm": 0.0015177109744399786, "learning_rate": 2.9942988078977764e-05, "loss": 0.0015, "step": 267950 }, { "epoch": 31.61868806040585, "grad_norm": 0.14464479684829712, "learning_rate": 2.992597343243927e-05, "loss": 0.0019, "step": 268000 }, { "epoch": 31.61868806040585, "eval_cer": 0.08347676419965576, "eval_loss": 3.6927151086274534e-05, "eval_runtime": 2.0357, "eval_samples_per_second": 49.124, "eval_steps_per_second": 1.965, "eval_wer": 0.26, "step": 268000 }, { "epoch": 31.624587069372346, "grad_norm": 0.11031253635883331, "learning_rate": 2.9908961556695086e-05, "loss": 0.0015, "step": 268050 }, { "epoch": 31.63048607833884, "grad_norm": 0.22340989112854004, "learning_rate": 2.9891952454093326e-05, "loss": 0.0016, "step": 268100 }, { "epoch": 31.63638508730533, "grad_norm": 0.058930788189172745, "learning_rate": 2.9874946126981755e-05, "loss": 0.0021, "step": 268150 }, { "epoch": 31.642284096271826, "grad_norm": 0.0012964127818122506, "learning_rate": 2.985794257770773e-05, "loss": 0.0022, "step": 268200 }, { "epoch": 31.64818310523832, "grad_norm": 0.010873023420572281, "learning_rate": 2.9840941808618234e-05, "loss": 0.0016, "step": 268250 }, { "epoch": 31.654082114204815, "grad_norm": 0.050321225076913834, "learning_rate": 2.9823943822059853e-05, "loss": 0.0014, "step": 268300 }, { "epoch": 31.659981123171306, "grad_norm": 0.04035023972392082, "learning_rate": 2.9806948620378805e-05, "loss": 0.0016, "step": 268350 }, { "epoch": 31.6658801321378, "grad_norm": 0.0555742122232914, "learning_rate": 2.978995620592092e-05, "loss": 0.0016, "step": 268400 }, { "epoch": 31.671779141104295, "grad_norm": 0.0715521052479744, "learning_rate": 2.9772966581031627e-05, "loss": 0.0018, "step": 268450 }, { "epoch": 31.67767815007079, "grad_norm": 0.0012851409846916795, "learning_rate": 2.9755979748056006e-05, "loss": 0.0014, "step": 268500 }, { "epoch": 31.68357715903728, "grad_norm": 0.0021622497588396072, "learning_rate": 2.973899570933872e-05, "loss": 0.0021, "step": 268550 }, { "epoch": 31.689476168003775, "grad_norm": 0.005868486128747463, "learning_rate": 2.972201446722405e-05, "loss": 0.0018, "step": 268600 }, { "epoch": 31.69537517697027, "grad_norm": 0.04326687380671501, "learning_rate": 2.9705036024055905e-05, "loss": 0.0012, "step": 268650 }, { "epoch": 31.70127418593676, "grad_norm": 0.1634606420993805, "learning_rate": 2.9688060382177795e-05, "loss": 0.0016, "step": 268700 }, { "epoch": 31.707173194903255, "grad_norm": 0.08731581270694733, "learning_rate": 2.967108754393284e-05, "loss": 0.0014, "step": 268750 }, { "epoch": 31.71307220386975, "grad_norm": 0.05756264925003052, "learning_rate": 2.9654117511663802e-05, "loss": 0.0023, "step": 268800 }, { "epoch": 31.718971212836244, "grad_norm": 0.16407184302806854, "learning_rate": 2.963715028771302e-05, "loss": 0.0013, "step": 268850 }, { "epoch": 31.72487022180274, "grad_norm": 0.14231355488300323, "learning_rate": 2.962018587442247e-05, "loss": 0.0017, "step": 268900 }, { "epoch": 31.73076923076923, "grad_norm": 0.008670218288898468, "learning_rate": 2.9603224274133713e-05, "loss": 0.0017, "step": 268950 }, { "epoch": 31.736668239735724, "grad_norm": 0.002561603207141161, "learning_rate": 2.9586265489187947e-05, "loss": 0.0019, "step": 269000 }, { "epoch": 31.736668239735724, "eval_cer": 0.08347676419965576, "eval_loss": 7.654075307073072e-05, "eval_runtime": 2.064, "eval_samples_per_second": 48.449, "eval_steps_per_second": 1.938, "eval_wer": 0.26, "step": 269000 }, { "epoch": 31.74256724870222, "grad_norm": 0.029228422790765762, "learning_rate": 2.956930952192597e-05, "loss": 0.0012, "step": 269050 }, { "epoch": 31.74846625766871, "grad_norm": 0.11913470178842545, "learning_rate": 2.955235637468821e-05, "loss": 0.0019, "step": 269100 }, { "epoch": 31.754365266635205, "grad_norm": 0.016943832859396935, "learning_rate": 2.9535406049814683e-05, "loss": 0.0014, "step": 269150 }, { "epoch": 31.7602642756017, "grad_norm": 0.015328978188335896, "learning_rate": 2.9518458549645013e-05, "loss": 0.002, "step": 269200 }, { "epoch": 31.766163284568194, "grad_norm": 0.006698157172650099, "learning_rate": 2.950151387651845e-05, "loss": 0.0018, "step": 269250 }, { "epoch": 31.772062293534685, "grad_norm": 0.03231209143996239, "learning_rate": 2.9484572032773838e-05, "loss": 0.0019, "step": 269300 }, { "epoch": 31.77796130250118, "grad_norm": 0.022036978974938393, "learning_rate": 2.9467633020749653e-05, "loss": 0.0016, "step": 269350 }, { "epoch": 31.783860311467674, "grad_norm": 0.0039101457223296165, "learning_rate": 2.9450696842783954e-05, "loss": 0.0015, "step": 269400 }, { "epoch": 31.789759320434168, "grad_norm": 0.03540363907814026, "learning_rate": 2.9433763501214428e-05, "loss": 0.0018, "step": 269450 }, { "epoch": 31.79565832940066, "grad_norm": 0.02696644701063633, "learning_rate": 2.9416832998378375e-05, "loss": 0.0021, "step": 269500 }, { "epoch": 31.801557338367154, "grad_norm": 0.0020404139067977667, "learning_rate": 2.939990533661269e-05, "loss": 0.0014, "step": 269550 }, { "epoch": 31.80745634733365, "grad_norm": 0.06923702359199524, "learning_rate": 2.9382980518253868e-05, "loss": 0.0018, "step": 269600 }, { "epoch": 31.813355356300143, "grad_norm": 0.09129352867603302, "learning_rate": 2.936605854563802e-05, "loss": 0.0017, "step": 269650 }, { "epoch": 31.819254365266634, "grad_norm": 0.0038985307328402996, "learning_rate": 2.934913942110088e-05, "loss": 0.0022, "step": 269700 }, { "epoch": 31.82515337423313, "grad_norm": 0.039209749549627304, "learning_rate": 2.9332223146977756e-05, "loss": 0.0011, "step": 269750 }, { "epoch": 31.831052383199623, "grad_norm": 0.04445357993245125, "learning_rate": 2.9315309725603596e-05, "loss": 0.0016, "step": 269800 }, { "epoch": 31.836951392166117, "grad_norm": 0.15457311272621155, "learning_rate": 2.929839915931294e-05, "loss": 0.0015, "step": 269850 }, { "epoch": 31.84285040113261, "grad_norm": 0.2159908562898636, "learning_rate": 2.9281491450439924e-05, "loss": 0.002, "step": 269900 }, { "epoch": 31.848749410099103, "grad_norm": 0.015680881217122078, "learning_rate": 2.926458660131831e-05, "loss": 0.0011, "step": 269950 }, { "epoch": 31.854648419065597, "grad_norm": 0.0016428785165771842, "learning_rate": 2.9247684614281447e-05, "loss": 0.0015, "step": 270000 }, { "epoch": 31.854648419065597, "eval_cer": 0.08347676419965576, "eval_loss": 5.78389190195594e-05, "eval_runtime": 2.0579, "eval_samples_per_second": 48.593, "eval_steps_per_second": 1.944, "eval_wer": 0.26, "step": 270000 }, { "epoch": 31.860547428032092, "grad_norm": 0.046215347945690155, "learning_rate": 2.9230785491662294e-05, "loss": 0.0011, "step": 270050 }, { "epoch": 31.866446436998583, "grad_norm": 0.007388765923678875, "learning_rate": 2.921388923579341e-05, "loss": 0.0022, "step": 270100 }, { "epoch": 31.872345445965077, "grad_norm": 0.009532252326607704, "learning_rate": 2.9196995849006996e-05, "loss": 0.002, "step": 270150 }, { "epoch": 31.878244454931572, "grad_norm": 0.012393898330628872, "learning_rate": 2.918010533363481e-05, "loss": 0.002, "step": 270200 }, { "epoch": 31.884143463898067, "grad_norm": 0.0170615091919899, "learning_rate": 2.9163217692008203e-05, "loss": 0.0017, "step": 270250 }, { "epoch": 31.890042472864558, "grad_norm": 0.18444538116455078, "learning_rate": 2.9146332926458197e-05, "loss": 0.0015, "step": 270300 }, { "epoch": 31.895941481831052, "grad_norm": 0.03534970059990883, "learning_rate": 2.912945103931536e-05, "loss": 0.001, "step": 270350 }, { "epoch": 31.901840490797547, "grad_norm": 0.015020483173429966, "learning_rate": 2.9112572032909873e-05, "loss": 0.002, "step": 270400 }, { "epoch": 31.90773949976404, "grad_norm": 0.0011584903113543987, "learning_rate": 2.9095695909571534e-05, "loss": 0.0017, "step": 270450 }, { "epoch": 31.913638508730532, "grad_norm": 0.16350868344306946, "learning_rate": 2.9078822671629734e-05, "loss": 0.0018, "step": 270500 }, { "epoch": 31.919537517697027, "grad_norm": 0.20856937766075134, "learning_rate": 2.9061952321413465e-05, "loss": 0.002, "step": 270550 }, { "epoch": 31.92543652666352, "grad_norm": 0.009201493114233017, "learning_rate": 2.9045084861251314e-05, "loss": 0.0021, "step": 270600 }, { "epoch": 31.931335535630016, "grad_norm": 0.4350857138633728, "learning_rate": 2.9028220293471487e-05, "loss": 0.0016, "step": 270650 }, { "epoch": 31.937234544596507, "grad_norm": 0.00909461546689272, "learning_rate": 2.9011358620401774e-05, "loss": 0.0018, "step": 270700 }, { "epoch": 31.943133553563, "grad_norm": 0.02806789241731167, "learning_rate": 2.8994499844369583e-05, "loss": 0.0013, "step": 270750 }, { "epoch": 31.949032562529496, "grad_norm": 0.007559030316770077, "learning_rate": 2.8977643967701894e-05, "loss": 0.0015, "step": 270800 }, { "epoch": 31.95493157149599, "grad_norm": 0.055152956396341324, "learning_rate": 2.8960790992725296e-05, "loss": 0.002, "step": 270850 }, { "epoch": 31.96083058046248, "grad_norm": 0.06781649589538574, "learning_rate": 2.8943940921766016e-05, "loss": 0.0021, "step": 270900 }, { "epoch": 31.966729589428976, "grad_norm": 0.008877416141331196, "learning_rate": 2.8927093757149836e-05, "loss": 0.0016, "step": 270950 }, { "epoch": 31.97262859839547, "grad_norm": 0.004536790773272514, "learning_rate": 2.891024950120216e-05, "loss": 0.0015, "step": 271000 }, { "epoch": 31.97262859839547, "eval_cer": 0.08347676419965576, "eval_loss": 3.651339284260757e-05, "eval_runtime": 2.0322, "eval_samples_per_second": 49.207, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 271000 }, { "epoch": 31.978527607361965, "grad_norm": 0.002938095713034272, "learning_rate": 2.8893408156247935e-05, "loss": 0.0017, "step": 271050 }, { "epoch": 31.984426616328456, "grad_norm": 0.014075121842324734, "learning_rate": 2.8876569724611823e-05, "loss": 0.0017, "step": 271100 }, { "epoch": 31.99032562529495, "grad_norm": 0.014751776121556759, "learning_rate": 2.8859734208617968e-05, "loss": 0.0015, "step": 271150 }, { "epoch": 31.996224634261445, "grad_norm": 0.027241196483373642, "learning_rate": 2.884290161059017e-05, "loss": 0.0011, "step": 271200 }, { "epoch": 32.00212364322794, "grad_norm": 0.2695954740047455, "learning_rate": 2.8826071932851807e-05, "loss": 0.0024, "step": 271250 }, { "epoch": 32.008022652194434, "grad_norm": 0.02233242057263851, "learning_rate": 2.8809245177725857e-05, "loss": 0.0007, "step": 271300 }, { "epoch": 32.01392166116092, "grad_norm": 0.04155702888965607, "learning_rate": 2.8792421347534904e-05, "loss": 0.0015, "step": 271350 }, { "epoch": 32.019820670127416, "grad_norm": 0.014215431176126003, "learning_rate": 2.8775600444601125e-05, "loss": 0.0018, "step": 271400 }, { "epoch": 32.02571967909391, "grad_norm": 0.09556344896554947, "learning_rate": 2.8758782471246276e-05, "loss": 0.0013, "step": 271450 }, { "epoch": 32.031618688060405, "grad_norm": 0.04389597475528717, "learning_rate": 2.8741967429791734e-05, "loss": 0.0012, "step": 271500 }, { "epoch": 32.0375176970269, "grad_norm": 0.022799208760261536, "learning_rate": 2.872515532255845e-05, "loss": 0.0015, "step": 271550 }, { "epoch": 32.043416705993394, "grad_norm": 0.00652811024338007, "learning_rate": 2.8708346151866973e-05, "loss": 0.0017, "step": 271600 }, { "epoch": 32.04931571495989, "grad_norm": 0.09166073054075241, "learning_rate": 2.8691539920037475e-05, "loss": 0.0009, "step": 271650 }, { "epoch": 32.05521472392638, "grad_norm": 0.0058135902509093285, "learning_rate": 2.86747366293897e-05, "loss": 0.0012, "step": 271700 }, { "epoch": 32.06111373289287, "grad_norm": 0.025566445663571358, "learning_rate": 2.865793628224297e-05, "loss": 0.0015, "step": 271750 }, { "epoch": 32.067012741859365, "grad_norm": 0.2880612909793854, "learning_rate": 2.864113888091622e-05, "loss": 0.0013, "step": 271800 }, { "epoch": 32.07291175082586, "grad_norm": 0.03143182396888733, "learning_rate": 2.8624344427727982e-05, "loss": 0.0017, "step": 271850 }, { "epoch": 32.078810759792354, "grad_norm": 0.006866406183689833, "learning_rate": 2.8607552924996373e-05, "loss": 0.0008, "step": 271900 }, { "epoch": 32.08470976875885, "grad_norm": 0.04510706663131714, "learning_rate": 2.8590764375039103e-05, "loss": 0.0016, "step": 271950 }, { "epoch": 32.09060877772534, "grad_norm": 0.16378264129161835, "learning_rate": 2.8573978780173477e-05, "loss": 0.0016, "step": 272000 }, { "epoch": 32.09060877772534, "eval_cer": 0.08347676419965576, "eval_loss": 8.422011887887493e-06, "eval_runtime": 2.0261, "eval_samples_per_second": 49.356, "eval_steps_per_second": 1.974, "eval_wer": 0.26, "step": 272000 }, { "epoch": 32.09650778669184, "grad_norm": 0.029172178357839584, "learning_rate": 2.8557196142716392e-05, "loss": 0.0014, "step": 272050 }, { "epoch": 32.10240679565833, "grad_norm": 0.27916091680526733, "learning_rate": 2.8540416464984333e-05, "loss": 0.0018, "step": 272100 }, { "epoch": 32.10830580462482, "grad_norm": 0.004238532390445471, "learning_rate": 2.8523639749293374e-05, "loss": 0.001, "step": 272150 }, { "epoch": 32.114204813591314, "grad_norm": 0.11837045848369598, "learning_rate": 2.8506865997959176e-05, "loss": 0.0007, "step": 272200 }, { "epoch": 32.12010382255781, "grad_norm": 0.024206459522247314, "learning_rate": 2.8490095213297035e-05, "loss": 0.001, "step": 272250 }, { "epoch": 32.1260028315243, "grad_norm": 0.00799527857452631, "learning_rate": 2.8473327397621787e-05, "loss": 0.0013, "step": 272300 }, { "epoch": 32.1319018404908, "grad_norm": 0.14965181052684784, "learning_rate": 2.845656255324786e-05, "loss": 0.0027, "step": 272350 }, { "epoch": 32.13780084945729, "grad_norm": 0.04582258686423302, "learning_rate": 2.8439800682489304e-05, "loss": 0.0016, "step": 272400 }, { "epoch": 32.14369985842379, "grad_norm": 0.02529088221490383, "learning_rate": 2.8423041787659733e-05, "loss": 0.0019, "step": 272450 }, { "epoch": 32.14959886739028, "grad_norm": 0.004861727822571993, "learning_rate": 2.8406285871072347e-05, "loss": 0.0014, "step": 272500 }, { "epoch": 32.15549787635677, "grad_norm": 0.0004826003569178283, "learning_rate": 2.8389532935039963e-05, "loss": 0.0013, "step": 272550 }, { "epoch": 32.16139688532326, "grad_norm": 0.005469440948218107, "learning_rate": 2.8372782981874963e-05, "loss": 0.0014, "step": 272600 }, { "epoch": 32.16729589428976, "grad_norm": 0.03887594863772392, "learning_rate": 2.835603601388932e-05, "loss": 0.0014, "step": 272650 }, { "epoch": 32.17319490325625, "grad_norm": 0.019640594720840454, "learning_rate": 2.8339292033394575e-05, "loss": 0.0018, "step": 272700 }, { "epoch": 32.17909391222275, "grad_norm": 0.001802931074053049, "learning_rate": 2.8322551042701927e-05, "loss": 0.0018, "step": 272750 }, { "epoch": 32.18499292118924, "grad_norm": 0.10877411812543869, "learning_rate": 2.8305813044122097e-05, "loss": 0.0021, "step": 272800 }, { "epoch": 32.190891930155736, "grad_norm": 0.007898910902440548, "learning_rate": 2.828907803996541e-05, "loss": 0.0009, "step": 272850 }, { "epoch": 32.19679093912223, "grad_norm": 0.008748210966587067, "learning_rate": 2.8272346032541752e-05, "loss": 0.001, "step": 272900 }, { "epoch": 32.20268994808872, "grad_norm": 0.10463031381368637, "learning_rate": 2.8255617024160664e-05, "loss": 0.002, "step": 272950 }, { "epoch": 32.20858895705521, "grad_norm": 0.003539132187142968, "learning_rate": 2.8238891017131218e-05, "loss": 0.0017, "step": 273000 }, { "epoch": 32.20858895705521, "eval_cer": 0.08347676419965576, "eval_loss": 1.895191235234961e-05, "eval_runtime": 2.0372, "eval_samples_per_second": 49.088, "eval_steps_per_second": 1.964, "eval_wer": 0.26, "step": 273000 }, { "epoch": 32.21448796602171, "grad_norm": 0.019565317779779434, "learning_rate": 2.8222168013762078e-05, "loss": 0.0012, "step": 273050 }, { "epoch": 32.2203869749882, "grad_norm": 0.01766037382185459, "learning_rate": 2.8205448016361506e-05, "loss": 0.002, "step": 273100 }, { "epoch": 32.226285983954696, "grad_norm": 0.005682784598320723, "learning_rate": 2.8188731027237342e-05, "loss": 0.0011, "step": 273150 }, { "epoch": 32.23218499292119, "grad_norm": 0.016881484538316727, "learning_rate": 2.817201704869701e-05, "loss": 0.0017, "step": 273200 }, { "epoch": 32.238084001887685, "grad_norm": 0.004688510671257973, "learning_rate": 2.815530608304753e-05, "loss": 0.0013, "step": 273250 }, { "epoch": 32.24398301085418, "grad_norm": 0.2150249481201172, "learning_rate": 2.813859813259548e-05, "loss": 0.0016, "step": 273300 }, { "epoch": 32.24988201982067, "grad_norm": 0.001862950623035431, "learning_rate": 2.8121893199647055e-05, "loss": 0.002, "step": 273350 }, { "epoch": 32.25578102878716, "grad_norm": 0.04301043972373009, "learning_rate": 2.8105191286508005e-05, "loss": 0.0015, "step": 273400 }, { "epoch": 32.261680037753656, "grad_norm": 0.02970224805176258, "learning_rate": 2.808849239548368e-05, "loss": 0.0015, "step": 273450 }, { "epoch": 32.26757904672015, "grad_norm": 0.011621980927884579, "learning_rate": 2.8071796528879014e-05, "loss": 0.0014, "step": 273500 }, { "epoch": 32.273478055686645, "grad_norm": 0.005034519359469414, "learning_rate": 2.805510368899849e-05, "loss": 0.0013, "step": 273550 }, { "epoch": 32.27937706465314, "grad_norm": 0.2504924535751343, "learning_rate": 2.8038413878146248e-05, "loss": 0.0009, "step": 273600 }, { "epoch": 32.285276073619634, "grad_norm": 0.014680693857371807, "learning_rate": 2.8021727098625938e-05, "loss": 0.0011, "step": 273650 }, { "epoch": 32.29117508258613, "grad_norm": 0.012217835523188114, "learning_rate": 2.800504335274081e-05, "loss": 0.0014, "step": 273700 }, { "epoch": 32.297074091552616, "grad_norm": 0.027411526069045067, "learning_rate": 2.798836264279372e-05, "loss": 0.001, "step": 273750 }, { "epoch": 32.30297310051911, "grad_norm": 0.00039295683382079005, "learning_rate": 2.7971684971087077e-05, "loss": 0.0015, "step": 273800 }, { "epoch": 32.308872109485606, "grad_norm": 0.06130664795637131, "learning_rate": 2.795501033992287e-05, "loss": 0.0019, "step": 273850 }, { "epoch": 32.3147711184521, "grad_norm": 0.008213361725211143, "learning_rate": 2.79383387516027e-05, "loss": 0.0013, "step": 273900 }, { "epoch": 32.320670127418595, "grad_norm": 0.01669738069176674, "learning_rate": 2.792167020842771e-05, "loss": 0.0016, "step": 273950 }, { "epoch": 32.32656913638509, "grad_norm": 0.000434155052062124, "learning_rate": 2.7905004712698647e-05, "loss": 0.0017, "step": 274000 }, { "epoch": 32.32656913638509, "eval_cer": 0.08347676419965576, "eval_loss": 1.464870547351893e-05, "eval_runtime": 2.0723, "eval_samples_per_second": 48.255, "eval_steps_per_second": 1.93, "eval_wer": 0.26, "step": 274000 }, { "epoch": 32.332468145351584, "grad_norm": 0.21216188371181488, "learning_rate": 2.7888342266715822e-05, "loss": 0.0016, "step": 274050 }, { "epoch": 32.33836715431807, "grad_norm": 0.005227749701589346, "learning_rate": 2.787168287277914e-05, "loss": 0.0016, "step": 274100 }, { "epoch": 32.344266163284566, "grad_norm": 0.026390761137008667, "learning_rate": 2.7855026533188055e-05, "loss": 0.0019, "step": 274150 }, { "epoch": 32.35016517225106, "grad_norm": 0.01123740803450346, "learning_rate": 2.7838373250241668e-05, "loss": 0.0012, "step": 274200 }, { "epoch": 32.356064181217555, "grad_norm": 0.0013604703126475215, "learning_rate": 2.7821723026238588e-05, "loss": 0.0012, "step": 274250 }, { "epoch": 32.36196319018405, "grad_norm": 0.0009697003988549113, "learning_rate": 2.7805075863477025e-05, "loss": 0.0017, "step": 274300 }, { "epoch": 32.367862199150544, "grad_norm": 0.026295609772205353, "learning_rate": 2.778843176425474e-05, "loss": 0.0013, "step": 274350 }, { "epoch": 32.37376120811704, "grad_norm": 0.020216817036271095, "learning_rate": 2.777179073086915e-05, "loss": 0.0018, "step": 274400 }, { "epoch": 32.37966021708353, "grad_norm": 0.008259279653429985, "learning_rate": 2.7755152765617177e-05, "loss": 0.0016, "step": 274450 }, { "epoch": 32.38555922605002, "grad_norm": 0.02956489659845829, "learning_rate": 2.7738517870795334e-05, "loss": 0.0021, "step": 274500 }, { "epoch": 32.391458235016515, "grad_norm": 0.005126649979501963, "learning_rate": 2.7721886048699713e-05, "loss": 0.0012, "step": 274550 }, { "epoch": 32.39735724398301, "grad_norm": 0.09465841948986053, "learning_rate": 2.7705257301625993e-05, "loss": 0.0019, "step": 274600 }, { "epoch": 32.403256252949504, "grad_norm": 0.0028816345147788525, "learning_rate": 2.7688631631869417e-05, "loss": 0.0018, "step": 274650 }, { "epoch": 32.409155261916, "grad_norm": 0.27176427841186523, "learning_rate": 2.7672009041724806e-05, "loss": 0.0019, "step": 274700 }, { "epoch": 32.41505427088249, "grad_norm": 0.1052536740899086, "learning_rate": 2.7655389533486557e-05, "loss": 0.0018, "step": 274750 }, { "epoch": 32.42095327984899, "grad_norm": 0.03698190301656723, "learning_rate": 2.7638773109448647e-05, "loss": 0.0014, "step": 274800 }, { "epoch": 32.42685228881548, "grad_norm": 0.24140672385692596, "learning_rate": 2.762215977190461e-05, "loss": 0.0014, "step": 274850 }, { "epoch": 32.43275129778197, "grad_norm": 0.047456346452236176, "learning_rate": 2.7605549523147554e-05, "loss": 0.0019, "step": 274900 }, { "epoch": 32.438650306748464, "grad_norm": 0.08917494863271713, "learning_rate": 2.7588942365470205e-05, "loss": 0.0018, "step": 274950 }, { "epoch": 32.44454931571496, "grad_norm": 0.13089679181575775, "learning_rate": 2.7572338301164825e-05, "loss": 0.0015, "step": 275000 }, { "epoch": 32.44454931571496, "eval_cer": 0.08347676419965576, "eval_loss": 2.6355553927714936e-05, "eval_runtime": 2.0739, "eval_samples_per_second": 48.218, "eval_steps_per_second": 1.929, "eval_wer": 0.26, "step": 275000 }, { "epoch": 32.45044832468145, "grad_norm": 0.004295989405363798, "learning_rate": 2.7555737332523233e-05, "loss": 0.0015, "step": 275050 }, { "epoch": 32.45634733364795, "grad_norm": 0.010145323351025581, "learning_rate": 2.753913946183685e-05, "loss": 0.0013, "step": 275100 }, { "epoch": 32.46224634261444, "grad_norm": 0.020526759326457977, "learning_rate": 2.7522544691396667e-05, "loss": 0.0021, "step": 275150 }, { "epoch": 32.46814535158094, "grad_norm": 0.029213430359959602, "learning_rate": 2.7505953023493237e-05, "loss": 0.0016, "step": 275200 }, { "epoch": 32.47404436054743, "grad_norm": 0.009606923907995224, "learning_rate": 2.7489364460416678e-05, "loss": 0.0018, "step": 275250 }, { "epoch": 32.47994336951392, "grad_norm": 0.020341673865914345, "learning_rate": 2.7472779004456706e-05, "loss": 0.0014, "step": 275300 }, { "epoch": 32.48584237848041, "grad_norm": 0.05059665068984032, "learning_rate": 2.7456196657902578e-05, "loss": 0.0012, "step": 275350 }, { "epoch": 32.49174138744691, "grad_norm": 0.002549405675381422, "learning_rate": 2.7439617423043145e-05, "loss": 0.0007, "step": 275400 }, { "epoch": 32.4976403964134, "grad_norm": 0.001282919547520578, "learning_rate": 2.7423041302166817e-05, "loss": 0.0018, "step": 275450 }, { "epoch": 32.5035394053799, "grad_norm": 0.00762396352365613, "learning_rate": 2.740646829756156e-05, "loss": 0.0013, "step": 275500 }, { "epoch": 32.50943841434639, "grad_norm": 0.04406731203198433, "learning_rate": 2.7389898411514957e-05, "loss": 0.0017, "step": 275550 }, { "epoch": 32.515337423312886, "grad_norm": 0.2648278772830963, "learning_rate": 2.7373331646314117e-05, "loss": 0.0012, "step": 275600 }, { "epoch": 32.52123643227938, "grad_norm": 0.05131155252456665, "learning_rate": 2.7356768004245727e-05, "loss": 0.0022, "step": 275650 }, { "epoch": 32.52713544124587, "grad_norm": 0.029394743964076042, "learning_rate": 2.7340207487596053e-05, "loss": 0.0013, "step": 275700 }, { "epoch": 32.53303445021236, "grad_norm": 0.014198115095496178, "learning_rate": 2.7323650098650923e-05, "loss": 0.0011, "step": 275750 }, { "epoch": 32.53893345917886, "grad_norm": 0.22263607382774353, "learning_rate": 2.7307095839695722e-05, "loss": 0.0016, "step": 275800 }, { "epoch": 32.54483246814535, "grad_norm": 0.03981892764568329, "learning_rate": 2.729054471301543e-05, "loss": 0.0014, "step": 275850 }, { "epoch": 32.550731477111846, "grad_norm": 0.009329363703727722, "learning_rate": 2.727399672089457e-05, "loss": 0.0019, "step": 275900 }, { "epoch": 32.55663048607834, "grad_norm": 0.560158371925354, "learning_rate": 2.725745186561725e-05, "loss": 0.0011, "step": 275950 }, { "epoch": 32.562529495044835, "grad_norm": 0.14288564026355743, "learning_rate": 2.7240910149467115e-05, "loss": 0.0018, "step": 276000 }, { "epoch": 32.562529495044835, "eval_cer": 0.08347676419965576, "eval_loss": 2.035639226960484e-05, "eval_runtime": 2.0771, "eval_samples_per_second": 48.144, "eval_steps_per_second": 1.926, "eval_wer": 0.26, "step": 276000 }, { "epoch": 32.56842850401133, "grad_norm": 0.007080668117851019, "learning_rate": 2.7224371574727424e-05, "loss": 0.0014, "step": 276050 }, { "epoch": 32.57432751297782, "grad_norm": 0.104045569896698, "learning_rate": 2.7207836143680975e-05, "loss": 0.0019, "step": 276100 }, { "epoch": 32.58022652194431, "grad_norm": 0.016322659328579903, "learning_rate": 2.7191303858610136e-05, "loss": 0.0021, "step": 276150 }, { "epoch": 32.586125530910806, "grad_norm": 0.007571068126708269, "learning_rate": 2.7174774721796824e-05, "loss": 0.0013, "step": 276200 }, { "epoch": 32.5920245398773, "grad_norm": 0.1548558622598648, "learning_rate": 2.7158248735522525e-05, "loss": 0.0016, "step": 276250 }, { "epoch": 32.597923548843795, "grad_norm": 0.3997197151184082, "learning_rate": 2.7141725902068337e-05, "loss": 0.0012, "step": 276300 }, { "epoch": 32.60382255781029, "grad_norm": 0.16276267170906067, "learning_rate": 2.712520622371487e-05, "loss": 0.0016, "step": 276350 }, { "epoch": 32.609721566776784, "grad_norm": 0.01999673992395401, "learning_rate": 2.7108689702742318e-05, "loss": 0.0014, "step": 276400 }, { "epoch": 32.61562057574328, "grad_norm": 0.003953469451516867, "learning_rate": 2.7092176341430442e-05, "loss": 0.0023, "step": 276450 }, { "epoch": 32.621519584709766, "grad_norm": 0.07587166130542755, "learning_rate": 2.707566614205854e-05, "loss": 0.0015, "step": 276500 }, { "epoch": 32.62741859367626, "grad_norm": 0.058110348880290985, "learning_rate": 2.7059159106905524e-05, "loss": 0.0015, "step": 276550 }, { "epoch": 32.633317602642755, "grad_norm": 0.33341649174690247, "learning_rate": 2.704265523824982e-05, "loss": 0.0014, "step": 276600 }, { "epoch": 32.63921661160925, "grad_norm": 0.1403636485338211, "learning_rate": 2.702615453836945e-05, "loss": 0.0014, "step": 276650 }, { "epoch": 32.645115620575744, "grad_norm": 0.017011160030961037, "learning_rate": 2.700965700954197e-05, "loss": 0.0011, "step": 276700 }, { "epoch": 32.65101462954224, "grad_norm": 0.12640266120433807, "learning_rate": 2.6993162654044534e-05, "loss": 0.0016, "step": 276750 }, { "epoch": 32.65691363850873, "grad_norm": 0.004084366839379072, "learning_rate": 2.6976671474153824e-05, "loss": 0.0024, "step": 276800 }, { "epoch": 32.66281264747522, "grad_norm": 0.0727468878030777, "learning_rate": 2.6960183472146095e-05, "loss": 0.0015, "step": 276850 }, { "epoch": 32.668711656441715, "grad_norm": 0.1401323825120926, "learning_rate": 2.6943698650297187e-05, "loss": 0.0015, "step": 276900 }, { "epoch": 32.67461066540821, "grad_norm": 0.028309300541877747, "learning_rate": 2.692721701088247e-05, "loss": 0.0017, "step": 276950 }, { "epoch": 32.680509674374704, "grad_norm": 0.00044939195504412055, "learning_rate": 2.6910738556176884e-05, "loss": 0.0019, "step": 277000 }, { "epoch": 32.680509674374704, "eval_cer": 0.08347676419965576, "eval_loss": 0.00013147779100108892, "eval_runtime": 2.0506, "eval_samples_per_second": 48.765, "eval_steps_per_second": 1.951, "eval_wer": 0.26, "step": 277000 }, { "epoch": 32.6864086833412, "grad_norm": 0.019490644335746765, "learning_rate": 2.6894263288454935e-05, "loss": 0.0016, "step": 277050 }, { "epoch": 32.69230769230769, "grad_norm": 0.33114364743232727, "learning_rate": 2.6877791209990676e-05, "loss": 0.0015, "step": 277100 }, { "epoch": 32.69820670127419, "grad_norm": 0.120289646089077, "learning_rate": 2.6861322323057736e-05, "loss": 0.0021, "step": 277150 }, { "epoch": 32.70410571024068, "grad_norm": 0.030060648918151855, "learning_rate": 2.6844856629929293e-05, "loss": 0.0019, "step": 277200 }, { "epoch": 32.71000471920717, "grad_norm": 0.09832202643156052, "learning_rate": 2.6828394132878086e-05, "loss": 0.0012, "step": 277250 }, { "epoch": 32.715903728173664, "grad_norm": 0.004730955697596073, "learning_rate": 2.6811934834176423e-05, "loss": 0.0019, "step": 277300 }, { "epoch": 32.72180273714016, "grad_norm": 0.019018493592739105, "learning_rate": 2.6795478736096152e-05, "loss": 0.001, "step": 277350 }, { "epoch": 32.72770174610665, "grad_norm": 0.001470306538976729, "learning_rate": 2.677902584090869e-05, "loss": 0.001, "step": 277400 }, { "epoch": 32.73360075507315, "grad_norm": 0.00323712476529181, "learning_rate": 2.6762576150885e-05, "loss": 0.002, "step": 277450 }, { "epoch": 32.73949976403964, "grad_norm": 0.04055218771100044, "learning_rate": 2.674612966829564e-05, "loss": 0.0017, "step": 277500 }, { "epoch": 32.74539877300614, "grad_norm": 0.09012382477521896, "learning_rate": 2.672968639541068e-05, "loss": 0.0013, "step": 277550 }, { "epoch": 32.75129778197263, "grad_norm": 0.07491510361433029, "learning_rate": 2.6713246334499775e-05, "loss": 0.0021, "step": 277600 }, { "epoch": 32.75719679093912, "grad_norm": 0.007553812116384506, "learning_rate": 2.6696809487832098e-05, "loss": 0.0017, "step": 277650 }, { "epoch": 32.763095799905614, "grad_norm": 0.07104148715734482, "learning_rate": 2.668037585767646e-05, "loss": 0.0017, "step": 277700 }, { "epoch": 32.76899480887211, "grad_norm": 0.004984515253454447, "learning_rate": 2.6663945446301142e-05, "loss": 0.0013, "step": 277750 }, { "epoch": 32.7748938178386, "grad_norm": 0.007934615015983582, "learning_rate": 2.6647518255974025e-05, "loss": 0.0017, "step": 277800 }, { "epoch": 32.7807928268051, "grad_norm": 0.05339404568076134, "learning_rate": 2.6631094288962525e-05, "loss": 0.0022, "step": 277850 }, { "epoch": 32.78669183577159, "grad_norm": 0.014161227270960808, "learning_rate": 2.661467354753363e-05, "loss": 0.0017, "step": 277900 }, { "epoch": 32.792590844738086, "grad_norm": 0.038650646805763245, "learning_rate": 2.6598256033953873e-05, "loss": 0.0012, "step": 277950 }, { "epoch": 32.79848985370458, "grad_norm": 0.4024330675601959, "learning_rate": 2.6581841750489344e-05, "loss": 0.0015, "step": 278000 }, { "epoch": 32.79848985370458, "eval_cer": 0.08347676419965576, "eval_loss": 4.2935440433211625e-05, "eval_runtime": 2.1161, "eval_samples_per_second": 47.258, "eval_steps_per_second": 1.89, "eval_wer": 0.26, "step": 278000 }, { "epoch": 32.80438886267107, "grad_norm": 0.03591742366552353, "learning_rate": 2.6565430699405693e-05, "loss": 0.0013, "step": 278050 }, { "epoch": 32.81028787163756, "grad_norm": 0.0027103901375085115, "learning_rate": 2.654902288296811e-05, "loss": 0.0011, "step": 278100 }, { "epoch": 32.81618688060406, "grad_norm": 0.8980695009231567, "learning_rate": 2.6532618303441347e-05, "loss": 0.0017, "step": 278150 }, { "epoch": 32.82208588957055, "grad_norm": 0.17382927238941193, "learning_rate": 2.6516216963089698e-05, "loss": 0.001, "step": 278200 }, { "epoch": 32.827984898537046, "grad_norm": 0.004200945142656565, "learning_rate": 2.649981886417705e-05, "loss": 0.0014, "step": 278250 }, { "epoch": 32.83388390750354, "grad_norm": 0.27279165387153625, "learning_rate": 2.6483424008966796e-05, "loss": 0.0021, "step": 278300 }, { "epoch": 32.839782916470035, "grad_norm": 0.12100038677453995, "learning_rate": 2.6467032399721902e-05, "loss": 0.0015, "step": 278350 }, { "epoch": 32.84568192543653, "grad_norm": 0.0071262335404753685, "learning_rate": 2.645064403870488e-05, "loss": 0.0017, "step": 278400 }, { "epoch": 32.85158093440302, "grad_norm": 0.14051172137260437, "learning_rate": 2.6434258928177796e-05, "loss": 0.0013, "step": 278450 }, { "epoch": 32.85747994336951, "grad_norm": 0.15071195363998413, "learning_rate": 2.6417877070402275e-05, "loss": 0.002, "step": 278500 }, { "epoch": 32.86337895233601, "grad_norm": 0.011797207407653332, "learning_rate": 2.6401498467639473e-05, "loss": 0.0021, "step": 278550 }, { "epoch": 32.8692779613025, "grad_norm": 0.16147731244564056, "learning_rate": 2.6385123122150112e-05, "loss": 0.0016, "step": 278600 }, { "epoch": 32.875176970268996, "grad_norm": 0.05144811421632767, "learning_rate": 2.6368751036194473e-05, "loss": 0.0014, "step": 278650 }, { "epoch": 32.88107597923549, "grad_norm": 0.008475225418806076, "learning_rate": 2.6352382212032363e-05, "loss": 0.0017, "step": 278700 }, { "epoch": 32.886974988201985, "grad_norm": 0.10198735445737839, "learning_rate": 2.6336016651923157e-05, "loss": 0.0018, "step": 278750 }, { "epoch": 32.89287399716848, "grad_norm": 0.032468341290950775, "learning_rate": 2.6319654358125756e-05, "loss": 0.0009, "step": 278800 }, { "epoch": 32.89877300613497, "grad_norm": 0.01984705589711666, "learning_rate": 2.6303295332898665e-05, "loss": 0.002, "step": 278850 }, { "epoch": 32.90467201510146, "grad_norm": 0.19068853557109833, "learning_rate": 2.628693957849988e-05, "loss": 0.0015, "step": 278900 }, { "epoch": 32.910571024067956, "grad_norm": 0.024381542578339577, "learning_rate": 2.627058709718697e-05, "loss": 0.0015, "step": 278950 }, { "epoch": 32.91647003303445, "grad_norm": 0.06963083893060684, "learning_rate": 2.6254237891217047e-05, "loss": 0.0013, "step": 279000 }, { "epoch": 32.91647003303445, "eval_cer": 0.08347676419965576, "eval_loss": 2.6245337721775286e-05, "eval_runtime": 2.0677, "eval_samples_per_second": 48.364, "eval_steps_per_second": 1.935, "eval_wer": 0.26, "step": 279000 }, { "epoch": 32.922369042000945, "grad_norm": 0.37339693307876587, "learning_rate": 2.6237891962846773e-05, "loss": 0.0009, "step": 279050 }, { "epoch": 32.92826805096744, "grad_norm": 0.01159196812659502, "learning_rate": 2.6221549314332348e-05, "loss": 0.0014, "step": 279100 }, { "epoch": 32.934167059933934, "grad_norm": 0.00214790808968246, "learning_rate": 2.6205209947929553e-05, "loss": 0.0015, "step": 279150 }, { "epoch": 32.94006606890042, "grad_norm": 0.08031374961137772, "learning_rate": 2.618887386589367e-05, "loss": 0.0014, "step": 279200 }, { "epoch": 32.945965077866916, "grad_norm": 0.09930455684661865, "learning_rate": 2.617254107047955e-05, "loss": 0.0013, "step": 279250 }, { "epoch": 32.95186408683341, "grad_norm": 0.09472992271184921, "learning_rate": 2.615621156394158e-05, "loss": 0.0014, "step": 279300 }, { "epoch": 32.957763095799905, "grad_norm": 0.009160896763205528, "learning_rate": 2.613988534853374e-05, "loss": 0.0012, "step": 279350 }, { "epoch": 32.9636621047664, "grad_norm": 0.0036547170020639896, "learning_rate": 2.612356242650949e-05, "loss": 0.0008, "step": 279400 }, { "epoch": 32.969561113732894, "grad_norm": 0.029037795960903168, "learning_rate": 2.610724280012188e-05, "loss": 0.0013, "step": 279450 }, { "epoch": 32.97546012269939, "grad_norm": 0.19779764115810394, "learning_rate": 2.6090926471623468e-05, "loss": 0.0012, "step": 279500 }, { "epoch": 32.98135913166588, "grad_norm": 0.02471081167459488, "learning_rate": 2.607461344326637e-05, "loss": 0.0014, "step": 279550 }, { "epoch": 32.98725814063237, "grad_norm": 0.0528981015086174, "learning_rate": 2.605830371730229e-05, "loss": 0.0016, "step": 279600 }, { "epoch": 32.993157149598865, "grad_norm": 0.2190190553665161, "learning_rate": 2.604199729598243e-05, "loss": 0.0015, "step": 279650 }, { "epoch": 32.99905615856536, "grad_norm": 0.004304434638470411, "learning_rate": 2.6025694181557535e-05, "loss": 0.0014, "step": 279700 }, { "epoch": 33.004955167531854, "grad_norm": 0.01039459090679884, "learning_rate": 2.6009394376277906e-05, "loss": 0.0016, "step": 279750 }, { "epoch": 33.01085417649835, "grad_norm": 0.02957822009921074, "learning_rate": 2.599309788239339e-05, "loss": 0.0015, "step": 279800 }, { "epoch": 33.01675318546484, "grad_norm": 0.026518387719988823, "learning_rate": 2.597680470215338e-05, "loss": 0.0009, "step": 279850 }, { "epoch": 33.02265219443134, "grad_norm": 0.23673956096172333, "learning_rate": 2.596051483780678e-05, "loss": 0.0012, "step": 279900 }, { "epoch": 33.02855120339783, "grad_norm": 0.002809405792504549, "learning_rate": 2.594422829160209e-05, "loss": 0.0009, "step": 279950 }, { "epoch": 33.03445021236432, "grad_norm": 0.03780701383948326, "learning_rate": 2.5927945065787307e-05, "loss": 0.0009, "step": 280000 }, { "epoch": 33.03445021236432, "eval_cer": 0.08347676419965576, "eval_loss": 7.667380850762129e-05, "eval_runtime": 2.0728, "eval_samples_per_second": 48.243, "eval_steps_per_second": 1.93, "eval_wer": 0.26, "step": 280000 }, { "epoch": 33.040349221330814, "grad_norm": 0.029381414875388145, "learning_rate": 2.5911665162609982e-05, "loss": 0.0017, "step": 280050 }, { "epoch": 33.04624823029731, "grad_norm": 0.01013407576829195, "learning_rate": 2.5895388584317225e-05, "loss": 0.0013, "step": 280100 }, { "epoch": 33.0521472392638, "grad_norm": 0.017368357628583908, "learning_rate": 2.587911533315564e-05, "loss": 0.0017, "step": 280150 }, { "epoch": 33.0580462482303, "grad_norm": 0.1526186615228653, "learning_rate": 2.586284541137145e-05, "loss": 0.0009, "step": 280200 }, { "epoch": 33.06394525719679, "grad_norm": 0.0313553549349308, "learning_rate": 2.5846578821210356e-05, "loss": 0.0012, "step": 280250 }, { "epoch": 33.06984426616329, "grad_norm": 0.0031801918521523476, "learning_rate": 2.5830315564917606e-05, "loss": 0.0009, "step": 280300 }, { "epoch": 33.07574327512978, "grad_norm": 0.14669278264045715, "learning_rate": 2.581405564473801e-05, "loss": 0.0011, "step": 280350 }, { "epoch": 33.08164228409627, "grad_norm": 0.0033575892448425293, "learning_rate": 2.5797799062915906e-05, "loss": 0.0016, "step": 280400 }, { "epoch": 33.08754129306276, "grad_norm": 0.11476495116949081, "learning_rate": 2.5781545821695163e-05, "loss": 0.002, "step": 280450 }, { "epoch": 33.09344030202926, "grad_norm": 0.18347495794296265, "learning_rate": 2.57652959233192e-05, "loss": 0.0014, "step": 280500 }, { "epoch": 33.09933931099575, "grad_norm": 0.26775607466697693, "learning_rate": 2.574904937003097e-05, "loss": 0.0015, "step": 280550 }, { "epoch": 33.10523831996225, "grad_norm": 0.02004440501332283, "learning_rate": 2.5732806164072964e-05, "loss": 0.0017, "step": 280600 }, { "epoch": 33.11113732892874, "grad_norm": 0.0040077995508909225, "learning_rate": 2.571656630768722e-05, "loss": 0.0006, "step": 280650 }, { "epoch": 33.117036337895236, "grad_norm": 0.007342759519815445, "learning_rate": 2.5700329803115303e-05, "loss": 0.0014, "step": 280700 }, { "epoch": 33.12293534686173, "grad_norm": 0.01107789110392332, "learning_rate": 2.5684096652598295e-05, "loss": 0.0014, "step": 280750 }, { "epoch": 33.12883435582822, "grad_norm": 0.21593940258026123, "learning_rate": 2.5667866858376876e-05, "loss": 0.0019, "step": 280800 }, { "epoch": 33.13473336479471, "grad_norm": 0.1005520224571228, "learning_rate": 2.565164042269122e-05, "loss": 0.0015, "step": 280850 }, { "epoch": 33.14063237376121, "grad_norm": 0.08934659510850906, "learning_rate": 2.5635417347781034e-05, "loss": 0.0014, "step": 280900 }, { "epoch": 33.1465313827277, "grad_norm": 0.1211412101984024, "learning_rate": 2.561919763588554e-05, "loss": 0.0009, "step": 280950 }, { "epoch": 33.152430391694196, "grad_norm": 0.009947100654244423, "learning_rate": 2.5602981289243578e-05, "loss": 0.0014, "step": 281000 }, { "epoch": 33.152430391694196, "eval_cer": 0.08347676419965576, "eval_loss": 5.862659600097686e-05, "eval_runtime": 2.0343, "eval_samples_per_second": 49.156, "eval_steps_per_second": 1.966, "eval_wer": 0.26, "step": 281000 }, { "epoch": 33.15832940066069, "grad_norm": 0.16072827577590942, "learning_rate": 2.5586768310093456e-05, "loss": 0.0015, "step": 281050 }, { "epoch": 33.164228409627185, "grad_norm": 0.36852219700813293, "learning_rate": 2.557055870067302e-05, "loss": 0.0011, "step": 281100 }, { "epoch": 33.17012741859368, "grad_norm": 0.014458327554166317, "learning_rate": 2.5554352463219665e-05, "loss": 0.0009, "step": 281150 }, { "epoch": 33.17602642756017, "grad_norm": 0.03982381895184517, "learning_rate": 2.5538149599970322e-05, "loss": 0.0017, "step": 281200 }, { "epoch": 33.18192543652666, "grad_norm": 0.0009614073787815869, "learning_rate": 2.5521950113161458e-05, "loss": 0.0012, "step": 281250 }, { "epoch": 33.187824445493156, "grad_norm": 0.026310639455914497, "learning_rate": 2.5505754005029065e-05, "loss": 0.0012, "step": 281300 }, { "epoch": 33.19372345445965, "grad_norm": 0.0021560925524681807, "learning_rate": 2.548956127780867e-05, "loss": 0.0011, "step": 281350 }, { "epoch": 33.199622463426145, "grad_norm": 0.0006888678180985153, "learning_rate": 2.5473371933735334e-05, "loss": 0.001, "step": 281400 }, { "epoch": 33.20552147239264, "grad_norm": 0.005311674904078245, "learning_rate": 2.5457185975043664e-05, "loss": 0.0011, "step": 281450 }, { "epoch": 33.211420481359134, "grad_norm": 0.004812116734683514, "learning_rate": 2.544100340396776e-05, "loss": 0.0015, "step": 281500 }, { "epoch": 33.21731949032563, "grad_norm": 0.04501747712492943, "learning_rate": 2.5424824222741327e-05, "loss": 0.0013, "step": 281550 }, { "epoch": 33.223218499292116, "grad_norm": 0.013930259272456169, "learning_rate": 2.540864843359754e-05, "loss": 0.0008, "step": 281600 }, { "epoch": 33.22911750825861, "grad_norm": 0.0005389973521232605, "learning_rate": 2.539247603876912e-05, "loss": 0.0011, "step": 281650 }, { "epoch": 33.235016517225105, "grad_norm": 0.021520676091313362, "learning_rate": 2.5376307040488323e-05, "loss": 0.0018, "step": 281700 }, { "epoch": 33.2409155261916, "grad_norm": 0.3719482123851776, "learning_rate": 2.5360141440986935e-05, "loss": 0.0019, "step": 281750 }, { "epoch": 33.246814535158094, "grad_norm": 0.0020758495666086674, "learning_rate": 2.5343979242496286e-05, "loss": 0.0021, "step": 281800 }, { "epoch": 33.25271354412459, "grad_norm": 0.2170676738023758, "learning_rate": 2.532782044724721e-05, "loss": 0.0012, "step": 281850 }, { "epoch": 33.25861255309108, "grad_norm": 0.2405741959810257, "learning_rate": 2.5311665057470096e-05, "loss": 0.0011, "step": 281900 }, { "epoch": 33.26451156205757, "grad_norm": 0.0041403863579034805, "learning_rate": 2.5295513075394854e-05, "loss": 0.0014, "step": 281950 }, { "epoch": 33.270410571024065, "grad_norm": 0.043137457221746445, "learning_rate": 2.5279364503250923e-05, "loss": 0.0012, "step": 282000 }, { "epoch": 33.270410571024065, "eval_cer": 0.08347676419965576, "eval_loss": 0.00011873056791955605, "eval_runtime": 2.0565, "eval_samples_per_second": 48.627, "eval_steps_per_second": 1.945, "eval_wer": 0.26, "step": 282000 }, { "epoch": 33.27630957999056, "grad_norm": 0.20281173288822174, "learning_rate": 2.5263219343267265e-05, "loss": 0.0022, "step": 282050 }, { "epoch": 33.282208588957054, "grad_norm": 0.04380939528346062, "learning_rate": 2.5247077597672365e-05, "loss": 0.0014, "step": 282100 }, { "epoch": 33.28810759792355, "grad_norm": 0.011409364640712738, "learning_rate": 2.5230939268694286e-05, "loss": 0.0015, "step": 282150 }, { "epoch": 33.29400660689004, "grad_norm": 0.050745654851198196, "learning_rate": 2.521480435856056e-05, "loss": 0.0015, "step": 282200 }, { "epoch": 33.29990561585654, "grad_norm": 0.0029542038682848215, "learning_rate": 2.5198672869498273e-05, "loss": 0.0012, "step": 282250 }, { "epoch": 33.30580462482303, "grad_norm": 0.3017680048942566, "learning_rate": 2.518254480373403e-05, "loss": 0.0014, "step": 282300 }, { "epoch": 33.31170363378952, "grad_norm": 0.0031397459097206593, "learning_rate": 2.5166420163493986e-05, "loss": 0.0017, "step": 282350 }, { "epoch": 33.317602642756015, "grad_norm": 0.07691920548677444, "learning_rate": 2.515029895100378e-05, "loss": 0.0009, "step": 282400 }, { "epoch": 33.32350165172251, "grad_norm": 0.11563289910554886, "learning_rate": 2.5134181168488625e-05, "loss": 0.0013, "step": 282450 }, { "epoch": 33.329400660689004, "grad_norm": 0.008119579404592514, "learning_rate": 2.5118066818173237e-05, "loss": 0.0019, "step": 282500 }, { "epoch": 33.3352996696555, "grad_norm": 0.1214815005660057, "learning_rate": 2.510195590228185e-05, "loss": 0.0018, "step": 282550 }, { "epoch": 33.34119867862199, "grad_norm": 0.08273782581090927, "learning_rate": 2.5085848423038223e-05, "loss": 0.0012, "step": 282600 }, { "epoch": 33.34709768758849, "grad_norm": 0.0019900731276720762, "learning_rate": 2.506974438266569e-05, "loss": 0.001, "step": 282650 }, { "epoch": 33.35299669655498, "grad_norm": 0.007127570919692516, "learning_rate": 2.505364378338705e-05, "loss": 0.0022, "step": 282700 }, { "epoch": 33.35889570552147, "grad_norm": 0.05060092732310295, "learning_rate": 2.5037546627424658e-05, "loss": 0.0011, "step": 282750 }, { "epoch": 33.364794714487964, "grad_norm": 0.3024836480617523, "learning_rate": 2.5021452917000375e-05, "loss": 0.0019, "step": 282800 }, { "epoch": 33.37069372345446, "grad_norm": 0.03281022608280182, "learning_rate": 2.5005362654335597e-05, "loss": 0.0013, "step": 282850 }, { "epoch": 33.37659273242095, "grad_norm": 0.062168803066015244, "learning_rate": 2.4989275841651255e-05, "loss": 0.0012, "step": 282900 }, { "epoch": 33.38249174138745, "grad_norm": 0.027901260182261467, "learning_rate": 2.497319248116779e-05, "loss": 0.0014, "step": 282950 }, { "epoch": 33.38839075035394, "grad_norm": 0.2690637707710266, "learning_rate": 2.4957112575105167e-05, "loss": 0.0022, "step": 283000 }, { "epoch": 33.38839075035394, "eval_cer": 0.08347676419965576, "eval_loss": 8.038189844228327e-05, "eval_runtime": 2.0662, "eval_samples_per_second": 48.398, "eval_steps_per_second": 1.936, "eval_wer": 0.26, "step": 283000 }, { "epoch": 33.394289759320436, "grad_norm": 0.005559055134654045, "learning_rate": 2.4941036125682883e-05, "loss": 0.002, "step": 283050 }, { "epoch": 33.40018876828693, "grad_norm": 0.012636817991733551, "learning_rate": 2.4924963135119934e-05, "loss": 0.0013, "step": 283100 }, { "epoch": 33.40608777725342, "grad_norm": 0.010338692925870419, "learning_rate": 2.4908893605634875e-05, "loss": 0.0012, "step": 283150 }, { "epoch": 33.41198678621991, "grad_norm": 0.3387446105480194, "learning_rate": 2.489282753944575e-05, "loss": 0.0014, "step": 283200 }, { "epoch": 33.41788579518641, "grad_norm": 0.17184658348560333, "learning_rate": 2.4876764938770153e-05, "loss": 0.001, "step": 283250 }, { "epoch": 33.4237848041529, "grad_norm": 0.0062254322692751884, "learning_rate": 2.4860705805825175e-05, "loss": 0.0016, "step": 283300 }, { "epoch": 33.4296838131194, "grad_norm": 0.06749612092971802, "learning_rate": 2.484465014282744e-05, "loss": 0.0013, "step": 283350 }, { "epoch": 33.43558282208589, "grad_norm": 0.17695456743240356, "learning_rate": 2.4828597951993092e-05, "loss": 0.0012, "step": 283400 }, { "epoch": 33.441481831052386, "grad_norm": 0.01048823818564415, "learning_rate": 2.4812549235537786e-05, "loss": 0.0023, "step": 283450 }, { "epoch": 33.44738084001888, "grad_norm": 0.13865350186824799, "learning_rate": 2.479650399567674e-05, "loss": 0.0012, "step": 283500 }, { "epoch": 33.45327984898537, "grad_norm": 0.060665395110845566, "learning_rate": 2.4780462234624634e-05, "loss": 0.0019, "step": 283550 }, { "epoch": 33.45917885795186, "grad_norm": 0.005558319855481386, "learning_rate": 2.4764423954595705e-05, "loss": 0.0011, "step": 283600 }, { "epoch": 33.46507786691836, "grad_norm": 0.01023904699832201, "learning_rate": 2.4748389157803693e-05, "loss": 0.001, "step": 283650 }, { "epoch": 33.47097687588485, "grad_norm": 0.08061282336711884, "learning_rate": 2.4732357846461857e-05, "loss": 0.001, "step": 283700 }, { "epoch": 33.476875884851346, "grad_norm": 0.006515785586088896, "learning_rate": 2.4716330022782985e-05, "loss": 0.0009, "step": 283750 }, { "epoch": 33.48277489381784, "grad_norm": 0.02914832904934883, "learning_rate": 2.470030568897938e-05, "loss": 0.0012, "step": 283800 }, { "epoch": 33.488673902784335, "grad_norm": 0.001004009391181171, "learning_rate": 2.4684284847262855e-05, "loss": 0.0013, "step": 283850 }, { "epoch": 33.49457291175083, "grad_norm": 0.008687610737979412, "learning_rate": 2.466826749984476e-05, "loss": 0.0013, "step": 283900 }, { "epoch": 33.50047192071732, "grad_norm": 0.06497080624103546, "learning_rate": 2.465225364893594e-05, "loss": 0.0019, "step": 283950 }, { "epoch": 33.50637092968381, "grad_norm": 0.11143437772989273, "learning_rate": 2.4636243296746776e-05, "loss": 0.0015, "step": 284000 }, { "epoch": 33.50637092968381, "eval_cer": 0.08347676419965576, "eval_loss": 4.829216413781978e-05, "eval_runtime": 2.03, "eval_samples_per_second": 49.262, "eval_steps_per_second": 1.97, "eval_wer": 0.26, "step": 284000 }, { "epoch": 33.512269938650306, "grad_norm": 0.007646745536476374, "learning_rate": 2.4620236445487133e-05, "loss": 0.0012, "step": 284050 }, { "epoch": 33.5181689476168, "grad_norm": 0.04138139262795448, "learning_rate": 2.460423309736646e-05, "loss": 0.0012, "step": 284100 }, { "epoch": 33.524067956583295, "grad_norm": 0.032054152339696884, "learning_rate": 2.458823325459365e-05, "loss": 0.0012, "step": 284150 }, { "epoch": 33.52996696554979, "grad_norm": 0.09770364314317703, "learning_rate": 2.457223691937716e-05, "loss": 0.0017, "step": 284200 }, { "epoch": 33.535865974516284, "grad_norm": 0.1287263035774231, "learning_rate": 2.455624409392492e-05, "loss": 0.0019, "step": 284250 }, { "epoch": 33.54176498348278, "grad_norm": 0.014659453183412552, "learning_rate": 2.4540254780444428e-05, "loss": 0.0012, "step": 284300 }, { "epoch": 33.547663992449266, "grad_norm": 0.021191803738474846, "learning_rate": 2.4524268981142668e-05, "loss": 0.0011, "step": 284350 }, { "epoch": 33.55356300141576, "grad_norm": 0.15967975556850433, "learning_rate": 2.450828669822613e-05, "loss": 0.0011, "step": 284400 }, { "epoch": 33.559462010382255, "grad_norm": 0.3221556842327118, "learning_rate": 2.4492307933900833e-05, "loss": 0.0015, "step": 284450 }, { "epoch": 33.56536101934875, "grad_norm": 0.002662832150235772, "learning_rate": 2.4476332690372305e-05, "loss": 0.0014, "step": 284500 }, { "epoch": 33.571260028315244, "grad_norm": 0.0018694507889449596, "learning_rate": 2.446036096984559e-05, "loss": 0.0011, "step": 284550 }, { "epoch": 33.57715903728174, "grad_norm": 0.005383910611271858, "learning_rate": 2.4444392774525254e-05, "loss": 0.0009, "step": 284600 }, { "epoch": 33.58305804624823, "grad_norm": 0.006353110074996948, "learning_rate": 2.4428428106615364e-05, "loss": 0.0014, "step": 284650 }, { "epoch": 33.58895705521472, "grad_norm": 0.010495931841433048, "learning_rate": 2.4412466968319498e-05, "loss": 0.0008, "step": 284700 }, { "epoch": 33.594856064181215, "grad_norm": 0.06059371307492256, "learning_rate": 2.439650936184076e-05, "loss": 0.0007, "step": 284750 }, { "epoch": 33.60075507314771, "grad_norm": 0.0711750015616417, "learning_rate": 2.4380555289381734e-05, "loss": 0.001, "step": 284800 }, { "epoch": 33.606654082114204, "grad_norm": 0.0442914254963398, "learning_rate": 2.4364604753144588e-05, "loss": 0.0014, "step": 284850 }, { "epoch": 33.6125530910807, "grad_norm": 0.07548398524522781, "learning_rate": 2.434865775533093e-05, "loss": 0.0017, "step": 284900 }, { "epoch": 33.61845210004719, "grad_norm": 0.03688079118728638, "learning_rate": 2.4332714298141913e-05, "loss": 0.0017, "step": 284950 }, { "epoch": 33.62435110901369, "grad_norm": 0.029486654326319695, "learning_rate": 2.4316774383778183e-05, "loss": 0.001, "step": 285000 }, { "epoch": 33.62435110901369, "eval_cer": 0.08347676419965576, "eval_loss": 4.2064024455612525e-05, "eval_runtime": 2.0684, "eval_samples_per_second": 48.347, "eval_steps_per_second": 1.934, "eval_wer": 0.26, "step": 285000 }, { "epoch": 33.63025011798018, "grad_norm": 0.00594136631116271, "learning_rate": 2.430083801443992e-05, "loss": 0.0014, "step": 285050 }, { "epoch": 33.63614912694667, "grad_norm": 0.0008511045598424971, "learning_rate": 2.428490519232679e-05, "loss": 0.0017, "step": 285100 }, { "epoch": 33.642048135913164, "grad_norm": 0.18933705985546112, "learning_rate": 2.426897591963799e-05, "loss": 0.0012, "step": 285150 }, { "epoch": 33.64794714487966, "grad_norm": 0.14703741669654846, "learning_rate": 2.425305019857222e-05, "loss": 0.0012, "step": 285200 }, { "epoch": 33.65384615384615, "grad_norm": 0.0040461779572069645, "learning_rate": 2.423712803132768e-05, "loss": 0.0017, "step": 285250 }, { "epoch": 33.65974516281265, "grad_norm": 0.018894337117671967, "learning_rate": 2.4221209420102096e-05, "loss": 0.0013, "step": 285300 }, { "epoch": 33.66564417177914, "grad_norm": 0.2308182567358017, "learning_rate": 2.4205294367092696e-05, "loss": 0.0014, "step": 285350 }, { "epoch": 33.67154318074564, "grad_norm": 0.02082563191652298, "learning_rate": 2.4189382874496185e-05, "loss": 0.0014, "step": 285400 }, { "epoch": 33.67744218971213, "grad_norm": 0.022631777450442314, "learning_rate": 2.4173474944508862e-05, "loss": 0.002, "step": 285450 }, { "epoch": 33.68334119867862, "grad_norm": 0.018835844472050667, "learning_rate": 2.4157570579326448e-05, "loss": 0.001, "step": 285500 }, { "epoch": 33.68924020764511, "grad_norm": 0.0034263620618730783, "learning_rate": 2.414166978114421e-05, "loss": 0.0017, "step": 285550 }, { "epoch": 33.69513921661161, "grad_norm": 0.16251012682914734, "learning_rate": 2.4125772552156918e-05, "loss": 0.0013, "step": 285600 }, { "epoch": 33.7010382255781, "grad_norm": 0.0006633661687374115, "learning_rate": 2.4109878894558842e-05, "loss": 0.0016, "step": 285650 }, { "epoch": 33.7069372345446, "grad_norm": 0.014141217805445194, "learning_rate": 2.4093988810543765e-05, "loss": 0.001, "step": 285700 }, { "epoch": 33.71283624351109, "grad_norm": 0.5937416553497314, "learning_rate": 2.4078102302304983e-05, "loss": 0.0018, "step": 285750 }, { "epoch": 33.718735252477586, "grad_norm": 0.0011764946393668652, "learning_rate": 2.4062219372035295e-05, "loss": 0.0013, "step": 285800 }, { "epoch": 33.72463426144408, "grad_norm": 0.20636063814163208, "learning_rate": 2.404634002192699e-05, "loss": 0.0018, "step": 285850 }, { "epoch": 33.73053327041057, "grad_norm": 0.0037690033204853535, "learning_rate": 2.4030464254171865e-05, "loss": 0.0008, "step": 285900 }, { "epoch": 33.73643227937706, "grad_norm": 0.1079976037144661, "learning_rate": 2.4014592070961266e-05, "loss": 0.0017, "step": 285950 }, { "epoch": 33.74233128834356, "grad_norm": 0.0350305400788784, "learning_rate": 2.3998723474486008e-05, "loss": 0.0012, "step": 286000 }, { "epoch": 33.74233128834356, "eval_cer": 0.08347676419965576, "eval_loss": 1.4829689462203532e-05, "eval_runtime": 2.0834, "eval_samples_per_second": 47.999, "eval_steps_per_second": 1.92, "eval_wer": 0.26, "step": 286000 }, { "epoch": 33.74823029731005, "grad_norm": 0.02701607346534729, "learning_rate": 2.398285846693639e-05, "loss": 0.0016, "step": 286050 }, { "epoch": 33.754129306276546, "grad_norm": 0.12359538674354553, "learning_rate": 2.3966997050502267e-05, "loss": 0.0011, "step": 286100 }, { "epoch": 33.76002831524304, "grad_norm": 0.08258085697889328, "learning_rate": 2.395113922737294e-05, "loss": 0.0009, "step": 286150 }, { "epoch": 33.765927324209535, "grad_norm": 0.018598372116684914, "learning_rate": 2.3935284999737274e-05, "loss": 0.0016, "step": 286200 }, { "epoch": 33.77182633317603, "grad_norm": 0.09396577626466751, "learning_rate": 2.3919434369783607e-05, "loss": 0.0016, "step": 286250 }, { "epoch": 33.77772534214252, "grad_norm": 0.001671002828516066, "learning_rate": 2.3903587339699772e-05, "loss": 0.0013, "step": 286300 }, { "epoch": 33.78362435110901, "grad_norm": 0.006599112413823605, "learning_rate": 2.3887743911673127e-05, "loss": 0.0019, "step": 286350 }, { "epoch": 33.789523360075506, "grad_norm": 0.07908410578966141, "learning_rate": 2.3871904087890505e-05, "loss": 0.0015, "step": 286400 }, { "epoch": 33.795422369042, "grad_norm": 0.03571554645895958, "learning_rate": 2.3856067870538277e-05, "loss": 0.0009, "step": 286450 }, { "epoch": 33.801321378008495, "grad_norm": 0.0031339856795966625, "learning_rate": 2.3840235261802287e-05, "loss": 0.0019, "step": 286500 }, { "epoch": 33.80722038697499, "grad_norm": 0.09581556171178818, "learning_rate": 2.3824406263867888e-05, "loss": 0.0016, "step": 286550 }, { "epoch": 33.813119395941484, "grad_norm": 0.022672178223729134, "learning_rate": 2.3808580878919946e-05, "loss": 0.0013, "step": 286600 }, { "epoch": 33.81901840490798, "grad_norm": 0.05547204986214638, "learning_rate": 2.379275910914282e-05, "loss": 0.0011, "step": 286650 }, { "epoch": 33.824917413874466, "grad_norm": 0.03235333040356636, "learning_rate": 2.3776940956720362e-05, "loss": 0.0019, "step": 286700 }, { "epoch": 33.83081642284096, "grad_norm": 0.0466625951230526, "learning_rate": 2.3761126423835923e-05, "loss": 0.0009, "step": 286750 }, { "epoch": 33.836715431807455, "grad_norm": 0.2928151786327362, "learning_rate": 2.37453155126724e-05, "loss": 0.0024, "step": 286800 }, { "epoch": 33.84261444077395, "grad_norm": 0.019980819895863533, "learning_rate": 2.372950822541213e-05, "loss": 0.001, "step": 286850 }, { "epoch": 33.848513449740445, "grad_norm": 0.01696648634970188, "learning_rate": 2.3713704564236982e-05, "loss": 0.0015, "step": 286900 }, { "epoch": 33.85441245870694, "grad_norm": 0.017883507534861565, "learning_rate": 2.3697904531328314e-05, "loss": 0.0011, "step": 286950 }, { "epoch": 33.860311467673434, "grad_norm": 0.011190563440322876, "learning_rate": 2.368210812886698e-05, "loss": 0.001, "step": 287000 }, { "epoch": 33.860311467673434, "eval_cer": 0.08347676419965576, "eval_loss": 5.430531018646434e-05, "eval_runtime": 2.0838, "eval_samples_per_second": 47.99, "eval_steps_per_second": 1.92, "eval_wer": 0.26, "step": 287000 }, { "epoch": 33.86621047663992, "grad_norm": 0.08078229427337646, "learning_rate": 2.3666315359033348e-05, "loss": 0.0012, "step": 287050 }, { "epoch": 33.872109485606416, "grad_norm": 0.0072535923682153225, "learning_rate": 2.3650526224007273e-05, "loss": 0.001, "step": 287100 }, { "epoch": 33.87800849457291, "grad_norm": 0.06832773983478546, "learning_rate": 2.363474072596811e-05, "loss": 0.0016, "step": 287150 }, { "epoch": 33.883907503539405, "grad_norm": 0.10892587900161743, "learning_rate": 2.361895886709471e-05, "loss": 0.0007, "step": 287200 }, { "epoch": 33.8898065125059, "grad_norm": 0.026974910870194435, "learning_rate": 2.3603180649565427e-05, "loss": 0.0015, "step": 287250 }, { "epoch": 33.895705521472394, "grad_norm": 0.016230693086981773, "learning_rate": 2.358740607555811e-05, "loss": 0.0011, "step": 287300 }, { "epoch": 33.90160453043889, "grad_norm": 0.01001066341996193, "learning_rate": 2.3571635147250083e-05, "loss": 0.0012, "step": 287350 }, { "epoch": 33.90750353940538, "grad_norm": 0.00373836955986917, "learning_rate": 2.3555867866818233e-05, "loss": 0.0017, "step": 287400 }, { "epoch": 33.91340254837187, "grad_norm": 0.29213935136795044, "learning_rate": 2.3540104236438876e-05, "loss": 0.0018, "step": 287450 }, { "epoch": 33.919301557338365, "grad_norm": 0.07618957757949829, "learning_rate": 2.3524344258287845e-05, "loss": 0.0014, "step": 287500 }, { "epoch": 33.92520056630486, "grad_norm": 0.16605731844902039, "learning_rate": 2.3508587934540455e-05, "loss": 0.0023, "step": 287550 }, { "epoch": 33.931099575271354, "grad_norm": 0.002928689122200012, "learning_rate": 2.3492835267371576e-05, "loss": 0.0014, "step": 287600 }, { "epoch": 33.93699858423785, "grad_norm": 0.00884551927447319, "learning_rate": 2.34770862589555e-05, "loss": 0.0021, "step": 287650 }, { "epoch": 33.94289759320434, "grad_norm": 0.004510792437940836, "learning_rate": 2.3461340911466057e-05, "loss": 0.0012, "step": 287700 }, { "epoch": 33.94879660217084, "grad_norm": 0.010559073649346828, "learning_rate": 2.344559922707654e-05, "loss": 0.0014, "step": 287750 }, { "epoch": 33.95469561113733, "grad_norm": 0.026244252920150757, "learning_rate": 2.342986120795978e-05, "loss": 0.0016, "step": 287800 }, { "epoch": 33.96059462010382, "grad_norm": 0.007919575087726116, "learning_rate": 2.341412685628806e-05, "loss": 0.0011, "step": 287850 }, { "epoch": 33.966493629070314, "grad_norm": 0.017952125519514084, "learning_rate": 2.3398396174233178e-05, "loss": 0.0014, "step": 287900 }, { "epoch": 33.97239263803681, "grad_norm": 0.1238836944103241, "learning_rate": 2.338266916396642e-05, "loss": 0.0011, "step": 287950 }, { "epoch": 33.9782916470033, "grad_norm": 0.0027807108126580715, "learning_rate": 2.336694582765857e-05, "loss": 0.002, "step": 288000 }, { "epoch": 33.9782916470033, "eval_cer": 0.08347676419965576, "eval_loss": 4.024916415801272e-05, "eval_runtime": 2.0385, "eval_samples_per_second": 49.055, "eval_steps_per_second": 1.962, "eval_wer": 0.26, "step": 288000 }, { "epoch": 33.9841906559698, "grad_norm": 0.2177305370569229, "learning_rate": 2.33512261674799e-05, "loss": 0.0016, "step": 288050 }, { "epoch": 33.99008966493629, "grad_norm": 0.0168016217648983, "learning_rate": 2.3335510185600163e-05, "loss": 0.0013, "step": 288100 }, { "epoch": 33.99598867390279, "grad_norm": 0.35836315155029297, "learning_rate": 2.3319797884188642e-05, "loss": 0.0016, "step": 288150 }, { "epoch": 34.00188768286928, "grad_norm": 0.03977404907345772, "learning_rate": 2.3304089265414087e-05, "loss": 0.0011, "step": 288200 }, { "epoch": 34.00778669183577, "grad_norm": 0.12057054042816162, "learning_rate": 2.3288384331444722e-05, "loss": 0.001, "step": 288250 }, { "epoch": 34.01368570080226, "grad_norm": 0.01815667189657688, "learning_rate": 2.327268308444829e-05, "loss": 0.0009, "step": 288300 }, { "epoch": 34.01958470976876, "grad_norm": 0.09177345037460327, "learning_rate": 2.325698552659201e-05, "loss": 0.0014, "step": 288350 }, { "epoch": 34.02548371873525, "grad_norm": 0.1663520485162735, "learning_rate": 2.3241291660042614e-05, "loss": 0.0011, "step": 288400 }, { "epoch": 34.03138272770175, "grad_norm": 0.0010278886184096336, "learning_rate": 2.3225601486966285e-05, "loss": 0.001, "step": 288450 }, { "epoch": 34.03728173666824, "grad_norm": 0.012377436272799969, "learning_rate": 2.320991500952873e-05, "loss": 0.001, "step": 288500 }, { "epoch": 34.043180745634736, "grad_norm": 0.1314006894826889, "learning_rate": 2.319423222989514e-05, "loss": 0.0011, "step": 288550 }, { "epoch": 34.04907975460123, "grad_norm": 0.17132672667503357, "learning_rate": 2.3178553150230187e-05, "loss": 0.0009, "step": 288600 }, { "epoch": 34.05497876356772, "grad_norm": 0.0018988446099683642, "learning_rate": 2.316287777269803e-05, "loss": 0.0016, "step": 288650 }, { "epoch": 34.06087777253421, "grad_norm": 0.009995155967772007, "learning_rate": 2.314720609946231e-05, "loss": 0.0015, "step": 288700 }, { "epoch": 34.06677678150071, "grad_norm": 0.0012730683665722609, "learning_rate": 2.3131538132686204e-05, "loss": 0.0015, "step": 288750 }, { "epoch": 34.0726757904672, "grad_norm": 0.005307656712830067, "learning_rate": 2.3115873874532322e-05, "loss": 0.0013, "step": 288800 }, { "epoch": 34.078574799433696, "grad_norm": 0.006601813714951277, "learning_rate": 2.3100213327162795e-05, "loss": 0.0013, "step": 288850 }, { "epoch": 34.08447380840019, "grad_norm": 0.00978102721273899, "learning_rate": 2.3084556492739224e-05, "loss": 0.0013, "step": 288900 }, { "epoch": 34.090372817366685, "grad_norm": 0.03305713087320328, "learning_rate": 2.3068903373422695e-05, "loss": 0.0013, "step": 288950 }, { "epoch": 34.09627182633318, "grad_norm": 0.20777527987957, "learning_rate": 2.3053253971373796e-05, "loss": 0.001, "step": 289000 }, { "epoch": 34.09627182633318, "eval_cer": 0.08347676419965576, "eval_loss": 3.682514216052368e-05, "eval_runtime": 2.0508, "eval_samples_per_second": 48.761, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 289000 }, { "epoch": 34.10217083529967, "grad_norm": 0.03606303036212921, "learning_rate": 2.30376082887526e-05, "loss": 0.001, "step": 289050 }, { "epoch": 34.10806984426616, "grad_norm": 0.11656595021486282, "learning_rate": 2.3021966327718658e-05, "loss": 0.0011, "step": 289100 }, { "epoch": 34.113968853232656, "grad_norm": 0.0015541489701718092, "learning_rate": 2.3006328090431013e-05, "loss": 0.0012, "step": 289150 }, { "epoch": 34.11986786219915, "grad_norm": 0.5888087153434753, "learning_rate": 2.2990693579048166e-05, "loss": 0.0014, "step": 289200 }, { "epoch": 34.125766871165645, "grad_norm": 0.052672337740659714, "learning_rate": 2.297506279572818e-05, "loss": 0.001, "step": 289250 }, { "epoch": 34.13166588013214, "grad_norm": 0.04428485780954361, "learning_rate": 2.2959435742628528e-05, "loss": 0.0015, "step": 289300 }, { "epoch": 34.137564889098634, "grad_norm": 0.00526119489222765, "learning_rate": 2.294381242190619e-05, "loss": 0.0015, "step": 289350 }, { "epoch": 34.14346389806512, "grad_norm": 0.0706230029463768, "learning_rate": 2.2928192835717644e-05, "loss": 0.0011, "step": 289400 }, { "epoch": 34.149362907031616, "grad_norm": 0.0030102620366960764, "learning_rate": 2.291257698621882e-05, "loss": 0.0015, "step": 289450 }, { "epoch": 34.15526191599811, "grad_norm": 0.12408813089132309, "learning_rate": 2.289696487556519e-05, "loss": 0.0016, "step": 289500 }, { "epoch": 34.161160924964605, "grad_norm": 0.02069687470793724, "learning_rate": 2.288135650591166e-05, "loss": 0.0012, "step": 289550 }, { "epoch": 34.1670599339311, "grad_norm": 0.0011118620168417692, "learning_rate": 2.2865751879412635e-05, "loss": 0.0017, "step": 289600 }, { "epoch": 34.172958942897594, "grad_norm": 0.01262096781283617, "learning_rate": 2.2850150998222003e-05, "loss": 0.0013, "step": 289650 }, { "epoch": 34.17885795186409, "grad_norm": 0.06857644766569138, "learning_rate": 2.2834553864493137e-05, "loss": 0.0011, "step": 289700 }, { "epoch": 34.18475696083058, "grad_norm": 0.011239897459745407, "learning_rate": 2.2818960480378883e-05, "loss": 0.0011, "step": 289750 }, { "epoch": 34.19065596979707, "grad_norm": 0.06478054076433182, "learning_rate": 2.2803370848031586e-05, "loss": 0.0015, "step": 289800 }, { "epoch": 34.196554978763565, "grad_norm": 0.0030432173516601324, "learning_rate": 2.278778496960306e-05, "loss": 0.0013, "step": 289850 }, { "epoch": 34.20245398773006, "grad_norm": 0.00484401686117053, "learning_rate": 2.277220284724461e-05, "loss": 0.0009, "step": 289900 }, { "epoch": 34.208352996696554, "grad_norm": 0.07170015573501587, "learning_rate": 2.2756624483107014e-05, "loss": 0.0007, "step": 289950 }, { "epoch": 34.21425200566305, "grad_norm": 0.04843827337026596, "learning_rate": 2.274104987934054e-05, "loss": 0.001, "step": 290000 }, { "epoch": 34.21425200566305, "eval_cer": 0.08347676419965576, "eval_loss": 1.9664212231873535e-05, "eval_runtime": 2.0527, "eval_samples_per_second": 48.717, "eval_steps_per_second": 1.949, "eval_wer": 0.26, "step": 290000 }, { "epoch": 34.22015101462954, "grad_norm": 0.025286532938480377, "learning_rate": 2.272547903809491e-05, "loss": 0.0015, "step": 290050 }, { "epoch": 34.22605002359604, "grad_norm": 0.023441001772880554, "learning_rate": 2.270991196151938e-05, "loss": 0.0012, "step": 290100 }, { "epoch": 34.23194903256253, "grad_norm": 0.008075795136392117, "learning_rate": 2.269434865176265e-05, "loss": 0.0016, "step": 290150 }, { "epoch": 34.23784804152902, "grad_norm": 0.17604094743728638, "learning_rate": 2.2678789110972897e-05, "loss": 0.0018, "step": 290200 }, { "epoch": 34.243747050495514, "grad_norm": 0.11242374777793884, "learning_rate": 2.2663233341297784e-05, "loss": 0.0014, "step": 290250 }, { "epoch": 34.24964605946201, "grad_norm": 0.009531076066195965, "learning_rate": 2.2647681344884465e-05, "loss": 0.0017, "step": 290300 }, { "epoch": 34.2555450684285, "grad_norm": 0.0016519613564014435, "learning_rate": 2.2632133123879558e-05, "loss": 0.0012, "step": 290350 }, { "epoch": 34.261444077395, "grad_norm": 0.0034066836815327406, "learning_rate": 2.2616588680429156e-05, "loss": 0.0012, "step": 290400 }, { "epoch": 34.26734308636149, "grad_norm": 0.009652846492826939, "learning_rate": 2.2601048016678862e-05, "loss": 0.0022, "step": 290450 }, { "epoch": 34.27324209532799, "grad_norm": 0.0024912336375564337, "learning_rate": 2.258551113477372e-05, "loss": 0.0018, "step": 290500 }, { "epoch": 34.27914110429448, "grad_norm": 0.1214621290564537, "learning_rate": 2.2569978036858268e-05, "loss": 0.0015, "step": 290550 }, { "epoch": 34.28504011326097, "grad_norm": 0.0018910991493612528, "learning_rate": 2.2554448725076527e-05, "loss": 0.0011, "step": 290600 }, { "epoch": 34.29093912222746, "grad_norm": 0.021646469831466675, "learning_rate": 2.2538923201571966e-05, "loss": 0.0013, "step": 290650 }, { "epoch": 34.29683813119396, "grad_norm": 0.0332694835960865, "learning_rate": 2.25234014684876e-05, "loss": 0.0008, "step": 290700 }, { "epoch": 34.30273714016045, "grad_norm": 0.21470439434051514, "learning_rate": 2.2507883527965846e-05, "loss": 0.0011, "step": 290750 }, { "epoch": 34.30863614912695, "grad_norm": 0.011063688434660435, "learning_rate": 2.249236938214863e-05, "loss": 0.0009, "step": 290800 }, { "epoch": 34.31453515809344, "grad_norm": 0.048572272062301636, "learning_rate": 2.2476859033177345e-05, "loss": 0.0012, "step": 290850 }, { "epoch": 34.320434167059936, "grad_norm": 0.0065259430557489395, "learning_rate": 2.246135248319289e-05, "loss": 0.0024, "step": 290900 }, { "epoch": 34.32633317602643, "grad_norm": 0.004523668438196182, "learning_rate": 2.2445849734335595e-05, "loss": 0.0014, "step": 290950 }, { "epoch": 34.33223218499292, "grad_norm": 0.3461451530456543, "learning_rate": 2.2430350788745298e-05, "loss": 0.0015, "step": 291000 }, { "epoch": 34.33223218499292, "eval_cer": 0.08347676419965576, "eval_loss": 3.8738329749321565e-05, "eval_runtime": 2.0694, "eval_samples_per_second": 48.323, "eval_steps_per_second": 1.933, "eval_wer": 0.26, "step": 291000 }, { "epoch": 34.33813119395941, "grad_norm": 0.001303012017160654, "learning_rate": 2.2414855648561288e-05, "loss": 0.0017, "step": 291050 }, { "epoch": 34.34403020292591, "grad_norm": 0.008928433060646057, "learning_rate": 2.2399364315922357e-05, "loss": 0.0022, "step": 291100 }, { "epoch": 34.3499292118924, "grad_norm": 0.036998726427555084, "learning_rate": 2.238387679296674e-05, "loss": 0.0011, "step": 291150 }, { "epoch": 34.355828220858896, "grad_norm": 0.14751005172729492, "learning_rate": 2.2368393081832166e-05, "loss": 0.0014, "step": 291200 }, { "epoch": 34.36172722982539, "grad_norm": 0.013234134763479233, "learning_rate": 2.235291318465584e-05, "loss": 0.0019, "step": 291250 }, { "epoch": 34.367626238791885, "grad_norm": 0.17597825825214386, "learning_rate": 2.233743710357443e-05, "loss": 0.0011, "step": 291300 }, { "epoch": 34.37352524775838, "grad_norm": 0.010557626374065876, "learning_rate": 2.2321964840724075e-05, "loss": 0.001, "step": 291350 }, { "epoch": 34.37942425672487, "grad_norm": 0.06275910139083862, "learning_rate": 2.2306496398240383e-05, "loss": 0.0012, "step": 291400 }, { "epoch": 34.38532326569136, "grad_norm": 0.024701643735170364, "learning_rate": 2.2291031778258475e-05, "loss": 0.0011, "step": 291450 }, { "epoch": 34.391222274657856, "grad_norm": 0.03306541219353676, "learning_rate": 2.2275570982912907e-05, "loss": 0.0016, "step": 291500 }, { "epoch": 34.39712128362435, "grad_norm": 0.02944233827292919, "learning_rate": 2.2260114014337702e-05, "loss": 0.0013, "step": 291550 }, { "epoch": 34.403020292590845, "grad_norm": 0.06863254308700562, "learning_rate": 2.2244660874666372e-05, "loss": 0.0013, "step": 291600 }, { "epoch": 34.40891930155734, "grad_norm": 0.13627265393733978, "learning_rate": 2.22292115660319e-05, "loss": 0.0014, "step": 291650 }, { "epoch": 34.414818310523835, "grad_norm": 0.24262727797031403, "learning_rate": 2.221376609056673e-05, "loss": 0.0011, "step": 291700 }, { "epoch": 34.42071731949033, "grad_norm": 0.0029083224944770336, "learning_rate": 2.2198324450402786e-05, "loss": 0.0009, "step": 291750 }, { "epoch": 34.42661632845682, "grad_norm": 0.008520333096385002, "learning_rate": 2.2182886647671453e-05, "loss": 0.0009, "step": 291800 }, { "epoch": 34.43251533742331, "grad_norm": 0.007394048385322094, "learning_rate": 2.2167452684503604e-05, "loss": 0.0009, "step": 291850 }, { "epoch": 34.438414346389806, "grad_norm": 0.12181705981492996, "learning_rate": 2.215202256302956e-05, "loss": 0.0011, "step": 291900 }, { "epoch": 34.4443133553563, "grad_norm": 0.0011827951529994607, "learning_rate": 2.2136596285379135e-05, "loss": 0.0013, "step": 291950 }, { "epoch": 34.450212364322795, "grad_norm": 0.006530520506203175, "learning_rate": 2.212117385368157e-05, "loss": 0.0015, "step": 292000 }, { "epoch": 34.450212364322795, "eval_cer": 0.08347676419965576, "eval_loss": 0.00012502203753683716, "eval_runtime": 2.0352, "eval_samples_per_second": 49.136, "eval_steps_per_second": 1.965, "eval_wer": 0.26, "step": 292000 }, { "epoch": 34.45611137328929, "grad_norm": 0.001476325560361147, "learning_rate": 2.210575527006565e-05, "loss": 0.0011, "step": 292050 }, { "epoch": 34.462010382255784, "grad_norm": 0.03818708285689354, "learning_rate": 2.2090340536659554e-05, "loss": 0.0013, "step": 292100 }, { "epoch": 34.46790939122227, "grad_norm": 0.17618410289287567, "learning_rate": 2.207492965559097e-05, "loss": 0.0014, "step": 292150 }, { "epoch": 34.473808400188766, "grad_norm": 0.012230322696268559, "learning_rate": 2.205952262898704e-05, "loss": 0.001, "step": 292200 }, { "epoch": 34.47970740915526, "grad_norm": 0.10866373777389526, "learning_rate": 2.2044119458974384e-05, "loss": 0.0013, "step": 292250 }, { "epoch": 34.485606418121755, "grad_norm": 0.007713909260928631, "learning_rate": 2.2028720147679072e-05, "loss": 0.0014, "step": 292300 }, { "epoch": 34.49150542708825, "grad_norm": 0.29527369141578674, "learning_rate": 2.201332469722666e-05, "loss": 0.001, "step": 292350 }, { "epoch": 34.497404436054744, "grad_norm": 0.014220318756997585, "learning_rate": 2.1997933109742163e-05, "loss": 0.0008, "step": 292400 }, { "epoch": 34.50330344502124, "grad_norm": 0.18183648586273193, "learning_rate": 2.198254538735007e-05, "loss": 0.0014, "step": 292450 }, { "epoch": 34.50920245398773, "grad_norm": 0.0946035385131836, "learning_rate": 2.196716153217432e-05, "loss": 0.0021, "step": 292500 }, { "epoch": 34.51510146295422, "grad_norm": 0.17281462252140045, "learning_rate": 2.195178154633831e-05, "loss": 0.0016, "step": 292550 }, { "epoch": 34.521000471920715, "grad_norm": 0.005073923151940107, "learning_rate": 2.193640543196497e-05, "loss": 0.0011, "step": 292600 }, { "epoch": 34.52689948088721, "grad_norm": 0.07558583468198776, "learning_rate": 2.192103319117661e-05, "loss": 0.0012, "step": 292650 }, { "epoch": 34.532798489853704, "grad_norm": 0.019060231745243073, "learning_rate": 2.1905664826095063e-05, "loss": 0.0006, "step": 292700 }, { "epoch": 34.5386974988202, "grad_norm": 0.0020853523164987564, "learning_rate": 2.1890300338841574e-05, "loss": 0.001, "step": 292750 }, { "epoch": 34.54459650778669, "grad_norm": 0.2941110134124756, "learning_rate": 2.1874939731536925e-05, "loss": 0.0016, "step": 292800 }, { "epoch": 34.55049551675319, "grad_norm": 0.0002874523343052715, "learning_rate": 2.185958300630131e-05, "loss": 0.0012, "step": 292850 }, { "epoch": 34.55639452571968, "grad_norm": 0.18398070335388184, "learning_rate": 2.1844230165254392e-05, "loss": 0.0018, "step": 292900 }, { "epoch": 34.56229353468617, "grad_norm": 0.05119207501411438, "learning_rate": 2.1828881210515306e-05, "loss": 0.0016, "step": 292950 }, { "epoch": 34.568192543652664, "grad_norm": 0.0023581532295793295, "learning_rate": 2.1813536144202655e-05, "loss": 0.0019, "step": 293000 }, { "epoch": 34.568192543652664, "eval_cer": 0.08347676419965576, "eval_loss": 1.610898289072793e-05, "eval_runtime": 2.0423, "eval_samples_per_second": 48.963, "eval_steps_per_second": 1.959, "eval_wer": 0.26, "step": 293000 }, { "epoch": 34.57409155261916, "grad_norm": 0.003934924490749836, "learning_rate": 2.1798194968434498e-05, "loss": 0.0015, "step": 293050 }, { "epoch": 34.57999056158565, "grad_norm": 0.041333556175231934, "learning_rate": 2.178285768532836e-05, "loss": 0.0017, "step": 293100 }, { "epoch": 34.58588957055215, "grad_norm": 0.009024061262607574, "learning_rate": 2.176752429700123e-05, "loss": 0.0014, "step": 293150 }, { "epoch": 34.59178857951864, "grad_norm": 0.15277493000030518, "learning_rate": 2.1752194805569555e-05, "loss": 0.0015, "step": 293200 }, { "epoch": 34.59768758848514, "grad_norm": 0.00572414044290781, "learning_rate": 2.173686921314924e-05, "loss": 0.001, "step": 293250 }, { "epoch": 34.60358659745163, "grad_norm": 0.01957131177186966, "learning_rate": 2.1721547521855675e-05, "loss": 0.0016, "step": 293300 }, { "epoch": 34.60948560641812, "grad_norm": 0.07287047803401947, "learning_rate": 2.170622973380366e-05, "loss": 0.0011, "step": 293350 }, { "epoch": 34.61538461538461, "grad_norm": 0.24521493911743164, "learning_rate": 2.1690915851107542e-05, "loss": 0.0013, "step": 293400 }, { "epoch": 34.62128362435111, "grad_norm": 0.32259079813957214, "learning_rate": 2.167560587588105e-05, "loss": 0.0018, "step": 293450 }, { "epoch": 34.6271826333176, "grad_norm": 0.1403234750032425, "learning_rate": 2.1660299810237406e-05, "loss": 0.0009, "step": 293500 }, { "epoch": 34.6330816422841, "grad_norm": 0.029725156724452972, "learning_rate": 2.1644997656289288e-05, "loss": 0.0015, "step": 293550 }, { "epoch": 34.63898065125059, "grad_norm": 0.024512682110071182, "learning_rate": 2.162969941614883e-05, "loss": 0.0021, "step": 293600 }, { "epoch": 34.644879660217086, "grad_norm": 0.19463369250297546, "learning_rate": 2.1614405091927637e-05, "loss": 0.0014, "step": 293650 }, { "epoch": 34.65077866918358, "grad_norm": 0.012933775782585144, "learning_rate": 2.1599114685736766e-05, "loss": 0.0009, "step": 293700 }, { "epoch": 34.65667767815007, "grad_norm": 0.04134133458137512, "learning_rate": 2.1583828199686728e-05, "loss": 0.0015, "step": 293750 }, { "epoch": 34.66257668711656, "grad_norm": 0.1627771109342575, "learning_rate": 2.15685456358875e-05, "loss": 0.0013, "step": 293800 }, { "epoch": 34.66847569608306, "grad_norm": 0.004159718751907349, "learning_rate": 2.1553266996448522e-05, "loss": 0.0017, "step": 293850 }, { "epoch": 34.67437470504955, "grad_norm": 0.12472772598266602, "learning_rate": 2.153799228347868e-05, "loss": 0.0014, "step": 293900 }, { "epoch": 34.680273714016046, "grad_norm": 0.020544767379760742, "learning_rate": 2.152272149908631e-05, "loss": 0.0012, "step": 293950 }, { "epoch": 34.68617272298254, "grad_norm": 0.0056894319131970406, "learning_rate": 2.150745464537926e-05, "loss": 0.0015, "step": 294000 }, { "epoch": 34.68617272298254, "eval_cer": 0.08347676419965576, "eval_loss": 3.062442920054309e-05, "eval_runtime": 2.0478, "eval_samples_per_second": 48.833, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 294000 }, { "epoch": 34.692071731949035, "grad_norm": 0.17264391481876373, "learning_rate": 2.1492191724464772e-05, "loss": 0.0012, "step": 294050 }, { "epoch": 34.69797074091553, "grad_norm": 0.1633884310722351, "learning_rate": 2.1476932738449566e-05, "loss": 0.0012, "step": 294100 }, { "epoch": 34.70386974988202, "grad_norm": 0.007947257719933987, "learning_rate": 2.1461677689439837e-05, "loss": 0.0011, "step": 294150 }, { "epoch": 34.70976875884851, "grad_norm": 0.0033150925301015377, "learning_rate": 2.1446426579541186e-05, "loss": 0.0009, "step": 294200 }, { "epoch": 34.715667767815006, "grad_norm": 0.017942260950803757, "learning_rate": 2.1431179410858747e-05, "loss": 0.001, "step": 294250 }, { "epoch": 34.7215667767815, "grad_norm": 0.008136266842484474, "learning_rate": 2.141593618549706e-05, "loss": 0.001, "step": 294300 }, { "epoch": 34.727465785747995, "grad_norm": 0.0003086813958361745, "learning_rate": 2.1400696905560118e-05, "loss": 0.0016, "step": 294350 }, { "epoch": 34.73336479471449, "grad_norm": 0.0016284491866827011, "learning_rate": 2.138546157315139e-05, "loss": 0.0012, "step": 294400 }, { "epoch": 34.739263803680984, "grad_norm": 0.028792357072234154, "learning_rate": 2.137023019037378e-05, "loss": 0.0012, "step": 294450 }, { "epoch": 34.74516281264748, "grad_norm": 0.11010618507862091, "learning_rate": 2.135500275932967e-05, "loss": 0.0014, "step": 294500 }, { "epoch": 34.751061821613966, "grad_norm": 0.0025303333532065153, "learning_rate": 2.1339779282120875e-05, "loss": 0.0019, "step": 294550 }, { "epoch": 34.75696083058046, "grad_norm": 0.01605524681508541, "learning_rate": 2.1324559760848677e-05, "loss": 0.001, "step": 294600 }, { "epoch": 34.762859839546955, "grad_norm": 0.001269079279154539, "learning_rate": 2.1309344197613813e-05, "loss": 0.0008, "step": 294650 }, { "epoch": 34.76875884851345, "grad_norm": 0.14247113466262817, "learning_rate": 2.1294132594516448e-05, "loss": 0.0016, "step": 294700 }, { "epoch": 34.774657857479944, "grad_norm": 0.005622392985969782, "learning_rate": 2.1278924953656248e-05, "loss": 0.0011, "step": 294750 }, { "epoch": 34.78055686644644, "grad_norm": 0.11187683790922165, "learning_rate": 2.1263721277132303e-05, "loss": 0.0015, "step": 294800 }, { "epoch": 34.78645587541293, "grad_norm": 0.002867193194106221, "learning_rate": 2.1248521567043156e-05, "loss": 0.0013, "step": 294850 }, { "epoch": 34.79235488437942, "grad_norm": 0.06088979169726372, "learning_rate": 2.1233325825486798e-05, "loss": 0.001, "step": 294900 }, { "epoch": 34.798253893345915, "grad_norm": 0.006975814234465361, "learning_rate": 2.1218134054560684e-05, "loss": 0.0011, "step": 294950 }, { "epoch": 34.80415290231241, "grad_norm": 0.0006847589393146336, "learning_rate": 2.120294625636171e-05, "loss": 0.0009, "step": 295000 }, { "epoch": 34.80415290231241, "eval_cer": 0.08347676419965576, "eval_loss": 4.621942207450047e-05, "eval_runtime": 2.0273, "eval_samples_per_second": 49.328, "eval_steps_per_second": 1.973, "eval_wer": 0.26, "step": 295000 }, { "epoch": 34.810051911278904, "grad_norm": 0.030993055552244186, "learning_rate": 2.118776243298624e-05, "loss": 0.0007, "step": 295050 }, { "epoch": 34.8159509202454, "grad_norm": 0.02968749962747097, "learning_rate": 2.1172582586530077e-05, "loss": 0.0013, "step": 295100 }, { "epoch": 34.82184992921189, "grad_norm": 0.16873091459274292, "learning_rate": 2.115740671908847e-05, "loss": 0.0017, "step": 295150 }, { "epoch": 34.82774893817839, "grad_norm": 0.006901362910866737, "learning_rate": 2.114223483275613e-05, "loss": 0.0007, "step": 295200 }, { "epoch": 34.83364794714488, "grad_norm": 0.11856191605329514, "learning_rate": 2.112706692962722e-05, "loss": 0.0012, "step": 295250 }, { "epoch": 34.83954695611137, "grad_norm": 0.0643618032336235, "learning_rate": 2.1111903011795325e-05, "loss": 0.0014, "step": 295300 }, { "epoch": 34.845445965077865, "grad_norm": 0.048587020486593246, "learning_rate": 2.109674308135354e-05, "loss": 0.0015, "step": 295350 }, { "epoch": 34.85134497404436, "grad_norm": 0.000802834692876786, "learning_rate": 2.1081587140394348e-05, "loss": 0.0008, "step": 295400 }, { "epoch": 34.857243983010854, "grad_norm": 0.02178666554391384, "learning_rate": 2.1066435191009715e-05, "loss": 0.0011, "step": 295450 }, { "epoch": 34.86314299197735, "grad_norm": 0.055913280695676804, "learning_rate": 2.1051287235291046e-05, "loss": 0.0011, "step": 295500 }, { "epoch": 34.86904200094384, "grad_norm": 0.14208215475082397, "learning_rate": 2.103614327532919e-05, "loss": 0.0016, "step": 295550 }, { "epoch": 34.87494100991034, "grad_norm": 0.00613782973960042, "learning_rate": 2.1021003313214453e-05, "loss": 0.0016, "step": 295600 }, { "epoch": 34.88084001887683, "grad_norm": 0.02633592113852501, "learning_rate": 2.1005867351036584e-05, "loss": 0.0013, "step": 295650 }, { "epoch": 34.88673902784332, "grad_norm": 0.013284694403409958, "learning_rate": 2.0990735390884785e-05, "loss": 0.0011, "step": 295700 }, { "epoch": 34.892638036809814, "grad_norm": 0.017173245549201965, "learning_rate": 2.0975607434847706e-05, "loss": 0.0011, "step": 295750 }, { "epoch": 34.89853704577631, "grad_norm": 0.00860433280467987, "learning_rate": 2.096048348501343e-05, "loss": 0.001, "step": 295800 }, { "epoch": 34.9044360547428, "grad_norm": 0.001732426811940968, "learning_rate": 2.094536354346949e-05, "loss": 0.0011, "step": 295850 }, { "epoch": 34.9103350637093, "grad_norm": 0.009586975909769535, "learning_rate": 2.0930247612302906e-05, "loss": 0.0012, "step": 295900 }, { "epoch": 34.91623407267579, "grad_norm": 0.023968465626239777, "learning_rate": 2.0915135693600096e-05, "loss": 0.0015, "step": 295950 }, { "epoch": 34.922133081642286, "grad_norm": 0.00018990928947459906, "learning_rate": 2.0900027789446942e-05, "loss": 0.0014, "step": 296000 }, { "epoch": 34.922133081642286, "eval_cer": 0.08347676419965576, "eval_loss": 2.3932181647978723e-05, "eval_runtime": 2.0859, "eval_samples_per_second": 47.942, "eval_steps_per_second": 1.918, "eval_wer": 0.26, "step": 296000 }, { "epoch": 34.92803209060878, "grad_norm": 0.0009144946234300733, "learning_rate": 2.0884923901928743e-05, "loss": 0.0013, "step": 296050 }, { "epoch": 34.93393109957527, "grad_norm": 0.0949844941496849, "learning_rate": 2.0869824033130315e-05, "loss": 0.0014, "step": 296100 }, { "epoch": 34.93983010854176, "grad_norm": 0.08284731954336166, "learning_rate": 2.0854728185135858e-05, "loss": 0.0009, "step": 296150 }, { "epoch": 34.94572911750826, "grad_norm": 0.03933456167578697, "learning_rate": 2.0839636360029025e-05, "loss": 0.0018, "step": 296200 }, { "epoch": 34.95162812647475, "grad_norm": 0.002290574135258794, "learning_rate": 2.0824548559892937e-05, "loss": 0.0012, "step": 296250 }, { "epoch": 34.95752713544125, "grad_norm": 0.001371797057799995, "learning_rate": 2.0809464786810134e-05, "loss": 0.0013, "step": 296300 }, { "epoch": 34.96342614440774, "grad_norm": 0.01972656510770321, "learning_rate": 2.0794385042862612e-05, "loss": 0.0009, "step": 296350 }, { "epoch": 34.969325153374236, "grad_norm": 0.004036187659949064, "learning_rate": 2.077930933013182e-05, "loss": 0.0019, "step": 296400 }, { "epoch": 34.97522416234073, "grad_norm": 0.07318464666604996, "learning_rate": 2.0764237650698633e-05, "loss": 0.0019, "step": 296450 }, { "epoch": 34.98112317130722, "grad_norm": 0.0018536221468821168, "learning_rate": 2.0749170006643377e-05, "loss": 0.0016, "step": 296500 }, { "epoch": 34.98702218027371, "grad_norm": 0.010428613051772118, "learning_rate": 2.0734106400045823e-05, "loss": 0.0006, "step": 296550 }, { "epoch": 34.99292118924021, "grad_norm": 0.06449630111455917, "learning_rate": 2.0719046832985186e-05, "loss": 0.0012, "step": 296600 }, { "epoch": 34.9988201982067, "grad_norm": 0.02059135213494301, "learning_rate": 2.0703991307540095e-05, "loss": 0.0012, "step": 296650 }, { "epoch": 35.004719207173196, "grad_norm": 0.14915210008621216, "learning_rate": 2.0688939825788684e-05, "loss": 0.0015, "step": 296700 }, { "epoch": 35.01061821613969, "grad_norm": 0.1558426469564438, "learning_rate": 2.067389238980848e-05, "loss": 0.0011, "step": 296750 }, { "epoch": 35.016517225106185, "grad_norm": 0.0030815256759524345, "learning_rate": 2.065884900167646e-05, "loss": 0.0014, "step": 296800 }, { "epoch": 35.02241623407268, "grad_norm": 0.045468904078006744, "learning_rate": 2.0643809663469044e-05, "loss": 0.001, "step": 296850 }, { "epoch": 35.02831524303917, "grad_norm": 0.004029233008623123, "learning_rate": 2.062877437726209e-05, "loss": 0.0008, "step": 296900 }, { "epoch": 35.03421425200566, "grad_norm": 0.051983825862407684, "learning_rate": 2.061374314513091e-05, "loss": 0.0015, "step": 296950 }, { "epoch": 35.040113260972156, "grad_norm": 0.002998655429109931, "learning_rate": 2.059871596915024e-05, "loss": 0.0015, "step": 297000 }, { "epoch": 35.040113260972156, "eval_cer": 0.08347676419965576, "eval_loss": 2.2253647330217063e-05, "eval_runtime": 2.1186, "eval_samples_per_second": 47.201, "eval_steps_per_second": 1.888, "eval_wer": 0.26, "step": 297000 }, { "epoch": 35.04601226993865, "grad_norm": 0.004414792638272047, "learning_rate": 2.0583692851394265e-05, "loss": 0.0007, "step": 297050 }, { "epoch": 35.051911278905145, "grad_norm": 0.010149551555514336, "learning_rate": 2.056867379393661e-05, "loss": 0.001, "step": 297100 }, { "epoch": 35.05781028787164, "grad_norm": 0.016077103093266487, "learning_rate": 2.055365879885034e-05, "loss": 0.0016, "step": 297150 }, { "epoch": 35.063709296838134, "grad_norm": 0.013265683315694332, "learning_rate": 2.053864786820795e-05, "loss": 0.0011, "step": 297200 }, { "epoch": 35.06960830580462, "grad_norm": 0.006760894320905209, "learning_rate": 2.0523641004081367e-05, "loss": 0.0009, "step": 297250 }, { "epoch": 35.075507314771116, "grad_norm": 0.017486779019236565, "learning_rate": 2.0508638208542015e-05, "loss": 0.0006, "step": 297300 }, { "epoch": 35.08140632373761, "grad_norm": 0.004693678580224514, "learning_rate": 2.0493639483660675e-05, "loss": 0.0006, "step": 297350 }, { "epoch": 35.087305332704105, "grad_norm": 0.0628078430891037, "learning_rate": 2.0478644831507627e-05, "loss": 0.0012, "step": 297400 }, { "epoch": 35.0932043416706, "grad_norm": 0.25266364216804504, "learning_rate": 2.0463654254152544e-05, "loss": 0.0011, "step": 297450 }, { "epoch": 35.099103350637094, "grad_norm": 0.05819575861096382, "learning_rate": 2.0448667753664552e-05, "loss": 0.0017, "step": 297500 }, { "epoch": 35.10500235960359, "grad_norm": 0.040239594876766205, "learning_rate": 2.043368533211225e-05, "loss": 0.0011, "step": 297550 }, { "epoch": 35.11090136857008, "grad_norm": 0.0031007244251668453, "learning_rate": 2.0418706991563634e-05, "loss": 0.0009, "step": 297600 }, { "epoch": 35.11680037753657, "grad_norm": 0.01984095573425293, "learning_rate": 2.040373273408614e-05, "loss": 0.0009, "step": 297650 }, { "epoch": 35.122699386503065, "grad_norm": 0.06875376403331757, "learning_rate": 2.0388762561746643e-05, "loss": 0.001, "step": 297700 }, { "epoch": 35.12859839546956, "grad_norm": 0.0073614539578557014, "learning_rate": 2.0373796476611474e-05, "loss": 0.0015, "step": 297750 }, { "epoch": 35.134497404436054, "grad_norm": 0.003699176013469696, "learning_rate": 2.0358834480746365e-05, "loss": 0.0011, "step": 297800 }, { "epoch": 35.14039641340255, "grad_norm": 0.03417438641190529, "learning_rate": 2.0343876576216515e-05, "loss": 0.0014, "step": 297850 }, { "epoch": 35.14629542236904, "grad_norm": 0.0002870483440347016, "learning_rate": 2.0328922765086543e-05, "loss": 0.0011, "step": 297900 }, { "epoch": 35.15219443133554, "grad_norm": 0.018561426550149918, "learning_rate": 2.031397304942051e-05, "loss": 0.0013, "step": 297950 }, { "epoch": 35.15809344030203, "grad_norm": 0.003466768190264702, "learning_rate": 2.029902743128188e-05, "loss": 0.001, "step": 298000 }, { "epoch": 35.15809344030203, "eval_cer": 0.08347676419965576, "eval_loss": 1.242472899321001e-05, "eval_runtime": 2.0648, "eval_samples_per_second": 48.431, "eval_steps_per_second": 1.937, "eval_wer": 0.26, "step": 298000 }, { "epoch": 35.16399244926852, "grad_norm": 0.04534978047013283, "learning_rate": 2.0284085912733624e-05, "loss": 0.0011, "step": 298050 }, { "epoch": 35.169891458235014, "grad_norm": 0.001219091471284628, "learning_rate": 2.026914849583808e-05, "loss": 0.0013, "step": 298100 }, { "epoch": 35.17579046720151, "grad_norm": 0.08050567656755447, "learning_rate": 2.0254215182657042e-05, "loss": 0.0015, "step": 298150 }, { "epoch": 35.181689476168, "grad_norm": 0.029928410425782204, "learning_rate": 2.0239285975251743e-05, "loss": 0.0008, "step": 298200 }, { "epoch": 35.1875884851345, "grad_norm": 0.2337680160999298, "learning_rate": 2.0224360875682837e-05, "loss": 0.0014, "step": 298250 }, { "epoch": 35.19348749410099, "grad_norm": 0.16186293959617615, "learning_rate": 2.020943988601042e-05, "loss": 0.0012, "step": 298300 }, { "epoch": 35.19938650306749, "grad_norm": 0.1794130951166153, "learning_rate": 2.019452300829402e-05, "loss": 0.0008, "step": 298350 }, { "epoch": 35.20528551203398, "grad_norm": 0.002903803251683712, "learning_rate": 2.0179610244592595e-05, "loss": 0.0014, "step": 298400 }, { "epoch": 35.21118452100047, "grad_norm": 0.004235414322465658, "learning_rate": 2.0164701596964535e-05, "loss": 0.0013, "step": 298450 }, { "epoch": 35.21708352996696, "grad_norm": 0.06720995903015137, "learning_rate": 2.0149797067467668e-05, "loss": 0.0006, "step": 298500 }, { "epoch": 35.22298253893346, "grad_norm": 0.0020868077408522367, "learning_rate": 2.0134896658159245e-05, "loss": 0.001, "step": 298550 }, { "epoch": 35.22888154789995, "grad_norm": 0.060700297355651855, "learning_rate": 2.0120000371095936e-05, "loss": 0.0009, "step": 298600 }, { "epoch": 35.23478055686645, "grad_norm": 0.005857496988028288, "learning_rate": 2.010510820833389e-05, "loss": 0.0011, "step": 298650 }, { "epoch": 35.24067956583294, "grad_norm": 0.01295412052422762, "learning_rate": 2.009022017192864e-05, "loss": 0.0014, "step": 298700 }, { "epoch": 35.246578574799436, "grad_norm": 0.003416123567149043, "learning_rate": 2.0075336263935164e-05, "loss": 0.0014, "step": 298750 }, { "epoch": 35.25247758376593, "grad_norm": 0.0002652370894793421, "learning_rate": 2.006045648640787e-05, "loss": 0.0013, "step": 298800 }, { "epoch": 35.25837659273242, "grad_norm": 0.01378607377409935, "learning_rate": 2.0045580841400597e-05, "loss": 0.0006, "step": 298850 }, { "epoch": 35.26427560169891, "grad_norm": 0.00017314846627414227, "learning_rate": 2.0030709330966607e-05, "loss": 0.0012, "step": 298900 }, { "epoch": 35.27017461066541, "grad_norm": 0.08736252784729004, "learning_rate": 2.0015841957158606e-05, "loss": 0.0015, "step": 298950 }, { "epoch": 35.2760736196319, "grad_norm": 0.002123940270394087, "learning_rate": 2.0000978722028713e-05, "loss": 0.0008, "step": 299000 }, { "epoch": 35.2760736196319, "eval_cer": 0.08347676419965576, "eval_loss": 3.4568576666060835e-05, "eval_runtime": 2.0381, "eval_samples_per_second": 49.066, "eval_steps_per_second": 1.963, "eval_wer": 0.26, "step": 299000 }, { "epoch": 35.281972628598396, "grad_norm": 0.029221175238490105, "learning_rate": 1.998611962762849e-05, "loss": 0.0015, "step": 299050 }, { "epoch": 35.28787163756489, "grad_norm": 0.0013048562686890364, "learning_rate": 1.997126467600891e-05, "loss": 0.001, "step": 299100 }, { "epoch": 35.293770646531385, "grad_norm": 0.03419286757707596, "learning_rate": 1.9956413869220382e-05, "loss": 0.0007, "step": 299150 }, { "epoch": 35.29966965549788, "grad_norm": 0.007459016051143408, "learning_rate": 1.9941567209312767e-05, "loss": 0.001, "step": 299200 }, { "epoch": 35.30556866446437, "grad_norm": 0.2270236760377884, "learning_rate": 1.992672469833532e-05, "loss": 0.0009, "step": 299250 }, { "epoch": 35.31146767343086, "grad_norm": 0.15374557673931122, "learning_rate": 1.9911886338336733e-05, "loss": 0.0012, "step": 299300 }, { "epoch": 35.317366682397356, "grad_norm": 0.0679054707288742, "learning_rate": 1.989705213136511e-05, "loss": 0.0009, "step": 299350 }, { "epoch": 35.32326569136385, "grad_norm": 0.07133659720420837, "learning_rate": 1.988222207946804e-05, "loss": 0.0008, "step": 299400 }, { "epoch": 35.329164700330345, "grad_norm": 0.014459307305514812, "learning_rate": 1.9867396184692473e-05, "loss": 0.0016, "step": 299450 }, { "epoch": 35.33506370929684, "grad_norm": 0.0029914553742855787, "learning_rate": 1.9852574449084816e-05, "loss": 0.0012, "step": 299500 }, { "epoch": 35.340962718263334, "grad_norm": 0.00997429434210062, "learning_rate": 1.983775687469089e-05, "loss": 0.0015, "step": 299550 }, { "epoch": 35.34686172722983, "grad_norm": 0.005415024235844612, "learning_rate": 1.982294346355595e-05, "loss": 0.0008, "step": 299600 }, { "epoch": 35.352760736196316, "grad_norm": 0.00976363942027092, "learning_rate": 1.9808134217724673e-05, "loss": 0.0014, "step": 299650 }, { "epoch": 35.35865974516281, "grad_norm": 0.02540217526257038, "learning_rate": 1.9793329139241163e-05, "loss": 0.0009, "step": 299700 }, { "epoch": 35.364558754129305, "grad_norm": 0.08395938575267792, "learning_rate": 1.9778528230148945e-05, "loss": 0.0013, "step": 299750 }, { "epoch": 35.3704577630958, "grad_norm": 0.12186859548091888, "learning_rate": 1.9763731492490976e-05, "loss": 0.0009, "step": 299800 }, { "epoch": 35.376356772062294, "grad_norm": 0.0026277322322130203, "learning_rate": 1.974893892830963e-05, "loss": 0.0005, "step": 299850 }, { "epoch": 35.38225578102879, "grad_norm": 0.13605764508247375, "learning_rate": 1.9734150539646705e-05, "loss": 0.001, "step": 299900 }, { "epoch": 35.38815478999528, "grad_norm": 0.09443001449108124, "learning_rate": 1.971936632854341e-05, "loss": 0.0008, "step": 299950 }, { "epoch": 35.39405379896177, "grad_norm": 0.13513748347759247, "learning_rate": 1.9704586297040424e-05, "loss": 0.0009, "step": 300000 }, { "epoch": 35.39405379896177, "eval_cer": 0.08347676419965576, "eval_loss": 6.100803511799313e-05, "eval_runtime": 2.0302, "eval_samples_per_second": 49.256, "eval_steps_per_second": 1.97, "eval_wer": 0.26, "step": 300000 }, { "epoch": 35.399952807928265, "grad_norm": 0.11849627643823624, "learning_rate": 1.9689810447177803e-05, "loss": 0.0014, "step": 300050 }, { "epoch": 35.40585181689476, "grad_norm": 0.001587721984833479, "learning_rate": 1.967503878099504e-05, "loss": 0.001, "step": 300100 }, { "epoch": 35.411750825861255, "grad_norm": 0.0007083056843839586, "learning_rate": 1.9660271300531047e-05, "loss": 0.0009, "step": 300150 }, { "epoch": 35.41764983482775, "grad_norm": 0.002820670371875167, "learning_rate": 1.964550800782417e-05, "loss": 0.0008, "step": 300200 }, { "epoch": 35.423548843794244, "grad_norm": 0.0607202909886837, "learning_rate": 1.963074890491216e-05, "loss": 0.0012, "step": 300250 }, { "epoch": 35.42944785276074, "grad_norm": 0.008994832634925842, "learning_rate": 1.9615993993832204e-05, "loss": 0.0012, "step": 300300 }, { "epoch": 35.43534686172723, "grad_norm": 0.0038693337701261044, "learning_rate": 1.9601243276620905e-05, "loss": 0.0013, "step": 300350 }, { "epoch": 35.44124587069372, "grad_norm": 0.0883549228310585, "learning_rate": 1.9586496755314287e-05, "loss": 0.0013, "step": 300400 }, { "epoch": 35.447144879660215, "grad_norm": 0.006155063863843679, "learning_rate": 1.957175443194779e-05, "loss": 0.0014, "step": 300450 }, { "epoch": 35.45304388862671, "grad_norm": 0.005775638855993748, "learning_rate": 1.9557016308556288e-05, "loss": 0.0012, "step": 300500 }, { "epoch": 35.458942897593204, "grad_norm": 0.0008899843087419868, "learning_rate": 1.9542282387174042e-05, "loss": 0.0015, "step": 300550 }, { "epoch": 35.4648419065597, "grad_norm": 0.10413625836372375, "learning_rate": 1.9527552669834798e-05, "loss": 0.0007, "step": 300600 }, { "epoch": 35.47074091552619, "grad_norm": 0.026665717363357544, "learning_rate": 1.951282715857166e-05, "loss": 0.0009, "step": 300650 }, { "epoch": 35.47663992449269, "grad_norm": 0.002335060853511095, "learning_rate": 1.9498105855417177e-05, "loss": 0.0018, "step": 300700 }, { "epoch": 35.48253893345918, "grad_norm": 0.006469929590821266, "learning_rate": 1.9483388762403308e-05, "loss": 0.0006, "step": 300750 }, { "epoch": 35.48843794242567, "grad_norm": 0.05744355171918869, "learning_rate": 1.9468675881561422e-05, "loss": 0.0022, "step": 300800 }, { "epoch": 35.494336951392164, "grad_norm": 0.006927142385393381, "learning_rate": 1.9453967214922352e-05, "loss": 0.0011, "step": 300850 }, { "epoch": 35.50023596035866, "grad_norm": 0.0005299526965245605, "learning_rate": 1.9439262764516307e-05, "loss": 0.0007, "step": 300900 }, { "epoch": 35.50613496932515, "grad_norm": 0.018722476437687874, "learning_rate": 1.9424562532372926e-05, "loss": 0.0009, "step": 300950 }, { "epoch": 35.51203397829165, "grad_norm": 0.010536981746554375, "learning_rate": 1.9409866520521257e-05, "loss": 0.0009, "step": 301000 }, { "epoch": 35.51203397829165, "eval_cer": 0.08347676419965576, "eval_loss": 8.076433005044237e-05, "eval_runtime": 2.054, "eval_samples_per_second": 48.684, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 301000 }, { "epoch": 35.51793298725814, "grad_norm": 0.21465979516506195, "learning_rate": 1.9395174730989775e-05, "loss": 0.0011, "step": 301050 }, { "epoch": 35.52383199622464, "grad_norm": 0.06401073932647705, "learning_rate": 1.938048716580637e-05, "loss": 0.0013, "step": 301100 }, { "epoch": 35.52973100519113, "grad_norm": 0.08816991746425629, "learning_rate": 1.9365803826998356e-05, "loss": 0.0009, "step": 301150 }, { "epoch": 35.53563001415762, "grad_norm": 0.08018657565116882, "learning_rate": 1.9351124716592456e-05, "loss": 0.001, "step": 301200 }, { "epoch": 35.54152902312411, "grad_norm": 0.09241991490125656, "learning_rate": 1.9336449836614802e-05, "loss": 0.0016, "step": 301250 }, { "epoch": 35.54742803209061, "grad_norm": 0.0665205866098404, "learning_rate": 1.932177918909094e-05, "loss": 0.0006, "step": 301300 }, { "epoch": 35.5533270410571, "grad_norm": 0.007398138288408518, "learning_rate": 1.930711277604587e-05, "loss": 0.0012, "step": 301350 }, { "epoch": 35.5592260500236, "grad_norm": 0.09695389866828918, "learning_rate": 1.929245059950397e-05, "loss": 0.0009, "step": 301400 }, { "epoch": 35.56512505899009, "grad_norm": 0.00048463273560628295, "learning_rate": 1.927779266148904e-05, "loss": 0.0012, "step": 301450 }, { "epoch": 35.571024067956586, "grad_norm": 0.034829702228307724, "learning_rate": 1.9263138964024304e-05, "loss": 0.0017, "step": 301500 }, { "epoch": 35.57692307692308, "grad_norm": 0.0010774765396490693, "learning_rate": 1.9248489509132382e-05, "loss": 0.0011, "step": 301550 }, { "epoch": 35.58282208588957, "grad_norm": 0.1145947054028511, "learning_rate": 1.923384429883533e-05, "loss": 0.0014, "step": 301600 }, { "epoch": 35.58872109485606, "grad_norm": 0.06274580955505371, "learning_rate": 1.9219203335154605e-05, "loss": 0.0011, "step": 301650 }, { "epoch": 35.59462010382256, "grad_norm": 0.0031536617316305637, "learning_rate": 1.9204566620111092e-05, "loss": 0.001, "step": 301700 }, { "epoch": 35.60051911278905, "grad_norm": 0.04546411335468292, "learning_rate": 1.9189934155725063e-05, "loss": 0.0014, "step": 301750 }, { "epoch": 35.606418121755546, "grad_norm": 0.12127187103033066, "learning_rate": 1.9175305944016237e-05, "loss": 0.001, "step": 301800 }, { "epoch": 35.61231713072204, "grad_norm": 0.13781358301639557, "learning_rate": 1.9160681987003714e-05, "loss": 0.001, "step": 301850 }, { "epoch": 35.618216139688535, "grad_norm": 0.007678101304918528, "learning_rate": 1.914606228670601e-05, "loss": 0.0012, "step": 301900 }, { "epoch": 35.62411514865503, "grad_norm": 0.3605640232563019, "learning_rate": 1.9131446845141103e-05, "loss": 0.0014, "step": 301950 }, { "epoch": 35.63001415762152, "grad_norm": 0.11286227405071259, "learning_rate": 1.9116835664326326e-05, "loss": 0.0012, "step": 302000 }, { "epoch": 35.63001415762152, "eval_cer": 0.08347676419965576, "eval_loss": 6.421191210392863e-05, "eval_runtime": 2.0386, "eval_samples_per_second": 49.054, "eval_steps_per_second": 1.962, "eval_wer": 0.26, "step": 302000 }, { "epoch": 35.63591316658801, "grad_norm": 0.11558385193347931, "learning_rate": 1.9102228746278444e-05, "loss": 0.0017, "step": 302050 }, { "epoch": 35.641812175554506, "grad_norm": 0.0009048639331012964, "learning_rate": 1.9087626093013634e-05, "loss": 0.0015, "step": 302100 }, { "epoch": 35.647711184521, "grad_norm": 0.016301453113555908, "learning_rate": 1.9073027706547473e-05, "loss": 0.0008, "step": 302150 }, { "epoch": 35.653610193487495, "grad_norm": 0.0009141730843111873, "learning_rate": 1.9058433588894968e-05, "loss": 0.0009, "step": 302200 }, { "epoch": 35.65950920245399, "grad_norm": 0.03407090902328491, "learning_rate": 1.904384374207053e-05, "loss": 0.0013, "step": 302250 }, { "epoch": 35.665408211420484, "grad_norm": 0.14931030571460724, "learning_rate": 1.9029258168087972e-05, "loss": 0.0009, "step": 302300 }, { "epoch": 35.67130722038698, "grad_norm": 0.11616888642311096, "learning_rate": 1.901467686896053e-05, "loss": 0.0008, "step": 302350 }, { "epoch": 35.677206229353466, "grad_norm": 0.044303279370069504, "learning_rate": 1.9000099846700838e-05, "loss": 0.0014, "step": 302400 }, { "epoch": 35.68310523831996, "grad_norm": 0.07338062673807144, "learning_rate": 1.8985527103320927e-05, "loss": 0.0008, "step": 302450 }, { "epoch": 35.689004247286455, "grad_norm": 0.06817618012428284, "learning_rate": 1.8970958640832294e-05, "loss": 0.0005, "step": 302500 }, { "epoch": 35.69490325625295, "grad_norm": 0.0398406907916069, "learning_rate": 1.895639446124579e-05, "loss": 0.0017, "step": 302550 }, { "epoch": 35.700802265219444, "grad_norm": 0.0042649623937904835, "learning_rate": 1.8941834566571694e-05, "loss": 0.0013, "step": 302600 }, { "epoch": 35.70670127418594, "grad_norm": 0.06659501791000366, "learning_rate": 1.8927278958819667e-05, "loss": 0.0013, "step": 302650 }, { "epoch": 35.71260028315243, "grad_norm": 0.07132068276405334, "learning_rate": 1.891272763999884e-05, "loss": 0.0014, "step": 302700 }, { "epoch": 35.71849929211892, "grad_norm": 0.0008284978102892637, "learning_rate": 1.88981806121177e-05, "loss": 0.0009, "step": 302750 }, { "epoch": 35.724398301085415, "grad_norm": 0.31251007318496704, "learning_rate": 1.8883637877184147e-05, "loss": 0.0015, "step": 302800 }, { "epoch": 35.73029731005191, "grad_norm": 0.21263401210308075, "learning_rate": 1.886909943720551e-05, "loss": 0.001, "step": 302850 }, { "epoch": 35.736196319018404, "grad_norm": 0.0002936712116934359, "learning_rate": 1.8854565294188503e-05, "loss": 0.0018, "step": 302900 }, { "epoch": 35.7420953279849, "grad_norm": 0.015688849613070488, "learning_rate": 1.8840035450139266e-05, "loss": 0.0008, "step": 302950 }, { "epoch": 35.74799433695139, "grad_norm": 0.02251521497964859, "learning_rate": 1.8825509907063327e-05, "loss": 0.0011, "step": 303000 }, { "epoch": 35.74799433695139, "eval_cer": 0.08347676419965576, "eval_loss": 6.095783464843407e-05, "eval_runtime": 2.0434, "eval_samples_per_second": 48.938, "eval_steps_per_second": 1.958, "eval_wer": 0.26, "step": 303000 }, { "epoch": 35.75389334591789, "grad_norm": 0.02802714891731739, "learning_rate": 1.8810988666965636e-05, "loss": 0.0017, "step": 303050 }, { "epoch": 35.75979235488438, "grad_norm": 0.004012445453554392, "learning_rate": 1.879647173185054e-05, "loss": 0.0011, "step": 303100 }, { "epoch": 35.76569136385087, "grad_norm": 0.015732817351818085, "learning_rate": 1.8781959103721792e-05, "loss": 0.0016, "step": 303150 }, { "epoch": 35.771590372817364, "grad_norm": 0.11938696354627609, "learning_rate": 1.8767450784582556e-05, "loss": 0.0013, "step": 303200 }, { "epoch": 35.77748938178386, "grad_norm": 0.04584059491753578, "learning_rate": 1.8752946776435388e-05, "loss": 0.0008, "step": 303250 }, { "epoch": 35.78338839075035, "grad_norm": 0.008224118500947952, "learning_rate": 1.873844708128228e-05, "loss": 0.0016, "step": 303300 }, { "epoch": 35.78928739971685, "grad_norm": 0.0004361092287581414, "learning_rate": 1.87239517011246e-05, "loss": 0.0009, "step": 303350 }, { "epoch": 35.79518640868334, "grad_norm": 0.012927633710205555, "learning_rate": 1.8709460637963123e-05, "loss": 0.0017, "step": 303400 }, { "epoch": 35.80108541764984, "grad_norm": 0.007813631556928158, "learning_rate": 1.8694973893798035e-05, "loss": 0.0009, "step": 303450 }, { "epoch": 35.80698442661633, "grad_norm": 0.05375872552394867, "learning_rate": 1.8680491470628924e-05, "loss": 0.0019, "step": 303500 }, { "epoch": 35.81288343558282, "grad_norm": 0.03665873035788536, "learning_rate": 1.866601337045479e-05, "loss": 0.0015, "step": 303550 }, { "epoch": 35.81878244454931, "grad_norm": 0.6218778491020203, "learning_rate": 1.8651539595274016e-05, "loss": 0.0016, "step": 303600 }, { "epoch": 35.82468145351581, "grad_norm": 0.007107184734195471, "learning_rate": 1.8637070147084408e-05, "loss": 0.0009, "step": 303650 }, { "epoch": 35.8305804624823, "grad_norm": 0.002876368584111333, "learning_rate": 1.862260502788316e-05, "loss": 0.0015, "step": 303700 }, { "epoch": 35.8364794714488, "grad_norm": 0.011572964489459991, "learning_rate": 1.860814423966688e-05, "loss": 0.0008, "step": 303750 }, { "epoch": 35.84237848041529, "grad_norm": 0.0005452415789477527, "learning_rate": 1.859368778443158e-05, "loss": 0.0012, "step": 303800 }, { "epoch": 35.848277489381786, "grad_norm": 0.001848946325480938, "learning_rate": 1.857923566417264e-05, "loss": 0.0016, "step": 303850 }, { "epoch": 35.85417649834828, "grad_norm": 0.022002113983035088, "learning_rate": 1.8564787880884904e-05, "loss": 0.0007, "step": 303900 }, { "epoch": 35.86007550731477, "grad_norm": 0.18309707939624786, "learning_rate": 1.855034443656257e-05, "loss": 0.0012, "step": 303950 }, { "epoch": 35.86597451628126, "grad_norm": 0.2799024283885956, "learning_rate": 1.8535905333199248e-05, "loss": 0.0007, "step": 304000 }, { "epoch": 35.86597451628126, "eval_cer": 0.08347676419965576, "eval_loss": 6.342773849610239e-05, "eval_runtime": 2.0552, "eval_samples_per_second": 48.658, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 304000 }, { "epoch": 35.87187352524776, "grad_norm": 0.019275719299912453, "learning_rate": 1.852147057278796e-05, "loss": 0.0008, "step": 304050 }, { "epoch": 35.87777253421425, "grad_norm": 0.08516234159469604, "learning_rate": 1.8507040157321083e-05, "loss": 0.0008, "step": 304100 }, { "epoch": 35.883671543180746, "grad_norm": 0.0800316333770752, "learning_rate": 1.849261408879047e-05, "loss": 0.0014, "step": 304150 }, { "epoch": 35.88957055214724, "grad_norm": 0.02138468809425831, "learning_rate": 1.847819236918733e-05, "loss": 0.0015, "step": 304200 }, { "epoch": 35.895469561113735, "grad_norm": 0.24449782073497772, "learning_rate": 1.8463775000502266e-05, "loss": 0.0014, "step": 304250 }, { "epoch": 35.90136857008023, "grad_norm": 0.005981219466775656, "learning_rate": 1.8449361984725295e-05, "loss": 0.0009, "step": 304300 }, { "epoch": 35.90726757904672, "grad_norm": 0.14112335443496704, "learning_rate": 1.8434953323845816e-05, "loss": 0.0014, "step": 304350 }, { "epoch": 35.91316658801321, "grad_norm": 0.008264207281172276, "learning_rate": 1.8420549019852655e-05, "loss": 0.0008, "step": 304400 }, { "epoch": 35.919065596979706, "grad_norm": 0.11396683007478714, "learning_rate": 1.8406149074734012e-05, "loss": 0.0015, "step": 304450 }, { "epoch": 35.9249646059462, "grad_norm": 0.005869829095900059, "learning_rate": 1.8391753490477505e-05, "loss": 0.0011, "step": 304500 }, { "epoch": 35.930863614912695, "grad_norm": 0.0012061437591910362, "learning_rate": 1.8377362269070127e-05, "loss": 0.001, "step": 304550 }, { "epoch": 35.93676262387919, "grad_norm": 0.04686398431658745, "learning_rate": 1.8362975412498267e-05, "loss": 0.0012, "step": 304600 }, { "epoch": 35.942661632845684, "grad_norm": 0.02664424292743206, "learning_rate": 1.8348592922747766e-05, "loss": 0.0009, "step": 304650 }, { "epoch": 35.94856064181218, "grad_norm": 0.1921277940273285, "learning_rate": 1.83342148018038e-05, "loss": 0.001, "step": 304700 }, { "epoch": 35.95445965077867, "grad_norm": 0.050062257796525955, "learning_rate": 1.831984105165097e-05, "loss": 0.0008, "step": 304750 }, { "epoch": 35.96035865974516, "grad_norm": 0.0002543628797866404, "learning_rate": 1.8305471674273263e-05, "loss": 0.0015, "step": 304800 }, { "epoch": 35.966257668711656, "grad_norm": 0.16513024270534515, "learning_rate": 1.8291106671654062e-05, "loss": 0.0011, "step": 304850 }, { "epoch": 35.97215667767815, "grad_norm": 0.10798899829387665, "learning_rate": 1.827674604577616e-05, "loss": 0.0014, "step": 304900 }, { "epoch": 35.978055686644645, "grad_norm": 0.009903029538691044, "learning_rate": 1.8262389798621738e-05, "loss": 0.001, "step": 304950 }, { "epoch": 35.98395469561114, "grad_norm": 0.014217978343367577, "learning_rate": 1.824803793217237e-05, "loss": 0.001, "step": 305000 }, { "epoch": 35.98395469561114, "eval_cer": 0.08347676419965576, "eval_loss": 0.0001994690828723833, "eval_runtime": 2.0391, "eval_samples_per_second": 49.042, "eval_steps_per_second": 1.962, "eval_wer": 0.26, "step": 305000 }, { "epoch": 35.989853704577634, "grad_norm": 0.07129208743572235, "learning_rate": 1.8233690448409017e-05, "loss": 0.0008, "step": 305050 }, { "epoch": 35.99575271354412, "grad_norm": 0.02357853762805462, "learning_rate": 1.821934734931206e-05, "loss": 0.0013, "step": 305100 }, { "epoch": 36.001651722510616, "grad_norm": 0.0014170745853334665, "learning_rate": 1.8205008636861255e-05, "loss": 0.001, "step": 305150 }, { "epoch": 36.00755073147711, "grad_norm": 0.341286301612854, "learning_rate": 1.8190674313035738e-05, "loss": 0.0011, "step": 305200 }, { "epoch": 36.013449740443605, "grad_norm": 0.33304908871650696, "learning_rate": 1.8176344379814098e-05, "loss": 0.0012, "step": 305250 }, { "epoch": 36.0193487494101, "grad_norm": 0.009584857150912285, "learning_rate": 1.8162018839174256e-05, "loss": 0.0008, "step": 305300 }, { "epoch": 36.025247758376594, "grad_norm": 0.05301161855459213, "learning_rate": 1.8147697693093553e-05, "loss": 0.0008, "step": 305350 }, { "epoch": 36.03114676734309, "grad_norm": 0.04112420976161957, "learning_rate": 1.8133380943548716e-05, "loss": 0.001, "step": 305400 }, { "epoch": 36.03704577630958, "grad_norm": 0.041433919221162796, "learning_rate": 1.811906859251588e-05, "loss": 0.0017, "step": 305450 }, { "epoch": 36.04294478527607, "grad_norm": 0.10452746599912643, "learning_rate": 1.8104760641970547e-05, "loss": 0.0009, "step": 305500 }, { "epoch": 36.048843794242565, "grad_norm": 0.00621511647477746, "learning_rate": 1.8090457093887637e-05, "loss": 0.0009, "step": 305550 }, { "epoch": 36.05474280320906, "grad_norm": 0.06624019891023636, "learning_rate": 1.8076157950241453e-05, "loss": 0.0008, "step": 305600 }, { "epoch": 36.060641812175554, "grad_norm": 0.008715656585991383, "learning_rate": 1.8061863213005682e-05, "loss": 0.0013, "step": 305650 }, { "epoch": 36.06654082114205, "grad_norm": 0.01591675914824009, "learning_rate": 1.8047572884153407e-05, "loss": 0.0015, "step": 305700 }, { "epoch": 36.07243983010854, "grad_norm": 0.0417296402156353, "learning_rate": 1.8033286965657097e-05, "loss": 0.001, "step": 305750 }, { "epoch": 36.07833883907504, "grad_norm": 0.08733287453651428, "learning_rate": 1.8019005459488653e-05, "loss": 0.0012, "step": 305800 }, { "epoch": 36.08423784804153, "grad_norm": 0.014147615060210228, "learning_rate": 1.8004728367619316e-05, "loss": 0.0011, "step": 305850 }, { "epoch": 36.09013685700802, "grad_norm": 0.01538157369941473, "learning_rate": 1.7990455692019738e-05, "loss": 0.0006, "step": 305900 }, { "epoch": 36.096035865974514, "grad_norm": 0.015909481793642044, "learning_rate": 1.797618743465994e-05, "loss": 0.0007, "step": 305950 }, { "epoch": 36.10193487494101, "grad_norm": 0.005818501580506563, "learning_rate": 1.796192359750939e-05, "loss": 0.0009, "step": 306000 }, { "epoch": 36.10193487494101, "eval_cer": 0.08347676419965576, "eval_loss": 5.838303331984207e-05, "eval_runtime": 2.0555, "eval_samples_per_second": 48.651, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 306000 }, { "epoch": 36.1078338839075, "grad_norm": 0.008948691189289093, "learning_rate": 1.7947664182536883e-05, "loss": 0.0007, "step": 306050 }, { "epoch": 36.113732892874, "grad_norm": 0.03682638704776764, "learning_rate": 1.7933409191710642e-05, "loss": 0.0009, "step": 306100 }, { "epoch": 36.11963190184049, "grad_norm": 0.02211911603808403, "learning_rate": 1.7919158626998255e-05, "loss": 0.0012, "step": 306150 }, { "epoch": 36.12553091080699, "grad_norm": 0.047997280955314636, "learning_rate": 1.790491249036672e-05, "loss": 0.0011, "step": 306200 }, { "epoch": 36.13142991977348, "grad_norm": 0.08640244603157043, "learning_rate": 1.7890670783782408e-05, "loss": 0.0008, "step": 306250 }, { "epoch": 36.13732892873997, "grad_norm": 0.050344210118055344, "learning_rate": 1.787643350921109e-05, "loss": 0.0007, "step": 306300 }, { "epoch": 36.14322793770646, "grad_norm": 0.05181210860610008, "learning_rate": 1.786220066861792e-05, "loss": 0.0014, "step": 306350 }, { "epoch": 36.14912694667296, "grad_norm": 0.005314641632139683, "learning_rate": 1.7847972263967433e-05, "loss": 0.0007, "step": 306400 }, { "epoch": 36.15502595563945, "grad_norm": 0.02095305733382702, "learning_rate": 1.783374829722357e-05, "loss": 0.0015, "step": 306450 }, { "epoch": 36.16092496460595, "grad_norm": 0.01890033483505249, "learning_rate": 1.7819528770349636e-05, "loss": 0.0007, "step": 306500 }, { "epoch": 36.16682397357244, "grad_norm": 0.1704421192407608, "learning_rate": 1.780531368530833e-05, "loss": 0.0017, "step": 306550 }, { "epoch": 36.172722982538936, "grad_norm": 0.1756904274225235, "learning_rate": 1.7791103044061768e-05, "loss": 0.0009, "step": 306600 }, { "epoch": 36.17862199150543, "grad_norm": 0.017769930884242058, "learning_rate": 1.7776896848571418e-05, "loss": 0.0008, "step": 306650 }, { "epoch": 36.18452100047192, "grad_norm": 0.005194894038140774, "learning_rate": 1.7762695100798142e-05, "loss": 0.0014, "step": 306700 }, { "epoch": 36.19042000943841, "grad_norm": 0.01694582775235176, "learning_rate": 1.774849780270219e-05, "loss": 0.0009, "step": 306750 }, { "epoch": 36.19631901840491, "grad_norm": 0.00026805835659615695, "learning_rate": 1.7734304956243204e-05, "loss": 0.0016, "step": 306800 }, { "epoch": 36.2022180273714, "grad_norm": 0.12615139782428741, "learning_rate": 1.7720116563380194e-05, "loss": 0.0015, "step": 306850 }, { "epoch": 36.208117036337896, "grad_norm": 0.31049540638923645, "learning_rate": 1.770593262607158e-05, "loss": 0.0009, "step": 306900 }, { "epoch": 36.21401604530439, "grad_norm": 0.04243394359946251, "learning_rate": 1.7691753146275147e-05, "loss": 0.0008, "step": 306950 }, { "epoch": 36.219915054270885, "grad_norm": 0.013373645953834057, "learning_rate": 1.7677578125948068e-05, "loss": 0.001, "step": 307000 }, { "epoch": 36.219915054270885, "eval_cer": 0.08347676419965576, "eval_loss": 6.780117837479338e-05, "eval_runtime": 2.0572, "eval_samples_per_second": 48.61, "eval_steps_per_second": 1.944, "eval_wer": 0.26, "step": 307000 }, { "epoch": 36.22581406323738, "grad_norm": 0.005756912287324667, "learning_rate": 1.7663407567046918e-05, "loss": 0.0008, "step": 307050 }, { "epoch": 36.23171307220387, "grad_norm": 0.18129242956638336, "learning_rate": 1.7649241471527632e-05, "loss": 0.0015, "step": 307100 }, { "epoch": 36.23761208117036, "grad_norm": 0.009825905784964561, "learning_rate": 1.7635079841345527e-05, "loss": 0.0011, "step": 307150 }, { "epoch": 36.243511090136856, "grad_norm": 0.04106299206614494, "learning_rate": 1.762092267845534e-05, "loss": 0.002, "step": 307200 }, { "epoch": 36.24941009910335, "grad_norm": 0.012187343090772629, "learning_rate": 1.7606769984811168e-05, "loss": 0.0011, "step": 307250 }, { "epoch": 36.255309108069845, "grad_norm": 0.008137619122862816, "learning_rate": 1.7592621762366474e-05, "loss": 0.0006, "step": 307300 }, { "epoch": 36.26120811703634, "grad_norm": 0.0023324599023908377, "learning_rate": 1.7578478013074127e-05, "loss": 0.0009, "step": 307350 }, { "epoch": 36.267107126002834, "grad_norm": 0.09110753983259201, "learning_rate": 1.7564338738886367e-05, "loss": 0.0008, "step": 307400 }, { "epoch": 36.27300613496932, "grad_norm": 0.022237716242671013, "learning_rate": 1.7550203941754834e-05, "loss": 0.001, "step": 307450 }, { "epoch": 36.278905143935816, "grad_norm": 0.0015323006082326174, "learning_rate": 1.7536073623630534e-05, "loss": 0.0008, "step": 307500 }, { "epoch": 36.28480415290231, "grad_norm": 0.000783934723585844, "learning_rate": 1.7521947786463855e-05, "loss": 0.0008, "step": 307550 }, { "epoch": 36.290703161868805, "grad_norm": 0.011232925578951836, "learning_rate": 1.750782643220457e-05, "loss": 0.001, "step": 307600 }, { "epoch": 36.2966021708353, "grad_norm": 0.04270437732338905, "learning_rate": 1.7493709562801836e-05, "loss": 0.001, "step": 307650 }, { "epoch": 36.302501179801794, "grad_norm": 0.13592621684074402, "learning_rate": 1.7479597180204182e-05, "loss": 0.0017, "step": 307700 }, { "epoch": 36.30840018876829, "grad_norm": 0.018126346170902252, "learning_rate": 1.7465489286359526e-05, "loss": 0.0009, "step": 307750 }, { "epoch": 36.31429919773478, "grad_norm": 0.09810677915811539, "learning_rate": 1.7451385883215165e-05, "loss": 0.0009, "step": 307800 }, { "epoch": 36.32019820670127, "grad_norm": 0.012932362034916878, "learning_rate": 1.743728697271777e-05, "loss": 0.0011, "step": 307850 }, { "epoch": 36.326097215667765, "grad_norm": 0.00020734134886879474, "learning_rate": 1.742319255681339e-05, "loss": 0.001, "step": 307900 }, { "epoch": 36.33199622463426, "grad_norm": 0.11186184734106064, "learning_rate": 1.740910263744749e-05, "loss": 0.0009, "step": 307950 }, { "epoch": 36.337895233600754, "grad_norm": 0.028521928936243057, "learning_rate": 1.7395017216564863e-05, "loss": 0.0012, "step": 308000 }, { "epoch": 36.337895233600754, "eval_cer": 0.08347676419965576, "eval_loss": 4.9760805268306285e-05, "eval_runtime": 2.034, "eval_samples_per_second": 49.165, "eval_steps_per_second": 1.967, "eval_wer": 0.26, "step": 308000 }, { "epoch": 36.34379424256725, "grad_norm": 0.04584592208266258, "learning_rate": 1.7380936296109708e-05, "loss": 0.0015, "step": 308050 }, { "epoch": 36.34969325153374, "grad_norm": 0.03269997239112854, "learning_rate": 1.7366859878025594e-05, "loss": 0.0008, "step": 308100 }, { "epoch": 36.35559226050024, "grad_norm": 0.007619339507073164, "learning_rate": 1.735278796425548e-05, "loss": 0.0011, "step": 308150 }, { "epoch": 36.36149126946673, "grad_norm": 0.08888516575098038, "learning_rate": 1.733872055674169e-05, "loss": 0.0009, "step": 308200 }, { "epoch": 36.36739027843322, "grad_norm": 0.18509289622306824, "learning_rate": 1.7324657657425936e-05, "loss": 0.0011, "step": 308250 }, { "epoch": 36.373289287399714, "grad_norm": 0.0021576792933046818, "learning_rate": 1.7310599268249294e-05, "loss": 0.0012, "step": 308300 }, { "epoch": 36.37918829636621, "grad_norm": 0.000133052104501985, "learning_rate": 1.7296545391152242e-05, "loss": 0.0009, "step": 308350 }, { "epoch": 36.3850873053327, "grad_norm": 0.0026793130673468113, "learning_rate": 1.7282496028074607e-05, "loss": 0.0008, "step": 308400 }, { "epoch": 36.3909863142992, "grad_norm": 0.050666969269514084, "learning_rate": 1.7268451180955608e-05, "loss": 0.0006, "step": 308450 }, { "epoch": 36.39688532326569, "grad_norm": 0.0012840518029406667, "learning_rate": 1.7254410851733827e-05, "loss": 0.0008, "step": 308500 }, { "epoch": 36.40278433223219, "grad_norm": 0.0040526315569877625, "learning_rate": 1.7240375042347267e-05, "loss": 0.001, "step": 308550 }, { "epoch": 36.40868334119868, "grad_norm": 0.01692880131304264, "learning_rate": 1.7226343754733253e-05, "loss": 0.0011, "step": 308600 }, { "epoch": 36.41458235016517, "grad_norm": 0.0011717814486473799, "learning_rate": 1.7212316990828515e-05, "loss": 0.001, "step": 308650 }, { "epoch": 36.420481359131664, "grad_norm": 0.01254151202738285, "learning_rate": 1.7198294752569137e-05, "loss": 0.0017, "step": 308700 }, { "epoch": 36.42638036809816, "grad_norm": 0.0030485629104077816, "learning_rate": 1.7184277041890612e-05, "loss": 0.0011, "step": 308750 }, { "epoch": 36.43227937706465, "grad_norm": 0.010155200958251953, "learning_rate": 1.717026386072777e-05, "loss": 0.0004, "step": 308800 }, { "epoch": 36.43817838603115, "grad_norm": 0.31319648027420044, "learning_rate": 1.7156255211014844e-05, "loss": 0.0011, "step": 308850 }, { "epoch": 36.44407739499764, "grad_norm": 0.0017453266773372889, "learning_rate": 1.7142251094685425e-05, "loss": 0.0016, "step": 308900 }, { "epoch": 36.449976403964136, "grad_norm": 0.010744467377662659, "learning_rate": 1.7128251513672488e-05, "loss": 0.0012, "step": 308950 }, { "epoch": 36.45587541293063, "grad_norm": 0.7502198815345764, "learning_rate": 1.711425646990838e-05, "loss": 0.0019, "step": 309000 }, { "epoch": 36.45587541293063, "eval_cer": 0.08347676419965576, "eval_loss": 4.665248707169667e-05, "eval_runtime": 2.0756, "eval_samples_per_second": 48.178, "eval_steps_per_second": 1.927, "eval_wer": 0.26, "step": 309000 }, { "epoch": 36.46177442189712, "grad_norm": 0.0065879677422344685, "learning_rate": 1.7100265965324802e-05, "loss": 0.0009, "step": 309050 }, { "epoch": 36.46767343086361, "grad_norm": 0.05816074460744858, "learning_rate": 1.7086280001852883e-05, "loss": 0.0012, "step": 309100 }, { "epoch": 36.47357243983011, "grad_norm": 0.004066971596330404, "learning_rate": 1.707229858142307e-05, "loss": 0.0016, "step": 309150 }, { "epoch": 36.4794714487966, "grad_norm": 0.13791313767433167, "learning_rate": 1.7058321705965203e-05, "loss": 0.0009, "step": 309200 }, { "epoch": 36.485370457763096, "grad_norm": 0.004525486379861832, "learning_rate": 1.7044349377408475e-05, "loss": 0.0018, "step": 309250 }, { "epoch": 36.49126946672959, "grad_norm": 0.017871372401714325, "learning_rate": 1.7030381597681505e-05, "loss": 0.0012, "step": 309300 }, { "epoch": 36.497168475696085, "grad_norm": 0.11795446276664734, "learning_rate": 1.7016418368712227e-05, "loss": 0.0014, "step": 309350 }, { "epoch": 36.50306748466258, "grad_norm": 0.06696313619613647, "learning_rate": 1.700245969242798e-05, "loss": 0.0012, "step": 309400 }, { "epoch": 36.50896649362907, "grad_norm": 0.00878632627427578, "learning_rate": 1.6988505570755457e-05, "loss": 0.0016, "step": 309450 }, { "epoch": 36.51486550259556, "grad_norm": 0.15421953797340393, "learning_rate": 1.6974556005620736e-05, "loss": 0.0011, "step": 309500 }, { "epoch": 36.52076451156206, "grad_norm": 0.14295879006385803, "learning_rate": 1.6960610998949244e-05, "loss": 0.0007, "step": 309550 }, { "epoch": 36.52666352052855, "grad_norm": 0.11398563534021378, "learning_rate": 1.6946670552665806e-05, "loss": 0.0007, "step": 309600 }, { "epoch": 36.532562529495046, "grad_norm": 0.0198355820029974, "learning_rate": 1.6932734668694606e-05, "loss": 0.0012, "step": 309650 }, { "epoch": 36.53846153846154, "grad_norm": 0.0594574399292469, "learning_rate": 1.691880334895919e-05, "loss": 0.0016, "step": 309700 }, { "epoch": 36.544360547428035, "grad_norm": 0.020384851843118668, "learning_rate": 1.690487659538249e-05, "loss": 0.001, "step": 309750 }, { "epoch": 36.55025955639453, "grad_norm": 0.12978649139404297, "learning_rate": 1.6890954409886794e-05, "loss": 0.0012, "step": 309800 }, { "epoch": 36.55615856536102, "grad_norm": 0.08515201508998871, "learning_rate": 1.6877036794393746e-05, "loss": 0.0006, "step": 309850 }, { "epoch": 36.56205757432751, "grad_norm": 0.002907276852056384, "learning_rate": 1.6863123750824422e-05, "loss": 0.0017, "step": 309900 }, { "epoch": 36.567956583294006, "grad_norm": 0.008856755681335926, "learning_rate": 1.6849215281099195e-05, "loss": 0.0009, "step": 309950 }, { "epoch": 36.5738555922605, "grad_norm": 0.033404357731342316, "learning_rate": 1.6835311387137836e-05, "loss": 0.0015, "step": 310000 }, { "epoch": 36.5738555922605, "eval_cer": 0.08347676419965576, "eval_loss": 3.9737911720294505e-05, "eval_runtime": 2.0537, "eval_samples_per_second": 48.692, "eval_steps_per_second": 1.948, "eval_wer": 0.26, "step": 310000 }, { "epoch": 36.579754601226995, "grad_norm": 0.10522979497909546, "learning_rate": 1.6821412070859494e-05, "loss": 0.0012, "step": 310050 }, { "epoch": 36.58565361019349, "grad_norm": 0.012189053930342197, "learning_rate": 1.680751733418266e-05, "loss": 0.001, "step": 310100 }, { "epoch": 36.591552619159984, "grad_norm": 0.00742168165743351, "learning_rate": 1.6793627179025223e-05, "loss": 0.001, "step": 310150 }, { "epoch": 36.59745162812648, "grad_norm": 0.03082684986293316, "learning_rate": 1.6779741607304412e-05, "loss": 0.0009, "step": 310200 }, { "epoch": 36.603350637092966, "grad_norm": 0.0943235531449318, "learning_rate": 1.676586062093684e-05, "loss": 0.0009, "step": 310250 }, { "epoch": 36.60924964605946, "grad_norm": 0.0014744031941518188, "learning_rate": 1.675198422183849e-05, "loss": 0.0008, "step": 310300 }, { "epoch": 36.615148655025955, "grad_norm": 0.07383538037538528, "learning_rate": 1.67381124119247e-05, "loss": 0.0006, "step": 310350 }, { "epoch": 36.62104766399245, "grad_norm": 0.057611629366874695, "learning_rate": 1.6724245193110178e-05, "loss": 0.0015, "step": 310400 }, { "epoch": 36.626946672958944, "grad_norm": 0.029144829139113426, "learning_rate": 1.6710382567309003e-05, "loss": 0.0014, "step": 310450 }, { "epoch": 36.63284568192544, "grad_norm": 0.048211511224508286, "learning_rate": 1.66965245364346e-05, "loss": 0.0013, "step": 310500 }, { "epoch": 36.63874469089193, "grad_norm": 0.008016347885131836, "learning_rate": 1.6682671102399805e-05, "loss": 0.0006, "step": 310550 }, { "epoch": 36.64464369985842, "grad_norm": 0.0009655950125306845, "learning_rate": 1.6668822267116785e-05, "loss": 0.0008, "step": 310600 }, { "epoch": 36.650542708824915, "grad_norm": 0.0331975482404232, "learning_rate": 1.6654978032497065e-05, "loss": 0.0009, "step": 310650 }, { "epoch": 36.65644171779141, "grad_norm": 0.2041444182395935, "learning_rate": 1.6641138400451546e-05, "loss": 0.0013, "step": 310700 }, { "epoch": 36.662340726757904, "grad_norm": 0.0015020312275737524, "learning_rate": 1.662730337289051e-05, "loss": 0.001, "step": 310750 }, { "epoch": 36.6682397357244, "grad_norm": 0.022127946838736534, "learning_rate": 1.6613472951723597e-05, "loss": 0.0009, "step": 310800 }, { "epoch": 36.67413874469089, "grad_norm": 0.2628662586212158, "learning_rate": 1.6599647138859788e-05, "loss": 0.001, "step": 310850 }, { "epoch": 36.68003775365739, "grad_norm": 0.013288683257997036, "learning_rate": 1.658582593620745e-05, "loss": 0.0008, "step": 310900 }, { "epoch": 36.68593676262388, "grad_norm": 0.026475977152585983, "learning_rate": 1.65720093456743e-05, "loss": 0.0016, "step": 310950 }, { "epoch": 36.69183577159037, "grad_norm": 0.0023927248548716307, "learning_rate": 1.6558197369167433e-05, "loss": 0.001, "step": 311000 }, { "epoch": 36.69183577159037, "eval_cer": 0.08347676419965576, "eval_loss": 3.777446909225546e-05, "eval_runtime": 2.0644, "eval_samples_per_second": 48.441, "eval_steps_per_second": 1.938, "eval_wer": 0.26, "step": 311000 }, { "epoch": 36.697734780556864, "grad_norm": 0.0023227371275424957, "learning_rate": 1.65443900085933e-05, "loss": 0.0011, "step": 311050 }, { "epoch": 36.70363378952336, "grad_norm": 0.00046466480125673115, "learning_rate": 1.653058726585771e-05, "loss": 0.0013, "step": 311100 }, { "epoch": 36.70953279848985, "grad_norm": 0.00912527833133936, "learning_rate": 1.6516789142865846e-05, "loss": 0.0006, "step": 311150 }, { "epoch": 36.71543180745635, "grad_norm": 0.08612337708473206, "learning_rate": 1.6502995641522216e-05, "loss": 0.001, "step": 311200 }, { "epoch": 36.72133081642284, "grad_norm": 0.02981458604335785, "learning_rate": 1.6489206763730775e-05, "loss": 0.0006, "step": 311250 }, { "epoch": 36.72722982538934, "grad_norm": 0.03583524003624916, "learning_rate": 1.6475422511394754e-05, "loss": 0.0013, "step": 311300 }, { "epoch": 36.73312883435583, "grad_norm": 0.026952078565955162, "learning_rate": 1.6461642886416777e-05, "loss": 0.0014, "step": 311350 }, { "epoch": 36.73902784332232, "grad_norm": 0.002633970230817795, "learning_rate": 1.6447867890698843e-05, "loss": 0.0014, "step": 311400 }, { "epoch": 36.74492685228881, "grad_norm": 0.0010166224092245102, "learning_rate": 1.6434097526142278e-05, "loss": 0.001, "step": 311450 }, { "epoch": 36.75082586125531, "grad_norm": 0.07585403323173523, "learning_rate": 1.642033179464781e-05, "loss": 0.0018, "step": 311500 }, { "epoch": 36.7567248702218, "grad_norm": 0.002694150432944298, "learning_rate": 1.6406570698115493e-05, "loss": 0.0014, "step": 311550 }, { "epoch": 36.7626238791883, "grad_norm": 0.08933814615011215, "learning_rate": 1.6392814238444753e-05, "loss": 0.0007, "step": 311600 }, { "epoch": 36.76852288815479, "grad_norm": 0.005280863493680954, "learning_rate": 1.637906241753439e-05, "loss": 0.0012, "step": 311650 }, { "epoch": 36.774421897121286, "grad_norm": 0.015650618821382523, "learning_rate": 1.6365315237282546e-05, "loss": 0.001, "step": 311700 }, { "epoch": 36.78032090608778, "grad_norm": 0.0027553823310881853, "learning_rate": 1.6351572699586725e-05, "loss": 0.0011, "step": 311750 }, { "epoch": 36.78621991505427, "grad_norm": 1.2505043745040894, "learning_rate": 1.6337834806343783e-05, "loss": 0.0022, "step": 311800 }, { "epoch": 36.79211892402076, "grad_norm": 0.02391055040061474, "learning_rate": 1.632410155944998e-05, "loss": 0.0009, "step": 311850 }, { "epoch": 36.79801793298726, "grad_norm": 0.10869141668081284, "learning_rate": 1.631037296080087e-05, "loss": 0.0007, "step": 311900 }, { "epoch": 36.80391694195375, "grad_norm": 0.25131550431251526, "learning_rate": 1.6296649012291403e-05, "loss": 0.0017, "step": 311950 }, { "epoch": 36.809815950920246, "grad_norm": 0.0286867655813694, "learning_rate": 1.628292971581588e-05, "loss": 0.0012, "step": 312000 }, { "epoch": 36.809815950920246, "eval_cer": 0.08347676419965576, "eval_loss": 6.212421430973336e-05, "eval_runtime": 2.0616, "eval_samples_per_second": 48.505, "eval_steps_per_second": 1.94, "eval_wer": 0.26, "step": 312000 }, { "epoch": 36.81571495988674, "grad_norm": 0.09454330056905746, "learning_rate": 1.626921507326796e-05, "loss": 0.0008, "step": 312050 }, { "epoch": 36.821613968853235, "grad_norm": 0.04030952602624893, "learning_rate": 1.6255505086540662e-05, "loss": 0.0011, "step": 312100 }, { "epoch": 36.82751297781973, "grad_norm": 0.3629710078239441, "learning_rate": 1.624179975752636e-05, "loss": 0.0016, "step": 312150 }, { "epoch": 36.83341198678622, "grad_norm": 0.009586532600224018, "learning_rate": 1.6228099088116772e-05, "loss": 0.0012, "step": 312200 }, { "epoch": 36.83931099575271, "grad_norm": 0.028559111058712006, "learning_rate": 1.6214403080202995e-05, "loss": 0.001, "step": 312250 }, { "epoch": 36.845210004719206, "grad_norm": 0.0836101546883583, "learning_rate": 1.6200711735675476e-05, "loss": 0.001, "step": 312300 }, { "epoch": 36.8511090136857, "grad_norm": 0.05867423862218857, "learning_rate": 1.6187025056423993e-05, "loss": 0.0012, "step": 312350 }, { "epoch": 36.857008022652195, "grad_norm": 0.0029676773119717836, "learning_rate": 1.6173343044337734e-05, "loss": 0.0009, "step": 312400 }, { "epoch": 36.86290703161869, "grad_norm": 0.002714760834351182, "learning_rate": 1.61596657013052e-05, "loss": 0.0007, "step": 312450 }, { "epoch": 36.868806040585184, "grad_norm": 0.14186419546604156, "learning_rate": 1.6145993029214253e-05, "loss": 0.0017, "step": 312500 }, { "epoch": 36.87470504955168, "grad_norm": 0.003019202034920454, "learning_rate": 1.61323250299521e-05, "loss": 0.0007, "step": 312550 }, { "epoch": 36.880604058518166, "grad_norm": 0.1551789790391922, "learning_rate": 1.6118661705405358e-05, "loss": 0.0007, "step": 312600 }, { "epoch": 36.88650306748466, "grad_norm": 0.007312541827559471, "learning_rate": 1.6105003057459938e-05, "loss": 0.0012, "step": 312650 }, { "epoch": 36.892402076451155, "grad_norm": 0.07456668466329575, "learning_rate": 1.609134908800113e-05, "loss": 0.0006, "step": 312700 }, { "epoch": 36.89830108541765, "grad_norm": 0.012900427915155888, "learning_rate": 1.607769979891357e-05, "loss": 0.0011, "step": 312750 }, { "epoch": 36.904200094384144, "grad_norm": 0.003902676748111844, "learning_rate": 1.6064055192081256e-05, "loss": 0.0018, "step": 312800 }, { "epoch": 36.91009910335064, "grad_norm": 0.023101812228560448, "learning_rate": 1.6050415269387542e-05, "loss": 0.0011, "step": 312850 }, { "epoch": 36.91599811231713, "grad_norm": 0.018759924918413162, "learning_rate": 1.603678003271512e-05, "loss": 0.0012, "step": 312900 }, { "epoch": 36.92189712128362, "grad_norm": 0.008264539763331413, "learning_rate": 1.602314948394606e-05, "loss": 0.0013, "step": 312950 }, { "epoch": 36.927796130250115, "grad_norm": 0.016967717558145523, "learning_rate": 1.600952362496176e-05, "loss": 0.001, "step": 313000 }, { "epoch": 36.927796130250115, "eval_cer": 0.08347676419965576, "eval_loss": 7.238446414703503e-05, "eval_runtime": 2.0241, "eval_samples_per_second": 49.405, "eval_steps_per_second": 1.976, "eval_wer": 0.26, "step": 313000 }, { "epoch": 36.93369513921661, "grad_norm": 0.13199904561042786, "learning_rate": 1.5995902457642986e-05, "loss": 0.0013, "step": 313050 }, { "epoch": 36.939594148183104, "grad_norm": 0.02036667801439762, "learning_rate": 1.5982285983869843e-05, "loss": 0.001, "step": 313100 }, { "epoch": 36.9454931571496, "grad_norm": 0.0020179643761366606, "learning_rate": 1.5968674205521795e-05, "loss": 0.0009, "step": 313150 }, { "epoch": 36.951392166116094, "grad_norm": 0.046281781047582626, "learning_rate": 1.595506712447768e-05, "loss": 0.0009, "step": 313200 }, { "epoch": 36.95729117508259, "grad_norm": 0.014560363255441189, "learning_rate": 1.594146474261566e-05, "loss": 0.001, "step": 313250 }, { "epoch": 36.96319018404908, "grad_norm": 0.019435463473200798, "learning_rate": 1.5927867061813246e-05, "loss": 0.0012, "step": 313300 }, { "epoch": 36.96908919301557, "grad_norm": 0.0005699559114873409, "learning_rate": 1.591427408394732e-05, "loss": 0.0011, "step": 313350 }, { "epoch": 36.974988201982065, "grad_norm": 0.13591720163822174, "learning_rate": 1.59006858108941e-05, "loss": 0.0009, "step": 313400 }, { "epoch": 36.98088721094856, "grad_norm": 0.012264893390238285, "learning_rate": 1.5887102244529157e-05, "loss": 0.0009, "step": 313450 }, { "epoch": 36.986786219915054, "grad_norm": 0.015979228541254997, "learning_rate": 1.587352338672742e-05, "loss": 0.001, "step": 313500 }, { "epoch": 36.99268522888155, "grad_norm": 0.00788032915443182, "learning_rate": 1.585994923936316e-05, "loss": 0.001, "step": 313550 }, { "epoch": 36.99858423784804, "grad_norm": 0.0144459567964077, "learning_rate": 1.5846379804310002e-05, "loss": 0.0014, "step": 313600 }, { "epoch": 37.00448324681454, "grad_norm": 0.00545898312702775, "learning_rate": 1.5832815083440917e-05, "loss": 0.0011, "step": 313650 }, { "epoch": 37.01038225578103, "grad_norm": 0.12681275606155396, "learning_rate": 1.5819255078628225e-05, "loss": 0.0009, "step": 313700 }, { "epoch": 37.01628126474752, "grad_norm": 0.0010359695879742503, "learning_rate": 1.5805699791743604e-05, "loss": 0.0008, "step": 313750 }, { "epoch": 37.022180273714014, "grad_norm": 0.010298876091837883, "learning_rate": 1.5792149224658055e-05, "loss": 0.0005, "step": 313800 }, { "epoch": 37.02807928268051, "grad_norm": 0.19889992475509644, "learning_rate": 1.577860337924198e-05, "loss": 0.0011, "step": 313850 }, { "epoch": 37.033978291647, "grad_norm": 0.012626496143639088, "learning_rate": 1.5765062257365078e-05, "loss": 0.0014, "step": 313900 }, { "epoch": 37.0398773006135, "grad_norm": 0.18246617913246155, "learning_rate": 1.5751525860896415e-05, "loss": 0.0008, "step": 313950 }, { "epoch": 37.04577630957999, "grad_norm": 0.0058857593685388565, "learning_rate": 1.5737994191704387e-05, "loss": 0.0014, "step": 314000 }, { "epoch": 37.04577630957999, "eval_cer": 0.08347676419965576, "eval_loss": 4.8094807425513864e-05, "eval_runtime": 2.037, "eval_samples_per_second": 49.091, "eval_steps_per_second": 1.964, "eval_wer": 0.26, "step": 314000 }, { "epoch": 37.051675318546486, "grad_norm": 0.023511987179517746, "learning_rate": 1.5724467251656793e-05, "loss": 0.0007, "step": 314050 }, { "epoch": 37.05757432751298, "grad_norm": 0.0004290266369935125, "learning_rate": 1.5710945042620718e-05, "loss": 0.001, "step": 314100 }, { "epoch": 37.06347333647947, "grad_norm": 0.061525143682956696, "learning_rate": 1.5697427566462615e-05, "loss": 0.0008, "step": 314150 }, { "epoch": 37.06937234544596, "grad_norm": 0.002085821470245719, "learning_rate": 1.568391482504829e-05, "loss": 0.0011, "step": 314200 }, { "epoch": 37.07527135441246, "grad_norm": 0.015379262156784534, "learning_rate": 1.567040682024289e-05, "loss": 0.0005, "step": 314250 }, { "epoch": 37.08117036337895, "grad_norm": 0.009662051685154438, "learning_rate": 1.5656903553910908e-05, "loss": 0.0011, "step": 314300 }, { "epoch": 37.08706937234545, "grad_norm": 0.24230681359767914, "learning_rate": 1.564340502791618e-05, "loss": 0.0013, "step": 314350 }, { "epoch": 37.09296838131194, "grad_norm": 0.003254856215789914, "learning_rate": 1.5629911244121903e-05, "loss": 0.0015, "step": 314400 }, { "epoch": 37.098867390278436, "grad_norm": 0.035120654851198196, "learning_rate": 1.5616422204390597e-05, "loss": 0.0015, "step": 314450 }, { "epoch": 37.10476639924493, "grad_norm": 0.0518161840736866, "learning_rate": 1.5602937910584126e-05, "loss": 0.0006, "step": 314500 }, { "epoch": 37.11066540821142, "grad_norm": 0.13825878500938416, "learning_rate": 1.558945836456375e-05, "loss": 0.0018, "step": 314550 }, { "epoch": 37.11656441717791, "grad_norm": 0.19437846541404724, "learning_rate": 1.557598356819e-05, "loss": 0.0012, "step": 314600 }, { "epoch": 37.12246342614441, "grad_norm": 0.010995641350746155, "learning_rate": 1.5562513523322807e-05, "loss": 0.0012, "step": 314650 }, { "epoch": 37.1283624351109, "grad_norm": 0.009817679412662983, "learning_rate": 1.5549048231821416e-05, "loss": 0.0006, "step": 314700 }, { "epoch": 37.134261444077396, "grad_norm": 0.0017621010774746537, "learning_rate": 1.553558769554443e-05, "loss": 0.0008, "step": 314750 }, { "epoch": 37.14016045304389, "grad_norm": 0.27183809876441956, "learning_rate": 1.5522131916349787e-05, "loss": 0.0011, "step": 314800 }, { "epoch": 37.146059462010385, "grad_norm": 0.013255060650408268, "learning_rate": 1.5508680896094767e-05, "loss": 0.0008, "step": 314850 }, { "epoch": 37.15195847097688, "grad_norm": 0.00089036620920524, "learning_rate": 1.5495234636636008e-05, "loss": 0.0005, "step": 314900 }, { "epoch": 37.15785747994337, "grad_norm": 0.004369974602013826, "learning_rate": 1.5481793139829482e-05, "loss": 0.0005, "step": 314950 }, { "epoch": 37.16375648890986, "grad_norm": 0.0022960992064327, "learning_rate": 1.5468356407530494e-05, "loss": 0.001, "step": 315000 }, { "epoch": 37.16375648890986, "eval_cer": 0.08347676419965576, "eval_loss": 7.866972737247124e-05, "eval_runtime": 2.0497, "eval_samples_per_second": 48.788, "eval_steps_per_second": 1.952, "eval_wer": 0.26, "step": 315000 }, { "epoch": 37.169655497876356, "grad_norm": 0.008626163937151432, "learning_rate": 1.5454924441593704e-05, "loss": 0.0012, "step": 315050 }, { "epoch": 37.17555450684285, "grad_norm": 0.0006270642043091357, "learning_rate": 1.5441497243873115e-05, "loss": 0.0011, "step": 315100 }, { "epoch": 37.181453515809345, "grad_norm": 0.02496343106031418, "learning_rate": 1.542807481622205e-05, "loss": 0.0007, "step": 315150 }, { "epoch": 37.18735252477584, "grad_norm": 0.11617141962051392, "learning_rate": 1.5414657160493217e-05, "loss": 0.0011, "step": 315200 }, { "epoch": 37.193251533742334, "grad_norm": 0.002072802511975169, "learning_rate": 1.540124427853863e-05, "loss": 0.0008, "step": 315250 }, { "epoch": 37.19915054270882, "grad_norm": 0.0026364740915596485, "learning_rate": 1.5387836172209652e-05, "loss": 0.001, "step": 315300 }, { "epoch": 37.205049551675316, "grad_norm": 0.057467829436063766, "learning_rate": 1.5374432843356984e-05, "loss": 0.001, "step": 315350 }, { "epoch": 37.21094856064181, "grad_norm": 0.015518253669142723, "learning_rate": 1.536103429383067e-05, "loss": 0.0011, "step": 315400 }, { "epoch": 37.216847569608305, "grad_norm": 0.0025342386215925217, "learning_rate": 1.5347640525480112e-05, "loss": 0.0005, "step": 315450 }, { "epoch": 37.2227465785748, "grad_norm": 0.06137629225850105, "learning_rate": 1.5334251540154014e-05, "loss": 0.0007, "step": 315500 }, { "epoch": 37.228645587541294, "grad_norm": 0.0030992652755230665, "learning_rate": 1.5320867339700464e-05, "loss": 0.001, "step": 315550 }, { "epoch": 37.23454459650779, "grad_norm": 0.08688360452651978, "learning_rate": 1.5307487925966846e-05, "loss": 0.001, "step": 315600 }, { "epoch": 37.24044360547428, "grad_norm": 0.039346057921648026, "learning_rate": 1.52941133007999e-05, "loss": 0.001, "step": 315650 }, { "epoch": 37.24634261444077, "grad_norm": 0.04049723595380783, "learning_rate": 1.5280743466045742e-05, "loss": 0.0012, "step": 315700 }, { "epoch": 37.252241623407265, "grad_norm": 0.12778723239898682, "learning_rate": 1.5267378423549784e-05, "loss": 0.0007, "step": 315750 }, { "epoch": 37.25814063237376, "grad_norm": 0.0038957404904067516, "learning_rate": 1.5254018175156775e-05, "loss": 0.0006, "step": 315800 }, { "epoch": 37.264039641340254, "grad_norm": 0.012158235535025597, "learning_rate": 1.5240662722710802e-05, "loss": 0.001, "step": 315850 }, { "epoch": 37.26993865030675, "grad_norm": 0.21612855792045593, "learning_rate": 1.5227312068055338e-05, "loss": 0.0011, "step": 315900 }, { "epoch": 37.27583765927324, "grad_norm": 0.007213029079139233, "learning_rate": 1.5213966213033142e-05, "loss": 0.001, "step": 315950 }, { "epoch": 37.28173666823974, "grad_norm": 0.08233185857534409, "learning_rate": 1.520062515948632e-05, "loss": 0.0011, "step": 316000 }, { "epoch": 37.28173666823974, "eval_cer": 0.08347676419965576, "eval_loss": 8.450008317595348e-05, "eval_runtime": 2.0592, "eval_samples_per_second": 48.561, "eval_steps_per_second": 1.942, "eval_wer": 0.26, "step": 316000 }, { "epoch": 37.28763567720623, "grad_norm": 0.06506513059139252, "learning_rate": 1.5187288909256331e-05, "loss": 0.0005, "step": 316050 }, { "epoch": 37.29353468617272, "grad_norm": 0.0010322211310267448, "learning_rate": 1.5173957464183958e-05, "loss": 0.0008, "step": 316100 }, { "epoch": 37.299433695139214, "grad_norm": 0.05965966358780861, "learning_rate": 1.5160630826109323e-05, "loss": 0.001, "step": 316150 }, { "epoch": 37.30533270410571, "grad_norm": 0.010953654535114765, "learning_rate": 1.514730899687189e-05, "loss": 0.0008, "step": 316200 }, { "epoch": 37.3112317130722, "grad_norm": 0.22263668477535248, "learning_rate": 1.5133991978310452e-05, "loss": 0.0013, "step": 316250 }, { "epoch": 37.3171307220387, "grad_norm": 0.008631766773760319, "learning_rate": 1.5120679772263147e-05, "loss": 0.0007, "step": 316300 }, { "epoch": 37.32302973100519, "grad_norm": 0.01218955498188734, "learning_rate": 1.5107372380567431e-05, "loss": 0.0009, "step": 316350 }, { "epoch": 37.32892873997169, "grad_norm": 0.08458658307790756, "learning_rate": 1.5094069805060124e-05, "loss": 0.0011, "step": 316400 }, { "epoch": 37.33482774893818, "grad_norm": 0.19045421481132507, "learning_rate": 1.5080772047577336e-05, "loss": 0.0008, "step": 316450 }, { "epoch": 37.34072675790467, "grad_norm": 0.06680509448051453, "learning_rate": 1.5067479109954574e-05, "loss": 0.0008, "step": 316500 }, { "epoch": 37.34662576687116, "grad_norm": 0.042554277926683426, "learning_rate": 1.505419099402664e-05, "loss": 0.0009, "step": 316550 }, { "epoch": 37.35252477583766, "grad_norm": 0.015449280850589275, "learning_rate": 1.5040907701627666e-05, "loss": 0.0011, "step": 316600 }, { "epoch": 37.35842378480415, "grad_norm": 0.01596813276410103, "learning_rate": 1.5027629234591134e-05, "loss": 0.001, "step": 316650 }, { "epoch": 37.36432279377065, "grad_norm": 0.00017659047443885356, "learning_rate": 1.5014355594749857e-05, "loss": 0.001, "step": 316700 }, { "epoch": 37.37022180273714, "grad_norm": 0.015041716396808624, "learning_rate": 1.500108678393598e-05, "loss": 0.0008, "step": 316750 }, { "epoch": 37.376120811703636, "grad_norm": 0.010508161038160324, "learning_rate": 1.4987822803980977e-05, "loss": 0.0011, "step": 316800 }, { "epoch": 37.38201982067013, "grad_norm": 0.022587258368730545, "learning_rate": 1.4974563656715668e-05, "loss": 0.0004, "step": 316850 }, { "epoch": 37.38791882963662, "grad_norm": 0.009915085509419441, "learning_rate": 1.4961309343970186e-05, "loss": 0.0013, "step": 316900 }, { "epoch": 37.39381783860311, "grad_norm": 0.004092253744602203, "learning_rate": 1.4948059867574016e-05, "loss": 0.0011, "step": 316950 }, { "epoch": 37.39971684756961, "grad_norm": 0.04098138585686684, "learning_rate": 1.4934815229355964e-05, "loss": 0.0013, "step": 317000 }, { "epoch": 37.39971684756961, "eval_cer": 0.08347676419965576, "eval_loss": 4.16533621319104e-05, "eval_runtime": 2.0541, "eval_samples_per_second": 48.684, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 317000 }, { "epoch": 37.4056158565361, "grad_norm": 0.01939857006072998, "learning_rate": 1.492157543114417e-05, "loss": 0.0009, "step": 317050 }, { "epoch": 37.411514865502596, "grad_norm": 0.008861702866852283, "learning_rate": 1.4908340474766102e-05, "loss": 0.0005, "step": 317100 }, { "epoch": 37.41741387446909, "grad_norm": 0.014713901095092297, "learning_rate": 1.4895110362048587e-05, "loss": 0.0012, "step": 317150 }, { "epoch": 37.423312883435585, "grad_norm": 0.0013320280704647303, "learning_rate": 1.4881885094817749e-05, "loss": 0.0005, "step": 317200 }, { "epoch": 37.42921189240208, "grad_norm": 0.08329498767852783, "learning_rate": 1.4868664674899058e-05, "loss": 0.0015, "step": 317250 }, { "epoch": 37.43511090136857, "grad_norm": 0.00565696507692337, "learning_rate": 1.4855449104117297e-05, "loss": 0.0008, "step": 317300 }, { "epoch": 37.44100991033506, "grad_norm": 0.018330048769712448, "learning_rate": 1.4842238384296624e-05, "loss": 0.0013, "step": 317350 }, { "epoch": 37.446908919301556, "grad_norm": 0.004562980029731989, "learning_rate": 1.4829032517260489e-05, "loss": 0.0009, "step": 317400 }, { "epoch": 37.45280792826805, "grad_norm": 0.2471553236246109, "learning_rate": 1.4815831504831678e-05, "loss": 0.0006, "step": 317450 }, { "epoch": 37.458706937234545, "grad_norm": 0.0010206064907833934, "learning_rate": 1.4802635348832312e-05, "loss": 0.0012, "step": 317500 }, { "epoch": 37.46460594620104, "grad_norm": 0.007800253573805094, "learning_rate": 1.478944405108384e-05, "loss": 0.0012, "step": 317550 }, { "epoch": 37.470504955167534, "grad_norm": 0.004522912669926882, "learning_rate": 1.477625761340704e-05, "loss": 0.0013, "step": 317600 }, { "epoch": 37.47640396413403, "grad_norm": 0.04268435388803482, "learning_rate": 1.4763076037622026e-05, "loss": 0.0007, "step": 317650 }, { "epoch": 37.482302973100516, "grad_norm": 0.02576586790382862, "learning_rate": 1.4749899325548233e-05, "loss": 0.0011, "step": 317700 }, { "epoch": 37.48820198206701, "grad_norm": 0.0008378693601116538, "learning_rate": 1.4736727479004426e-05, "loss": 0.0014, "step": 317750 }, { "epoch": 37.494100991033505, "grad_norm": 0.17029093205928802, "learning_rate": 1.472356049980868e-05, "loss": 0.0012, "step": 317800 }, { "epoch": 37.5, "grad_norm": 0.0008484760182909667, "learning_rate": 1.4710398389778457e-05, "loss": 0.0014, "step": 317850 }, { "epoch": 37.505899008966495, "grad_norm": 0.010450244881212711, "learning_rate": 1.469724115073049e-05, "loss": 0.0007, "step": 317900 }, { "epoch": 37.51179801793299, "grad_norm": 0.037868428975343704, "learning_rate": 1.4684088784480854e-05, "loss": 0.0006, "step": 317950 }, { "epoch": 37.517697026899484, "grad_norm": 0.008311934769153595, "learning_rate": 1.4670941292844953e-05, "loss": 0.0008, "step": 318000 }, { "epoch": 37.517697026899484, "eval_cer": 0.08347676419965576, "eval_loss": 3.533971175784245e-05, "eval_runtime": 2.0736, "eval_samples_per_second": 48.225, "eval_steps_per_second": 1.929, "eval_wer": 0.26, "step": 318000 }, { "epoch": 37.52359603586597, "grad_norm": 0.03847280144691467, "learning_rate": 1.4657798677637523e-05, "loss": 0.0005, "step": 318050 }, { "epoch": 37.529495044832466, "grad_norm": 0.0010355724953114986, "learning_rate": 1.4644660940672627e-05, "loss": 0.0013, "step": 318100 }, { "epoch": 37.53539405379896, "grad_norm": 0.02118888683617115, "learning_rate": 1.4631528083763646e-05, "loss": 0.0008, "step": 318150 }, { "epoch": 37.541293062765455, "grad_norm": 0.004797615576535463, "learning_rate": 1.4618400108723296e-05, "loss": 0.0014, "step": 318200 }, { "epoch": 37.54719207173195, "grad_norm": 0.011795401573181152, "learning_rate": 1.4605277017363617e-05, "loss": 0.0006, "step": 318250 }, { "epoch": 37.553091080698444, "grad_norm": 0.008124439977109432, "learning_rate": 1.4592158811495965e-05, "loss": 0.0011, "step": 318300 }, { "epoch": 37.55899008966494, "grad_norm": 0.03401247039437294, "learning_rate": 1.4579045492931042e-05, "loss": 0.0013, "step": 318350 }, { "epoch": 37.56488909863143, "grad_norm": 0.0009035018156282604, "learning_rate": 1.4565937063478863e-05, "loss": 0.0011, "step": 318400 }, { "epoch": 37.57078810759792, "grad_norm": 0.002136414172127843, "learning_rate": 1.4552833524948745e-05, "loss": 0.0007, "step": 318450 }, { "epoch": 37.576687116564415, "grad_norm": 6.674328324152157e-05, "learning_rate": 1.4539734879149386e-05, "loss": 0.0011, "step": 318500 }, { "epoch": 37.58258612553091, "grad_norm": 0.08699451386928558, "learning_rate": 1.4526641127888774e-05, "loss": 0.0006, "step": 318550 }, { "epoch": 37.588485134497404, "grad_norm": 0.050944436341524124, "learning_rate": 1.4513552272974207e-05, "loss": 0.001, "step": 318600 }, { "epoch": 37.5943841434639, "grad_norm": 0.06147132068872452, "learning_rate": 1.450046831621234e-05, "loss": 0.0008, "step": 318650 }, { "epoch": 37.60028315243039, "grad_norm": 0.002254872117191553, "learning_rate": 1.4487389259409123e-05, "loss": 0.0012, "step": 318700 }, { "epoch": 37.60618216139689, "grad_norm": 0.11810649186372757, "learning_rate": 1.447431510436985e-05, "loss": 0.001, "step": 318750 }, { "epoch": 37.61208117036338, "grad_norm": 0.008011071011424065, "learning_rate": 1.446124585289913e-05, "loss": 0.0007, "step": 318800 }, { "epoch": 37.61798017932987, "grad_norm": 0.12861105799674988, "learning_rate": 1.4448181506800895e-05, "loss": 0.0016, "step": 318850 }, { "epoch": 37.623879188296364, "grad_norm": 0.0006058955914340913, "learning_rate": 1.4435122067878403e-05, "loss": 0.0011, "step": 318900 }, { "epoch": 37.62977819726286, "grad_norm": 0.004823251627385616, "learning_rate": 1.442206753793422e-05, "loss": 0.0006, "step": 318950 }, { "epoch": 37.63567720622935, "grad_norm": 0.012166328728199005, "learning_rate": 1.4409017918770268e-05, "loss": 0.0009, "step": 319000 }, { "epoch": 37.63567720622935, "eval_cer": 0.08347676419965576, "eval_loss": 4.079991776961833e-05, "eval_runtime": 2.1113, "eval_samples_per_second": 47.364, "eval_steps_per_second": 1.895, "eval_wer": 0.26, "step": 319000 }, { "epoch": 37.64157621519585, "grad_norm": 0.018747864291071892, "learning_rate": 1.4395973212187769e-05, "loss": 0.0011, "step": 319050 }, { "epoch": 37.64747522416234, "grad_norm": 0.018416590988636017, "learning_rate": 1.438293341998726e-05, "loss": 0.0011, "step": 319100 }, { "epoch": 37.65337423312884, "grad_norm": 0.14623750746250153, "learning_rate": 1.4369898543968596e-05, "loss": 0.0006, "step": 319150 }, { "epoch": 37.65927324209533, "grad_norm": 0.06659508496522903, "learning_rate": 1.4356868585930993e-05, "loss": 0.0006, "step": 319200 }, { "epoch": 37.66517225106182, "grad_norm": 0.13629727065563202, "learning_rate": 1.4343843547672948e-05, "loss": 0.0018, "step": 319250 }, { "epoch": 37.67107126002831, "grad_norm": 0.05431412160396576, "learning_rate": 1.4330823430992296e-05, "loss": 0.0014, "step": 319300 }, { "epoch": 37.67697026899481, "grad_norm": 0.0009612856665626168, "learning_rate": 1.4317808237686182e-05, "loss": 0.0009, "step": 319350 }, { "epoch": 37.6828692779613, "grad_norm": 0.11649346351623535, "learning_rate": 1.4304797969551077e-05, "loss": 0.0005, "step": 319400 }, { "epoch": 37.6887682869278, "grad_norm": 0.04766927286982536, "learning_rate": 1.4291792628382772e-05, "loss": 0.0013, "step": 319450 }, { "epoch": 37.69466729589429, "grad_norm": 0.006173793692141771, "learning_rate": 1.4278792215976389e-05, "loss": 0.0014, "step": 319500 }, { "epoch": 37.700566304860786, "grad_norm": 0.1856531947851181, "learning_rate": 1.4265796734126353e-05, "loss": 0.0007, "step": 319550 }, { "epoch": 37.70646531382728, "grad_norm": 0.426998108625412, "learning_rate": 1.4252806184626416e-05, "loss": 0.001, "step": 319600 }, { "epoch": 37.71236432279377, "grad_norm": 0.002925705164670944, "learning_rate": 1.4239820569269641e-05, "loss": 0.0006, "step": 319650 }, { "epoch": 37.71826333176026, "grad_norm": 0.010465067811310291, "learning_rate": 1.422683988984843e-05, "loss": 0.0014, "step": 319700 }, { "epoch": 37.72416234072676, "grad_norm": 0.012745576910674572, "learning_rate": 1.4213864148154481e-05, "loss": 0.0009, "step": 319750 }, { "epoch": 37.73006134969325, "grad_norm": 0.011050662957131863, "learning_rate": 1.4200893345978816e-05, "loss": 0.0009, "step": 319800 }, { "epoch": 37.735960358659746, "grad_norm": 0.006762670818716288, "learning_rate": 1.4187927485111795e-05, "loss": 0.0015, "step": 319850 }, { "epoch": 37.74185936762624, "grad_norm": 0.0042379568330943584, "learning_rate": 1.4174966567343079e-05, "loss": 0.0011, "step": 319900 }, { "epoch": 37.747758376592735, "grad_norm": 0.0005791859584860504, "learning_rate": 1.4162010594461639e-05, "loss": 0.001, "step": 319950 }, { "epoch": 37.75365738555923, "grad_norm": 0.0006246957345865667, "learning_rate": 1.4149059568255779e-05, "loss": 0.0008, "step": 320000 }, { "epoch": 37.75365738555923, "eval_cer": 0.08347676419965576, "eval_loss": 2.6895371775026433e-05, "eval_runtime": 2.0252, "eval_samples_per_second": 49.378, "eval_steps_per_second": 1.975, "eval_wer": 0.26, "step": 320000 }, { "epoch": 37.75955639452572, "grad_norm": 0.0005713654682040215, "learning_rate": 1.413611349051311e-05, "loss": 0.0009, "step": 320050 }, { "epoch": 37.76545540349221, "grad_norm": 0.012760796584188938, "learning_rate": 1.4123172363020565e-05, "loss": 0.0011, "step": 320100 }, { "epoch": 37.771354412458706, "grad_norm": 0.00472645228728652, "learning_rate": 1.4110236187564397e-05, "loss": 0.0005, "step": 320150 }, { "epoch": 37.7772534214252, "grad_norm": 0.0777813270688057, "learning_rate": 1.4097304965930158e-05, "loss": 0.0011, "step": 320200 }, { "epoch": 37.783152430391695, "grad_norm": 0.0011614346876740456, "learning_rate": 1.4084378699902745e-05, "loss": 0.0009, "step": 320250 }, { "epoch": 37.78905143935819, "grad_norm": 0.07712720334529877, "learning_rate": 1.4071457391266346e-05, "loss": 0.0006, "step": 320300 }, { "epoch": 37.794950448324684, "grad_norm": 0.007473218720406294, "learning_rate": 1.4058541041804479e-05, "loss": 0.001, "step": 320350 }, { "epoch": 37.80084945729118, "grad_norm": 0.011287434957921505, "learning_rate": 1.4045629653299952e-05, "loss": 0.001, "step": 320400 }, { "epoch": 37.806748466257666, "grad_norm": 0.005622709169983864, "learning_rate": 1.4032723227534944e-05, "loss": 0.0009, "step": 320450 }, { "epoch": 37.81264747522416, "grad_norm": 0.011877628043293953, "learning_rate": 1.4019821766290897e-05, "loss": 0.0007, "step": 320500 }, { "epoch": 37.818546484190655, "grad_norm": 0.08149763941764832, "learning_rate": 1.4006925271348582e-05, "loss": 0.0006, "step": 320550 }, { "epoch": 37.82444549315715, "grad_norm": 0.002467978512868285, "learning_rate": 1.3994033744488078e-05, "loss": 0.0016, "step": 320600 }, { "epoch": 37.830344502123644, "grad_norm": 0.12819834053516388, "learning_rate": 1.3981147187488807e-05, "loss": 0.0007, "step": 320650 }, { "epoch": 37.83624351109014, "grad_norm": 0.05280236899852753, "learning_rate": 1.3968265602129482e-05, "loss": 0.0007, "step": 320700 }, { "epoch": 37.84214252005663, "grad_norm": 0.1605154126882553, "learning_rate": 1.3955388990188128e-05, "loss": 0.0009, "step": 320750 }, { "epoch": 37.84804152902312, "grad_norm": 0.04813385009765625, "learning_rate": 1.3942517353442092e-05, "loss": 0.0015, "step": 320800 }, { "epoch": 37.853940537989615, "grad_norm": 0.01095653884112835, "learning_rate": 1.3929650693668023e-05, "loss": 0.0008, "step": 320850 }, { "epoch": 37.85983954695611, "grad_norm": 0.011177117936313152, "learning_rate": 1.3916789012641896e-05, "loss": 0.0012, "step": 320900 }, { "epoch": 37.865738555922604, "grad_norm": 0.4118684232234955, "learning_rate": 1.390393231213899e-05, "loss": 0.0012, "step": 320950 }, { "epoch": 37.8716375648891, "grad_norm": 0.11936071515083313, "learning_rate": 1.3891080593933909e-05, "loss": 0.0012, "step": 321000 }, { "epoch": 37.8716375648891, "eval_cer": 0.08347676419965576, "eval_loss": 3.841842044494115e-05, "eval_runtime": 2.1288, "eval_samples_per_second": 46.974, "eval_steps_per_second": 1.879, "eval_wer": 0.26, "step": 321000 }, { "epoch": 37.87753657385559, "grad_norm": 0.07936596125364304, "learning_rate": 1.3878233859800555e-05, "loss": 0.0007, "step": 321050 }, { "epoch": 37.88343558282209, "grad_norm": 0.00543028861284256, "learning_rate": 1.3865392111512127e-05, "loss": 0.0011, "step": 321100 }, { "epoch": 37.88933459178858, "grad_norm": 0.01205793023109436, "learning_rate": 1.3852555350841196e-05, "loss": 0.0013, "step": 321150 }, { "epoch": 37.89523360075507, "grad_norm": 0.10532195121049881, "learning_rate": 1.3839723579559582e-05, "loss": 0.001, "step": 321200 }, { "epoch": 37.901132609721564, "grad_norm": 0.02600259706377983, "learning_rate": 1.382689679943845e-05, "loss": 0.0009, "step": 321250 }, { "epoch": 37.90703161868806, "grad_norm": 0.007146848365664482, "learning_rate": 1.3814075012248256e-05, "loss": 0.0009, "step": 321300 }, { "epoch": 37.91293062765455, "grad_norm": 0.006159541662782431, "learning_rate": 1.3801258219758778e-05, "loss": 0.0006, "step": 321350 }, { "epoch": 37.91882963662105, "grad_norm": 0.032256241887807846, "learning_rate": 1.3788446423739104e-05, "loss": 0.0008, "step": 321400 }, { "epoch": 37.92472864558754, "grad_norm": 0.07661381363868713, "learning_rate": 1.3775639625957632e-05, "loss": 0.001, "step": 321450 }, { "epoch": 37.93062765455404, "grad_norm": 0.006345294415950775, "learning_rate": 1.3762837828182067e-05, "loss": 0.001, "step": 321500 }, { "epoch": 37.93652666352053, "grad_norm": 0.013083561323583126, "learning_rate": 1.3750041032179428e-05, "loss": 0.0008, "step": 321550 }, { "epoch": 37.94242567248702, "grad_norm": 0.213777557015419, "learning_rate": 1.3737249239716043e-05, "loss": 0.0013, "step": 321600 }, { "epoch": 37.948324681453514, "grad_norm": 0.05856970697641373, "learning_rate": 1.3724462452557545e-05, "loss": 0.0008, "step": 321650 }, { "epoch": 37.95422369042001, "grad_norm": 0.030228104442358017, "learning_rate": 1.3711680672468879e-05, "loss": 0.0008, "step": 321700 }, { "epoch": 37.9601226993865, "grad_norm": 0.23562397062778473, "learning_rate": 1.3698903901214282e-05, "loss": 0.0009, "step": 321750 }, { "epoch": 37.966021708353, "grad_norm": 0.04599441960453987, "learning_rate": 1.3686132140557356e-05, "loss": 0.001, "step": 321800 }, { "epoch": 37.97192071731949, "grad_norm": 0.05277002602815628, "learning_rate": 1.367336539226095e-05, "loss": 0.001, "step": 321850 }, { "epoch": 37.977819726285986, "grad_norm": 0.0040909117087721825, "learning_rate": 1.3660603658087245e-05, "loss": 0.001, "step": 321900 }, { "epoch": 37.98371873525248, "grad_norm": 0.08871497958898544, "learning_rate": 1.3647846939797726e-05, "loss": 0.0015, "step": 321950 }, { "epoch": 37.98961774421897, "grad_norm": 0.0003521889157127589, "learning_rate": 1.3635095239153189e-05, "loss": 0.0011, "step": 322000 }, { "epoch": 37.98961774421897, "eval_cer": 0.08347676419965576, "eval_loss": 4.0760285628493875e-05, "eval_runtime": 2.0503, "eval_samples_per_second": 48.772, "eval_steps_per_second": 1.951, "eval_wer": 0.26, "step": 322000 }, { "epoch": 37.99551675318546, "grad_norm": 0.010578217916190624, "learning_rate": 1.362234855791374e-05, "loss": 0.0007, "step": 322050 }, { "epoch": 38.00141576215196, "grad_norm": 0.005686613265424967, "learning_rate": 1.3609606897838784e-05, "loss": 0.0009, "step": 322100 }, { "epoch": 38.00731477111845, "grad_norm": 0.23928134143352509, "learning_rate": 1.3596870260687039e-05, "loss": 0.001, "step": 322150 }, { "epoch": 38.013213780084946, "grad_norm": 0.061173662543296814, "learning_rate": 1.3584138648216527e-05, "loss": 0.0009, "step": 322200 }, { "epoch": 38.01911278905144, "grad_norm": 0.16370625793933868, "learning_rate": 1.3571412062184563e-05, "loss": 0.0007, "step": 322250 }, { "epoch": 38.025011798017935, "grad_norm": 0.0005818267818540335, "learning_rate": 1.3558690504347815e-05, "loss": 0.0009, "step": 322300 }, { "epoch": 38.03091080698443, "grad_norm": 0.016688216477632523, "learning_rate": 1.3545973976462206e-05, "loss": 0.0012, "step": 322350 }, { "epoch": 38.03680981595092, "grad_norm": 0.0350341759622097, "learning_rate": 1.3533262480282983e-05, "loss": 0.0007, "step": 322400 }, { "epoch": 38.04270882491741, "grad_norm": 0.011823485605418682, "learning_rate": 1.3520556017564689e-05, "loss": 0.0007, "step": 322450 }, { "epoch": 38.048607833883906, "grad_norm": 0.06686527281999588, "learning_rate": 1.3507854590061209e-05, "loss": 0.001, "step": 322500 }, { "epoch": 38.0545068428504, "grad_norm": 0.12372317165136337, "learning_rate": 1.3495158199525688e-05, "loss": 0.0012, "step": 322550 }, { "epoch": 38.060405851816896, "grad_norm": 0.1103016659617424, "learning_rate": 1.3482466847710596e-05, "loss": 0.0008, "step": 322600 }, { "epoch": 38.06630486078339, "grad_norm": 0.1396724432706833, "learning_rate": 1.346978053636771e-05, "loss": 0.0006, "step": 322650 }, { "epoch": 38.072203869749885, "grad_norm": 0.05740782991051674, "learning_rate": 1.3457099267248103e-05, "loss": 0.0009, "step": 322700 }, { "epoch": 38.07810287871638, "grad_norm": 0.008730920031666756, "learning_rate": 1.3444423042102156e-05, "loss": 0.0005, "step": 322750 }, { "epoch": 38.08400188768287, "grad_norm": 0.0006437308620661497, "learning_rate": 1.3431751862679553e-05, "loss": 0.0008, "step": 322800 }, { "epoch": 38.08990089664936, "grad_norm": 0.03494541719555855, "learning_rate": 1.341908573072928e-05, "loss": 0.0007, "step": 322850 }, { "epoch": 38.095799905615856, "grad_norm": 0.14404699206352234, "learning_rate": 1.3406424647999632e-05, "loss": 0.0012, "step": 322900 }, { "epoch": 38.10169891458235, "grad_norm": 0.09167079627513885, "learning_rate": 1.33937686162382e-05, "loss": 0.0008, "step": 322950 }, { "epoch": 38.107597923548845, "grad_norm": 0.010063707828521729, "learning_rate": 1.3381117637191886e-05, "loss": 0.001, "step": 323000 }, { "epoch": 38.107597923548845, "eval_cer": 0.08347676419965576, "eval_loss": 5.5098800658015534e-05, "eval_runtime": 2.0808, "eval_samples_per_second": 48.058, "eval_steps_per_second": 1.922, "eval_wer": 0.26, "step": 323000 }, { "epoch": 38.11349693251534, "grad_norm": 0.0030377479270100594, "learning_rate": 1.3368471712606889e-05, "loss": 0.001, "step": 323050 }, { "epoch": 38.119395941481834, "grad_norm": 0.011408086866140366, "learning_rate": 1.335583084422869e-05, "loss": 0.0015, "step": 323100 }, { "epoch": 38.12529495044832, "grad_norm": 0.012202152982354164, "learning_rate": 1.334319503380213e-05, "loss": 0.0019, "step": 323150 }, { "epoch": 38.131193959414816, "grad_norm": 0.009740849025547504, "learning_rate": 1.3330564283071295e-05, "loss": 0.0006, "step": 323200 }, { "epoch": 38.13709296838131, "grad_norm": 0.01646096631884575, "learning_rate": 1.3317938593779594e-05, "loss": 0.0012, "step": 323250 }, { "epoch": 38.142991977347805, "grad_norm": 0.0640711635351181, "learning_rate": 1.3305317967669739e-05, "loss": 0.0013, "step": 323300 }, { "epoch": 38.1488909863143, "grad_norm": 0.001340393559075892, "learning_rate": 1.3292702406483737e-05, "loss": 0.0009, "step": 323350 }, { "epoch": 38.154789995280794, "grad_norm": 0.025299226865172386, "learning_rate": 1.32800919119629e-05, "loss": 0.0008, "step": 323400 }, { "epoch": 38.16068900424729, "grad_norm": 0.027115289121866226, "learning_rate": 1.326748648584784e-05, "loss": 0.0014, "step": 323450 }, { "epoch": 38.16658801321378, "grad_norm": 0.022416645660996437, "learning_rate": 1.3254886129878475e-05, "loss": 0.0013, "step": 323500 }, { "epoch": 38.17248702218027, "grad_norm": 0.006852483842521906, "learning_rate": 1.3242290845794004e-05, "loss": 0.0007, "step": 323550 }, { "epoch": 38.178386031146765, "grad_norm": 0.28651830554008484, "learning_rate": 1.3229700635332947e-05, "loss": 0.001, "step": 323600 }, { "epoch": 38.18428504011326, "grad_norm": 0.3473757803440094, "learning_rate": 1.3217115500233124e-05, "loss": 0.001, "step": 323650 }, { "epoch": 38.190184049079754, "grad_norm": 0.05459535866975784, "learning_rate": 1.3204535442231614e-05, "loss": 0.0009, "step": 323700 }, { "epoch": 38.19608305804625, "grad_norm": 0.0017720378236845136, "learning_rate": 1.3191960463064873e-05, "loss": 0.0008, "step": 323750 }, { "epoch": 38.20198206701274, "grad_norm": 0.006297956686466932, "learning_rate": 1.3179390564468586e-05, "loss": 0.0014, "step": 323800 }, { "epoch": 38.20788107597924, "grad_norm": 0.0032427783589810133, "learning_rate": 1.3166825748177764e-05, "loss": 0.001, "step": 323850 }, { "epoch": 38.21378008494573, "grad_norm": 0.12937131524085999, "learning_rate": 1.3154266015926704e-05, "loss": 0.0013, "step": 323900 }, { "epoch": 38.21967909391222, "grad_norm": 0.014557153917849064, "learning_rate": 1.3141711369449034e-05, "loss": 0.0009, "step": 323950 }, { "epoch": 38.225578102878714, "grad_norm": 0.011551842093467712, "learning_rate": 1.312916181047764e-05, "loss": 0.0008, "step": 324000 }, { "epoch": 38.225578102878714, "eval_cer": 0.08347676419965576, "eval_loss": 5.926963058300316e-05, "eval_runtime": 2.0419, "eval_samples_per_second": 48.975, "eval_steps_per_second": 1.959, "eval_wer": 0.26, "step": 324000 }, { "epoch": 38.23147711184521, "grad_norm": 0.0025787826161831617, "learning_rate": 1.3116617340744736e-05, "loss": 0.0007, "step": 324050 }, { "epoch": 38.2373761208117, "grad_norm": 0.05401240289211273, "learning_rate": 1.3104077961981809e-05, "loss": 0.0007, "step": 324100 }, { "epoch": 38.2432751297782, "grad_norm": 0.02120387926697731, "learning_rate": 1.3091543675919653e-05, "loss": 0.0006, "step": 324150 }, { "epoch": 38.24917413874469, "grad_norm": 0.0935678482055664, "learning_rate": 1.3079014484288372e-05, "loss": 0.0025, "step": 324200 }, { "epoch": 38.25507314771119, "grad_norm": 0.02230045199394226, "learning_rate": 1.3066490388817349e-05, "loss": 0.0008, "step": 324250 }, { "epoch": 38.26097215667768, "grad_norm": 0.015608621761202812, "learning_rate": 1.3053971391235265e-05, "loss": 0.0009, "step": 324300 }, { "epoch": 38.26687116564417, "grad_norm": 0.0017300255130976439, "learning_rate": 1.3041457493270116e-05, "loss": 0.0014, "step": 324350 }, { "epoch": 38.27277017461066, "grad_norm": 0.007222350221127272, "learning_rate": 1.3028948696649162e-05, "loss": 0.0009, "step": 324400 }, { "epoch": 38.27866918357716, "grad_norm": 0.02373759262263775, "learning_rate": 1.3016445003098981e-05, "loss": 0.0005, "step": 324450 }, { "epoch": 38.28456819254365, "grad_norm": 0.0037500401958823204, "learning_rate": 1.3003946414345463e-05, "loss": 0.0011, "step": 324500 }, { "epoch": 38.29046720151015, "grad_norm": 0.0024601214099675417, "learning_rate": 1.2991452932113762e-05, "loss": 0.001, "step": 324550 }, { "epoch": 38.29636621047664, "grad_norm": 0.025944022461771965, "learning_rate": 1.2978964558128337e-05, "loss": 0.0009, "step": 324600 }, { "epoch": 38.302265219443136, "grad_norm": 0.024724870920181274, "learning_rate": 1.2966481294112943e-05, "loss": 0.0004, "step": 324650 }, { "epoch": 38.30816422840963, "grad_norm": 0.003143553389236331, "learning_rate": 1.2954003141790627e-05, "loss": 0.0013, "step": 324700 }, { "epoch": 38.31406323737612, "grad_norm": 0.003714914433658123, "learning_rate": 1.2941530102883747e-05, "loss": 0.0006, "step": 324750 }, { "epoch": 38.31996224634261, "grad_norm": 0.0003772545314859599, "learning_rate": 1.2929062179113926e-05, "loss": 0.0009, "step": 324800 }, { "epoch": 38.32586125530911, "grad_norm": 0.008630127646028996, "learning_rate": 1.2916599372202105e-05, "loss": 0.0009, "step": 324850 }, { "epoch": 38.3317602642756, "grad_norm": 0.018126357346773148, "learning_rate": 1.2904141683868509e-05, "loss": 0.0007, "step": 324900 }, { "epoch": 38.337659273242096, "grad_norm": 0.0014829152496531606, "learning_rate": 1.289168911583266e-05, "loss": 0.0011, "step": 324950 }, { "epoch": 38.34355828220859, "grad_norm": 0.04327073320746422, "learning_rate": 1.2879241669813369e-05, "loss": 0.0007, "step": 325000 }, { "epoch": 38.34355828220859, "eval_cer": 0.08347676419965576, "eval_loss": 4.786829958902672e-05, "eval_runtime": 2.1197, "eval_samples_per_second": 47.176, "eval_steps_per_second": 1.887, "eval_wer": 0.26, "step": 325000 }, { "epoch": 38.349457291175085, "grad_norm": 0.0023889774456620216, "learning_rate": 1.286679934752873e-05, "loss": 0.0006, "step": 325050 }, { "epoch": 38.35535630014158, "grad_norm": 0.01160081010311842, "learning_rate": 1.285436215069617e-05, "loss": 0.001, "step": 325100 }, { "epoch": 38.36125530910807, "grad_norm": 0.006186833139508963, "learning_rate": 1.2841930081032361e-05, "loss": 0.0003, "step": 325150 }, { "epoch": 38.36715431807456, "grad_norm": 0.10148666054010391, "learning_rate": 1.2829503140253296e-05, "loss": 0.0009, "step": 325200 }, { "epoch": 38.373053327041056, "grad_norm": 0.0018412504578009248, "learning_rate": 1.281708133007425e-05, "loss": 0.0007, "step": 325250 }, { "epoch": 38.37895233600755, "grad_norm": 0.0011937059462070465, "learning_rate": 1.280466465220978e-05, "loss": 0.0008, "step": 325300 }, { "epoch": 38.384851344974045, "grad_norm": 0.002457516733556986, "learning_rate": 1.2792253108373758e-05, "loss": 0.0008, "step": 325350 }, { "epoch": 38.39075035394054, "grad_norm": 0.0006734258495271206, "learning_rate": 1.2779846700279329e-05, "loss": 0.001, "step": 325400 }, { "epoch": 38.396649362907034, "grad_norm": 0.09364018589258194, "learning_rate": 1.2767445429638935e-05, "loss": 0.001, "step": 325450 }, { "epoch": 38.40254837187353, "grad_norm": 0.0006044842884875834, "learning_rate": 1.2755049298164311e-05, "loss": 0.0014, "step": 325500 }, { "epoch": 38.408447380840016, "grad_norm": 0.006285591516643763, "learning_rate": 1.2742658307566463e-05, "loss": 0.0011, "step": 325550 }, { "epoch": 38.41434638980651, "grad_norm": 0.03171022608876228, "learning_rate": 1.2730272459555736e-05, "loss": 0.0009, "step": 325600 }, { "epoch": 38.420245398773005, "grad_norm": 0.05169084295630455, "learning_rate": 1.2717891755841722e-05, "loss": 0.0008, "step": 325650 }, { "epoch": 38.4261444077395, "grad_norm": 0.006498909555375576, "learning_rate": 1.270551619813331e-05, "loss": 0.0008, "step": 325700 }, { "epoch": 38.432043416705994, "grad_norm": 0.00036449861363507807, "learning_rate": 1.2693145788138672e-05, "loss": 0.0011, "step": 325750 }, { "epoch": 38.43794242567249, "grad_norm": 8.096233068499714e-05, "learning_rate": 1.268078052756531e-05, "loss": 0.0009, "step": 325800 }, { "epoch": 38.44384143463898, "grad_norm": 0.00281429011374712, "learning_rate": 1.266842041811997e-05, "loss": 0.0009, "step": 325850 }, { "epoch": 38.44974044360547, "grad_norm": 0.00526901800185442, "learning_rate": 1.265606546150871e-05, "loss": 0.0003, "step": 325900 }, { "epoch": 38.455639452571965, "grad_norm": 0.018037274479866028, "learning_rate": 1.2643715659436855e-05, "loss": 0.0007, "step": 325950 }, { "epoch": 38.46153846153846, "grad_norm": 0.04310476407408714, "learning_rate": 1.263137101360905e-05, "loss": 0.0012, "step": 326000 }, { "epoch": 38.46153846153846, "eval_cer": 0.08347676419965576, "eval_loss": 8.652903488837183e-05, "eval_runtime": 2.0347, "eval_samples_per_second": 49.147, "eval_steps_per_second": 1.966, "eval_wer": 0.26, "step": 326000 }, { "epoch": 38.467437470504954, "grad_norm": 0.01659482531249523, "learning_rate": 1.2619031525729209e-05, "loss": 0.0016, "step": 326050 }, { "epoch": 38.47333647947145, "grad_norm": 0.03226239234209061, "learning_rate": 1.2606697197500529e-05, "loss": 0.0006, "step": 326100 }, { "epoch": 38.47923548843794, "grad_norm": 0.0436723418533802, "learning_rate": 1.2594368030625508e-05, "loss": 0.001, "step": 326150 }, { "epoch": 38.48513449740444, "grad_norm": 0.06407372653484344, "learning_rate": 1.2582044026805922e-05, "loss": 0.0008, "step": 326200 }, { "epoch": 38.49103350637093, "grad_norm": 0.14186500012874603, "learning_rate": 1.2569725187742848e-05, "loss": 0.001, "step": 326250 }, { "epoch": 38.49693251533742, "grad_norm": 0.0010821642354130745, "learning_rate": 1.2557411515136636e-05, "loss": 0.0013, "step": 326300 }, { "epoch": 38.502831524303915, "grad_norm": 0.1250559389591217, "learning_rate": 1.2545103010686921e-05, "loss": 0.0007, "step": 326350 }, { "epoch": 38.50873053327041, "grad_norm": 0.14057525992393494, "learning_rate": 1.2532799676092627e-05, "loss": 0.0007, "step": 326400 }, { "epoch": 38.514629542236904, "grad_norm": 0.007796843536198139, "learning_rate": 1.2520501513051996e-05, "loss": 0.0007, "step": 326450 }, { "epoch": 38.5205285512034, "grad_norm": 0.034982871264219284, "learning_rate": 1.250820852326251e-05, "loss": 0.001, "step": 326500 }, { "epoch": 38.52642756016989, "grad_norm": 0.0006374497897922993, "learning_rate": 1.249592070842096e-05, "loss": 0.0009, "step": 326550 }, { "epoch": 38.53232656913639, "grad_norm": 0.02247411571443081, "learning_rate": 1.2483638070223413e-05, "loss": 0.001, "step": 326600 }, { "epoch": 38.53822557810288, "grad_norm": 0.002286682603880763, "learning_rate": 1.2471360610365234e-05, "loss": 0.0014, "step": 326650 }, { "epoch": 38.54412458706937, "grad_norm": 0.013060844503343105, "learning_rate": 1.2459088330541068e-05, "loss": 0.0006, "step": 326700 }, { "epoch": 38.550023596035864, "grad_norm": 0.17724235355854034, "learning_rate": 1.2446821232444833e-05, "loss": 0.0012, "step": 326750 }, { "epoch": 38.55592260500236, "grad_norm": 0.002937947865575552, "learning_rate": 1.243455931776975e-05, "loss": 0.0007, "step": 326800 }, { "epoch": 38.56182161396885, "grad_norm": 0.016244450584053993, "learning_rate": 1.2422302588208319e-05, "loss": 0.0004, "step": 326850 }, { "epoch": 38.56772062293535, "grad_norm": 0.02203923836350441, "learning_rate": 1.2410051045452309e-05, "loss": 0.0012, "step": 326900 }, { "epoch": 38.57361963190184, "grad_norm": 0.0008064369321800768, "learning_rate": 1.23978046911928e-05, "loss": 0.0005, "step": 326950 }, { "epoch": 38.579518640868336, "grad_norm": 0.062439728528261185, "learning_rate": 1.238556352712012e-05, "loss": 0.0011, "step": 327000 }, { "epoch": 38.579518640868336, "eval_cer": 0.08347676419965576, "eval_loss": 6.834517262177542e-05, "eval_runtime": 2.0324, "eval_samples_per_second": 49.202, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 327000 }, { "epoch": 38.58541764983483, "grad_norm": 0.029904335737228394, "learning_rate": 1.2373327554923936e-05, "loss": 0.0011, "step": 327050 }, { "epoch": 38.59131665880132, "grad_norm": 0.030110927298665047, "learning_rate": 1.2361096776293147e-05, "loss": 0.0007, "step": 327100 }, { "epoch": 38.59721566776781, "grad_norm": 0.08667214214801788, "learning_rate": 1.2348871192915957e-05, "loss": 0.0014, "step": 327150 }, { "epoch": 38.60311467673431, "grad_norm": 0.0474715381860733, "learning_rate": 1.2336650806479827e-05, "loss": 0.0004, "step": 327200 }, { "epoch": 38.6090136857008, "grad_norm": 0.0005142098525539041, "learning_rate": 1.232443561867156e-05, "loss": 0.0012, "step": 327250 }, { "epoch": 38.6149126946673, "grad_norm": 0.3534987270832062, "learning_rate": 1.2312225631177182e-05, "loss": 0.0006, "step": 327300 }, { "epoch": 38.62081170363379, "grad_norm": 0.2585655152797699, "learning_rate": 1.2300020845682025e-05, "loss": 0.0007, "step": 327350 }, { "epoch": 38.626710712600286, "grad_norm": 0.0238750372081995, "learning_rate": 1.2287821263870708e-05, "loss": 0.0008, "step": 327400 }, { "epoch": 38.63260972156678, "grad_norm": 0.03932753950357437, "learning_rate": 1.2275626887427116e-05, "loss": 0.0005, "step": 327450 }, { "epoch": 38.63850873053327, "grad_norm": 0.0035276131238788366, "learning_rate": 1.2263437718034432e-05, "loss": 0.0009, "step": 327500 }, { "epoch": 38.64440773949976, "grad_norm": 0.004212182015180588, "learning_rate": 1.2251253757375108e-05, "loss": 0.0007, "step": 327550 }, { "epoch": 38.65030674846626, "grad_norm": 0.1243007555603981, "learning_rate": 1.2239075007130885e-05, "loss": 0.0013, "step": 327600 }, { "epoch": 38.65620575743275, "grad_norm": 0.06911783665418625, "learning_rate": 1.2226901468982777e-05, "loss": 0.001, "step": 327650 }, { "epoch": 38.662104766399246, "grad_norm": 0.0914042666554451, "learning_rate": 1.2214733144611084e-05, "loss": 0.0009, "step": 327700 }, { "epoch": 38.66800377536574, "grad_norm": 0.002303987042978406, "learning_rate": 1.2202570035695376e-05, "loss": 0.0015, "step": 327750 }, { "epoch": 38.673902784332235, "grad_norm": 0.020636962726712227, "learning_rate": 1.2190412143914537e-05, "loss": 0.0005, "step": 327800 }, { "epoch": 38.67980179329873, "grad_norm": 0.5332985520362854, "learning_rate": 1.217825947094669e-05, "loss": 0.0006, "step": 327850 }, { "epoch": 38.68570080226522, "grad_norm": 0.09984830021858215, "learning_rate": 1.2166112018469262e-05, "loss": 0.0008, "step": 327900 }, { "epoch": 38.69159981123171, "grad_norm": 0.002834880258888006, "learning_rate": 1.2153969788158948e-05, "loss": 0.0012, "step": 327950 }, { "epoch": 38.697498820198206, "grad_norm": 0.01661858521401882, "learning_rate": 1.214183278169172e-05, "loss": 0.0007, "step": 328000 }, { "epoch": 38.697498820198206, "eval_cer": 0.08605851979345955, "eval_loss": 0.0006984819192439318, "eval_runtime": 2.0893, "eval_samples_per_second": 47.864, "eval_steps_per_second": 1.915, "eval_wer": 0.27, "step": 328000 }, { "epoch": 38.7033978291647, "grad_norm": 0.01749107427895069, "learning_rate": 1.2129701000742844e-05, "loss": 0.0007, "step": 328050 }, { "epoch": 38.709296838131195, "grad_norm": 0.06995738297700882, "learning_rate": 1.2117574446986846e-05, "loss": 0.0008, "step": 328100 }, { "epoch": 38.71519584709769, "grad_norm": 0.09908004105091095, "learning_rate": 1.2105453122097549e-05, "loss": 0.0016, "step": 328150 }, { "epoch": 38.721094856064184, "grad_norm": 0.009725704789161682, "learning_rate": 1.2093337027748042e-05, "loss": 0.0009, "step": 328200 }, { "epoch": 38.72699386503068, "grad_norm": 0.007101904135197401, "learning_rate": 1.2081226165610693e-05, "loss": 0.0013, "step": 328250 }, { "epoch": 38.732892873997166, "grad_norm": 0.05806155875325203, "learning_rate": 1.2069120537357154e-05, "loss": 0.0008, "step": 328300 }, { "epoch": 38.73879188296366, "grad_norm": 0.00333024631254375, "learning_rate": 1.2057020144658333e-05, "loss": 0.0011, "step": 328350 }, { "epoch": 38.744690891930155, "grad_norm": 0.0040777577087283134, "learning_rate": 1.2044924989184459e-05, "loss": 0.001, "step": 328400 }, { "epoch": 38.75058990089665, "grad_norm": 0.0014160723658278584, "learning_rate": 1.2032835072605003e-05, "loss": 0.0011, "step": 328450 }, { "epoch": 38.756488909863144, "grad_norm": 0.011910753324627876, "learning_rate": 1.2020750396588725e-05, "loss": 0.0009, "step": 328500 }, { "epoch": 38.76238791882964, "grad_norm": 0.03206611052155495, "learning_rate": 1.2008670962803647e-05, "loss": 0.0011, "step": 328550 }, { "epoch": 38.76828692779613, "grad_norm": 0.0018503785831853747, "learning_rate": 1.199659677291709e-05, "loss": 0.0012, "step": 328600 }, { "epoch": 38.77418593676262, "grad_norm": 0.008740936405956745, "learning_rate": 1.1984527828595638e-05, "loss": 0.0005, "step": 328650 }, { "epoch": 38.780084945729115, "grad_norm": 0.003662056988105178, "learning_rate": 1.197246413150515e-05, "loss": 0.0007, "step": 328700 }, { "epoch": 38.78598395469561, "grad_norm": 0.020939907059073448, "learning_rate": 1.1960405683310766e-05, "loss": 0.0006, "step": 328750 }, { "epoch": 38.791882963662104, "grad_norm": 0.34551718831062317, "learning_rate": 1.1948352485676896e-05, "loss": 0.001, "step": 328800 }, { "epoch": 38.7977819726286, "grad_norm": 0.030527925118803978, "learning_rate": 1.1936304540267218e-05, "loss": 0.0007, "step": 328850 }, { "epoch": 38.80368098159509, "grad_norm": 0.17186327278614044, "learning_rate": 1.1924261848744728e-05, "loss": 0.0011, "step": 328900 }, { "epoch": 38.80957999056159, "grad_norm": 0.00084796262672171, "learning_rate": 1.1912224412771639e-05, "loss": 0.0007, "step": 328950 }, { "epoch": 38.81547899952808, "grad_norm": 0.010351781733334064, "learning_rate": 1.1900192234009477e-05, "loss": 0.0008, "step": 329000 }, { "epoch": 38.81547899952808, "eval_cer": 0.08605851979345955, "eval_loss": 0.0007492555305361748, "eval_runtime": 2.0859, "eval_samples_per_second": 47.942, "eval_steps_per_second": 1.918, "eval_wer": 0.27, "step": 329000 }, { "epoch": 38.82137800849457, "grad_norm": 0.006925256922841072, "learning_rate": 1.1888165314119021e-05, "loss": 0.0006, "step": 329050 }, { "epoch": 38.827277017461064, "grad_norm": 0.015560091473162174, "learning_rate": 1.187614365476032e-05, "loss": 0.001, "step": 329100 }, { "epoch": 38.83317602642756, "grad_norm": 0.0751800611615181, "learning_rate": 1.1864127257592744e-05, "loss": 0.001, "step": 329150 }, { "epoch": 38.83907503539405, "grad_norm": 0.05763586238026619, "learning_rate": 1.1852116124274875e-05, "loss": 0.0009, "step": 329200 }, { "epoch": 38.84497404436055, "grad_norm": 0.0027661731000989676, "learning_rate": 1.184011025646461e-05, "loss": 0.0017, "step": 329250 }, { "epoch": 38.85087305332704, "grad_norm": 0.031117085367441177, "learning_rate": 1.1828109655819097e-05, "loss": 0.0008, "step": 329300 }, { "epoch": 38.85677206229354, "grad_norm": 0.0005726812523789704, "learning_rate": 1.1816114323994765e-05, "loss": 0.0004, "step": 329350 }, { "epoch": 38.86267107126003, "grad_norm": 0.0010296725668013096, "learning_rate": 1.1804124262647314e-05, "loss": 0.0011, "step": 329400 }, { "epoch": 38.86857008022652, "grad_norm": 0.0022486953530460596, "learning_rate": 1.1792139473431729e-05, "loss": 0.001, "step": 329450 }, { "epoch": 38.87446908919301, "grad_norm": 0.003121250541880727, "learning_rate": 1.178015995800224e-05, "loss": 0.0016, "step": 329500 }, { "epoch": 38.88036809815951, "grad_norm": 0.012191368266940117, "learning_rate": 1.1768185718012376e-05, "loss": 0.001, "step": 329550 }, { "epoch": 38.886267107126, "grad_norm": 0.16226674616336823, "learning_rate": 1.1756216755114929e-05, "loss": 0.0013, "step": 329600 }, { "epoch": 38.8921661160925, "grad_norm": 0.002427266677841544, "learning_rate": 1.1744253070961952e-05, "loss": 0.0014, "step": 329650 }, { "epoch": 38.89806512505899, "grad_norm": 0.002096273470669985, "learning_rate": 1.1732294667204769e-05, "loss": 0.0008, "step": 329700 }, { "epoch": 38.903964134025486, "grad_norm": 0.06182553246617317, "learning_rate": 1.1720341545494013e-05, "loss": 0.0009, "step": 329750 }, { "epoch": 38.90986314299198, "grad_norm": 0.054652608931064606, "learning_rate": 1.1708393707479548e-05, "loss": 0.0006, "step": 329800 }, { "epoch": 38.91576215195847, "grad_norm": 0.028735540807247162, "learning_rate": 1.169645115481051e-05, "loss": 0.0006, "step": 329850 }, { "epoch": 38.92166116092496, "grad_norm": 0.1417391300201416, "learning_rate": 1.1684513889135328e-05, "loss": 0.0008, "step": 329900 }, { "epoch": 38.92756016989146, "grad_norm": 0.03684590384364128, "learning_rate": 1.1672581912101688e-05, "loss": 0.0009, "step": 329950 }, { "epoch": 38.93345917885795, "grad_norm": 0.0030952319502830505, "learning_rate": 1.1660655225356531e-05, "loss": 0.0006, "step": 330000 }, { "epoch": 38.93345917885795, "eval_cer": 0.08605851979345955, "eval_loss": 0.0006530744140036404, "eval_runtime": 2.1096, "eval_samples_per_second": 47.403, "eval_steps_per_second": 1.896, "eval_wer": 0.27, "step": 330000 }, { "epoch": 38.939358187824446, "grad_norm": 0.0043966094963252544, "learning_rate": 1.1648733830546104e-05, "loss": 0.0007, "step": 330050 }, { "epoch": 38.94525719679094, "grad_norm": 0.14719204604625702, "learning_rate": 1.1636817729315886e-05, "loss": 0.0016, "step": 330100 }, { "epoch": 38.951156205757435, "grad_norm": 0.013283178210258484, "learning_rate": 1.1624906923310652e-05, "loss": 0.001, "step": 330150 }, { "epoch": 38.95705521472393, "grad_norm": 0.3496917188167572, "learning_rate": 1.161300141417444e-05, "loss": 0.0016, "step": 330200 }, { "epoch": 38.96295422369042, "grad_norm": 0.11169785261154175, "learning_rate": 1.1601101203550541e-05, "loss": 0.001, "step": 330250 }, { "epoch": 38.96885323265691, "grad_norm": 0.41694706678390503, "learning_rate": 1.1589206293081523e-05, "loss": 0.001, "step": 330300 }, { "epoch": 38.974752241623406, "grad_norm": 0.004363809712231159, "learning_rate": 1.1577316684409256e-05, "loss": 0.0006, "step": 330350 }, { "epoch": 38.9806512505899, "grad_norm": 0.20188507437705994, "learning_rate": 1.1565432379174823e-05, "loss": 0.0006, "step": 330400 }, { "epoch": 38.986550259556395, "grad_norm": 0.007387349847704172, "learning_rate": 1.1553553379018611e-05, "loss": 0.0009, "step": 330450 }, { "epoch": 38.99244926852289, "grad_norm": 0.0022602456156164408, "learning_rate": 1.1541679685580247e-05, "loss": 0.0009, "step": 330500 }, { "epoch": 38.998348277489384, "grad_norm": 0.018806548789143562, "learning_rate": 1.1529811300498678e-05, "loss": 0.0013, "step": 330550 }, { "epoch": 39.00424728645588, "grad_norm": 0.0019602212123572826, "learning_rate": 1.1517948225412055e-05, "loss": 0.0011, "step": 330600 }, { "epoch": 39.010146295422366, "grad_norm": 0.0003983756760135293, "learning_rate": 1.1506090461957837e-05, "loss": 0.001, "step": 330650 }, { "epoch": 39.01604530438886, "grad_norm": 0.07329633831977844, "learning_rate": 1.1494238011772723e-05, "loss": 0.0011, "step": 330700 }, { "epoch": 39.021944313355355, "grad_norm": 0.04796399921178818, "learning_rate": 1.148239087649271e-05, "loss": 0.001, "step": 330750 }, { "epoch": 39.02784332232185, "grad_norm": 0.005199606530368328, "learning_rate": 1.1470549057753033e-05, "loss": 0.0012, "step": 330800 }, { "epoch": 39.033742331288344, "grad_norm": 0.0011411155574023724, "learning_rate": 1.1458712557188212e-05, "loss": 0.0008, "step": 330850 }, { "epoch": 39.03964134025484, "grad_norm": 0.024856775999069214, "learning_rate": 1.1446881376432016e-05, "loss": 0.0008, "step": 330900 }, { "epoch": 39.04554034922133, "grad_norm": 0.04494784027338028, "learning_rate": 1.1435055517117499e-05, "loss": 0.001, "step": 330950 }, { "epoch": 39.05143935818782, "grad_norm": 0.04335382208228111, "learning_rate": 1.1423234980876957e-05, "loss": 0.0007, "step": 331000 }, { "epoch": 39.05143935818782, "eval_cer": 0.08605851979345955, "eval_loss": 0.0003054985136259347, "eval_runtime": 2.0954, "eval_samples_per_second": 47.723, "eval_steps_per_second": 1.909, "eval_wer": 0.27, "step": 331000 }, { "epoch": 39.057338367154316, "grad_norm": 0.02460002340376377, "learning_rate": 1.1411419769341963e-05, "loss": 0.0004, "step": 331050 }, { "epoch": 39.06323737612081, "grad_norm": 0.0037435453850775957, "learning_rate": 1.1399609884143387e-05, "loss": 0.0008, "step": 331100 }, { "epoch": 39.069136385087305, "grad_norm": 0.10904879868030548, "learning_rate": 1.1387805326911305e-05, "loss": 0.0011, "step": 331150 }, { "epoch": 39.0750353940538, "grad_norm": 0.0013721948489546776, "learning_rate": 1.13760060992751e-05, "loss": 0.001, "step": 331200 }, { "epoch": 39.080934403020294, "grad_norm": 0.02808663807809353, "learning_rate": 1.1364212202863395e-05, "loss": 0.0006, "step": 331250 }, { "epoch": 39.08683341198679, "grad_norm": 0.0028575388714671135, "learning_rate": 1.1352423639304093e-05, "loss": 0.0006, "step": 331300 }, { "epoch": 39.09273242095328, "grad_norm": 0.008088131435215473, "learning_rate": 1.1340640410224357e-05, "loss": 0.0013, "step": 331350 }, { "epoch": 39.09863142991977, "grad_norm": 0.09188715368509293, "learning_rate": 1.132886251725061e-05, "loss": 0.0007, "step": 331400 }, { "epoch": 39.104530438886265, "grad_norm": 0.00414158683270216, "learning_rate": 1.1317089962008543e-05, "loss": 0.0009, "step": 331450 }, { "epoch": 39.11042944785276, "grad_norm": 0.0239858478307724, "learning_rate": 1.1305322746123104e-05, "loss": 0.0012, "step": 331500 }, { "epoch": 39.116328456819254, "grad_norm": 0.0008375083561986685, "learning_rate": 1.1293560871218511e-05, "loss": 0.0009, "step": 331550 }, { "epoch": 39.12222746578575, "grad_norm": 0.2136620134115219, "learning_rate": 1.128180433891824e-05, "loss": 0.0009, "step": 331600 }, { "epoch": 39.12812647475224, "grad_norm": 0.017429901286959648, "learning_rate": 1.1270053150845022e-05, "loss": 0.001, "step": 331650 }, { "epoch": 39.13402548371874, "grad_norm": 0.002009703777730465, "learning_rate": 1.125830730862088e-05, "loss": 0.0004, "step": 331700 }, { "epoch": 39.13992449268523, "grad_norm": 0.03890170902013779, "learning_rate": 1.1246566813867071e-05, "loss": 0.0011, "step": 331750 }, { "epoch": 39.14582350165172, "grad_norm": 0.05372035503387451, "learning_rate": 1.1234831668204115e-05, "loss": 0.0008, "step": 331800 }, { "epoch": 39.151722510618214, "grad_norm": 0.0002492888015694916, "learning_rate": 1.1223101873251812e-05, "loss": 0.0008, "step": 331850 }, { "epoch": 39.15762151958471, "grad_norm": 0.10335336625576019, "learning_rate": 1.12113774306292e-05, "loss": 0.0016, "step": 331900 }, { "epoch": 39.1635205285512, "grad_norm": 0.00499291718006134, "learning_rate": 1.1199658341954594e-05, "loss": 0.0009, "step": 331950 }, { "epoch": 39.1694195375177, "grad_norm": 0.04810275137424469, "learning_rate": 1.1187944608845569e-05, "loss": 0.0004, "step": 332000 }, { "epoch": 39.1694195375177, "eval_cer": 0.08347676419965576, "eval_loss": 0.00023486676218453795, "eval_runtime": 2.051, "eval_samples_per_second": 48.758, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 332000 }, { "epoch": 39.17531854648419, "grad_norm": 0.004209518898278475, "learning_rate": 1.1176236232918958e-05, "loss": 0.0005, "step": 332050 }, { "epoch": 39.18121755545069, "grad_norm": 0.0014838945353403687, "learning_rate": 1.1164533215790845e-05, "loss": 0.0008, "step": 332100 }, { "epoch": 39.18711656441718, "grad_norm": 0.0032514675986021757, "learning_rate": 1.115283555907658e-05, "loss": 0.001, "step": 332150 }, { "epoch": 39.19301557338367, "grad_norm": 0.0014006792334839702, "learning_rate": 1.1141143264390803e-05, "loss": 0.0005, "step": 332200 }, { "epoch": 39.19891458235016, "grad_norm": 0.09908825159072876, "learning_rate": 1.112945633334737e-05, "loss": 0.0006, "step": 332250 }, { "epoch": 39.20481359131666, "grad_norm": 0.0026025210972875357, "learning_rate": 1.111777476755942e-05, "loss": 0.0008, "step": 332300 }, { "epoch": 39.21071260028315, "grad_norm": 0.00024249748094007373, "learning_rate": 1.110609856863934e-05, "loss": 0.0008, "step": 332350 }, { "epoch": 39.21661160924965, "grad_norm": 0.003921993542462587, "learning_rate": 1.109442773819877e-05, "loss": 0.0008, "step": 332400 }, { "epoch": 39.22251061821614, "grad_norm": 0.030036645010113716, "learning_rate": 1.1082762277848651e-05, "loss": 0.0013, "step": 332450 }, { "epoch": 39.228409627182636, "grad_norm": 0.010323888622224331, "learning_rate": 1.1071102189199134e-05, "loss": 0.0006, "step": 332500 }, { "epoch": 39.23430863614913, "grad_norm": 0.021154966205358505, "learning_rate": 1.1059447473859651e-05, "loss": 0.001, "step": 332550 }, { "epoch": 39.24020764511562, "grad_norm": 0.0029591196216642857, "learning_rate": 1.1047798133438891e-05, "loss": 0.0012, "step": 332600 }, { "epoch": 39.24610665408211, "grad_norm": 0.026417408138513565, "learning_rate": 1.1036154169544798e-05, "loss": 0.0012, "step": 332650 }, { "epoch": 39.25200566304861, "grad_norm": 0.1083252876996994, "learning_rate": 1.1024515583784567e-05, "loss": 0.0008, "step": 332700 }, { "epoch": 39.2579046720151, "grad_norm": 0.03283194825053215, "learning_rate": 1.1012882377764667e-05, "loss": 0.0009, "step": 332750 }, { "epoch": 39.263803680981596, "grad_norm": 0.006719025783240795, "learning_rate": 1.1001254553090812e-05, "loss": 0.0013, "step": 332800 }, { "epoch": 39.26970268994809, "grad_norm": 0.029158227145671844, "learning_rate": 1.0989632111367976e-05, "loss": 0.0006, "step": 332850 }, { "epoch": 39.275601698914585, "grad_norm": 0.004496088717132807, "learning_rate": 1.0978015054200397e-05, "loss": 0.0008, "step": 332900 }, { "epoch": 39.28150070788108, "grad_norm": 0.014822916127741337, "learning_rate": 1.0966403383191553e-05, "loss": 0.0013, "step": 332950 }, { "epoch": 39.28739971684757, "grad_norm": 0.0004831670084968209, "learning_rate": 1.0954797099944186e-05, "loss": 0.0006, "step": 333000 }, { "epoch": 39.28739971684757, "eval_cer": 0.08605851979345955, "eval_loss": 0.0003949478850699961, "eval_runtime": 2.0352, "eval_samples_per_second": 49.136, "eval_steps_per_second": 1.965, "eval_wer": 0.27, "step": 333000 }, { "epoch": 39.29329872581406, "grad_norm": 0.09127975255250931, "learning_rate": 1.0943196206060324e-05, "loss": 0.0007, "step": 333050 }, { "epoch": 39.299197734780556, "grad_norm": 0.09624979645013809, "learning_rate": 1.0931600703141203e-05, "loss": 0.0007, "step": 333100 }, { "epoch": 39.30509674374705, "grad_norm": 0.061255257576704025, "learning_rate": 1.0920010592787344e-05, "loss": 0.001, "step": 333150 }, { "epoch": 39.310995752713545, "grad_norm": 0.013637321069836617, "learning_rate": 1.090842587659851e-05, "loss": 0.0006, "step": 333200 }, { "epoch": 39.31689476168004, "grad_norm": 0.01937410607933998, "learning_rate": 1.0896846556173734e-05, "loss": 0.0006, "step": 333250 }, { "epoch": 39.322793770646534, "grad_norm": 0.01501206960529089, "learning_rate": 1.0885272633111292e-05, "loss": 0.0011, "step": 333300 }, { "epoch": 39.32869277961302, "grad_norm": 0.0004457637551240623, "learning_rate": 1.087370410900872e-05, "loss": 0.0008, "step": 333350 }, { "epoch": 39.334591788579516, "grad_norm": 0.0029454694595187902, "learning_rate": 1.0862140985462805e-05, "loss": 0.0009, "step": 333400 }, { "epoch": 39.34049079754601, "grad_norm": 0.038849372416734695, "learning_rate": 1.0850583264069592e-05, "loss": 0.0009, "step": 333450 }, { "epoch": 39.346389806512505, "grad_norm": 0.0036999182775616646, "learning_rate": 1.0839030946424384e-05, "loss": 0.0011, "step": 333500 }, { "epoch": 39.352288815479, "grad_norm": 0.0009791386546567082, "learning_rate": 1.0827484034121727e-05, "loss": 0.0003, "step": 333550 }, { "epoch": 39.358187824445494, "grad_norm": 0.027217034250497818, "learning_rate": 1.0815942528755419e-05, "loss": 0.0006, "step": 333600 }, { "epoch": 39.36408683341199, "grad_norm": 0.00019649094610940665, "learning_rate": 1.080440643191854e-05, "loss": 0.001, "step": 333650 }, { "epoch": 39.36998584237848, "grad_norm": 0.044873397797346115, "learning_rate": 1.0792875745203407e-05, "loss": 0.0009, "step": 333700 }, { "epoch": 39.37588485134497, "grad_norm": 0.00034623086685314775, "learning_rate": 1.0781350470201568e-05, "loss": 0.0005, "step": 333750 }, { "epoch": 39.381783860311465, "grad_norm": 0.008236217312514782, "learning_rate": 1.0769830608503845e-05, "loss": 0.0013, "step": 333800 }, { "epoch": 39.38768286927796, "grad_norm": 0.004633722361177206, "learning_rate": 1.0758316161700326e-05, "loss": 0.0006, "step": 333850 }, { "epoch": 39.393581878244454, "grad_norm": 0.04687037691473961, "learning_rate": 1.0746807131380332e-05, "loss": 0.0008, "step": 333900 }, { "epoch": 39.39948088721095, "grad_norm": 0.10672181844711304, "learning_rate": 1.073530351913244e-05, "loss": 0.0008, "step": 333950 }, { "epoch": 39.40537989617744, "grad_norm": 0.0012784412829205394, "learning_rate": 1.0723805326544473e-05, "loss": 0.0007, "step": 334000 }, { "epoch": 39.40537989617744, "eval_cer": 0.08605851979345955, "eval_loss": 0.00034376070834696293, "eval_runtime": 2.0505, "eval_samples_per_second": 48.768, "eval_steps_per_second": 1.951, "eval_wer": 0.27, "step": 334000 }, { "epoch": 39.41127890514394, "grad_norm": 0.0014094359939917922, "learning_rate": 1.0712312555203519e-05, "loss": 0.0005, "step": 334050 }, { "epoch": 39.41717791411043, "grad_norm": 0.0010554844047874212, "learning_rate": 1.0700825206695908e-05, "loss": 0.0008, "step": 334100 }, { "epoch": 39.42307692307692, "grad_norm": 0.009711381047964096, "learning_rate": 1.0689343282607229e-05, "loss": 0.0007, "step": 334150 }, { "epoch": 39.428975932043414, "grad_norm": 0.21471695601940155, "learning_rate": 1.0677866784522317e-05, "loss": 0.0007, "step": 334200 }, { "epoch": 39.43487494100991, "grad_norm": 0.027163082733750343, "learning_rate": 1.0666395714025262e-05, "loss": 0.0008, "step": 334250 }, { "epoch": 39.4407739499764, "grad_norm": 0.002423756057396531, "learning_rate": 1.0654930072699393e-05, "loss": 0.0009, "step": 334300 }, { "epoch": 39.4466729589429, "grad_norm": 0.016921743750572205, "learning_rate": 1.0643469862127297e-05, "loss": 0.0006, "step": 334350 }, { "epoch": 39.45257196790939, "grad_norm": 0.012697450816631317, "learning_rate": 1.063201508389084e-05, "loss": 0.0008, "step": 334400 }, { "epoch": 39.45847097687589, "grad_norm": 0.018991852179169655, "learning_rate": 1.0620565739571087e-05, "loss": 0.0011, "step": 334450 }, { "epoch": 39.46436998584238, "grad_norm": 0.01110992580652237, "learning_rate": 1.0609121830748387e-05, "loss": 0.0011, "step": 334500 }, { "epoch": 39.47026899480887, "grad_norm": 0.007804549764841795, "learning_rate": 1.0597683359002325e-05, "loss": 0.0011, "step": 334550 }, { "epoch": 39.47616800377536, "grad_norm": 0.05400853976607323, "learning_rate": 1.0586250325911744e-05, "loss": 0.0009, "step": 334600 }, { "epoch": 39.48206701274186, "grad_norm": 0.03977838158607483, "learning_rate": 1.057482273305473e-05, "loss": 0.0011, "step": 334650 }, { "epoch": 39.48796602170835, "grad_norm": 0.033302612602710724, "learning_rate": 1.0563400582008614e-05, "loss": 0.0006, "step": 334700 }, { "epoch": 39.49386503067485, "grad_norm": 0.21218684315681458, "learning_rate": 1.0551983874349997e-05, "loss": 0.001, "step": 334750 }, { "epoch": 39.49976403964134, "grad_norm": 0.0117412731051445, "learning_rate": 1.0540572611654697e-05, "loss": 0.0008, "step": 334800 }, { "epoch": 39.505663048607836, "grad_norm": 0.0008204579935409129, "learning_rate": 1.0529166795497809e-05, "loss": 0.0011, "step": 334850 }, { "epoch": 39.51156205757433, "grad_norm": 0.050710272043943405, "learning_rate": 1.0517766427453657e-05, "loss": 0.0015, "step": 334900 }, { "epoch": 39.51746106654082, "grad_norm": 0.009940115734934807, "learning_rate": 1.0506371509095814e-05, "loss": 0.0006, "step": 334950 }, { "epoch": 39.52336007550731, "grad_norm": 0.000641890219412744, "learning_rate": 1.0494982041997126e-05, "loss": 0.0005, "step": 335000 }, { "epoch": 39.52336007550731, "eval_cer": 0.08605851979345955, "eval_loss": 0.0003770265029743314, "eval_runtime": 2.0563, "eval_samples_per_second": 48.632, "eval_steps_per_second": 1.945, "eval_wer": 0.27, "step": 335000 }, { "epoch": 39.52925908447381, "grad_norm": 0.0059653292410075665, "learning_rate": 1.0483598027729663e-05, "loss": 0.0007, "step": 335050 }, { "epoch": 39.5351580934403, "grad_norm": 0.003424467984586954, "learning_rate": 1.0472219467864742e-05, "loss": 0.0009, "step": 335100 }, { "epoch": 39.541057102406796, "grad_norm": 0.0029570087790489197, "learning_rate": 1.0460846363972931e-05, "loss": 0.0012, "step": 335150 }, { "epoch": 39.54695611137329, "grad_norm": 0.47632893919944763, "learning_rate": 1.044947871762405e-05, "loss": 0.0015, "step": 335200 }, { "epoch": 39.552855120339785, "grad_norm": 0.001687456271611154, "learning_rate": 1.0438116530387154e-05, "loss": 0.0009, "step": 335250 }, { "epoch": 39.55875412930628, "grad_norm": 0.013492920435965061, "learning_rate": 1.0426759803830566e-05, "loss": 0.0006, "step": 335300 }, { "epoch": 39.56465313827277, "grad_norm": 0.007995083928108215, "learning_rate": 1.041540853952183e-05, "loss": 0.0009, "step": 335350 }, { "epoch": 39.57055214723926, "grad_norm": 0.1425168812274933, "learning_rate": 1.0404062739027753e-05, "loss": 0.001, "step": 335400 }, { "epoch": 39.576451156205756, "grad_norm": 0.08914814889431, "learning_rate": 1.0392722403914368e-05, "loss": 0.0008, "step": 335450 }, { "epoch": 39.58235016517225, "grad_norm": 0.029803317040205002, "learning_rate": 1.0381387535746995e-05, "loss": 0.0007, "step": 335500 }, { "epoch": 39.588249174138745, "grad_norm": 0.11331958323717117, "learning_rate": 1.037005813609016e-05, "loss": 0.0006, "step": 335550 }, { "epoch": 39.59414818310524, "grad_norm": 0.03137630224227905, "learning_rate": 1.0358734206507641e-05, "loss": 0.0008, "step": 335600 }, { "epoch": 39.600047192071735, "grad_norm": 0.001953406957909465, "learning_rate": 1.0347415748562478e-05, "loss": 0.0008, "step": 335650 }, { "epoch": 39.60594620103823, "grad_norm": 0.2277536690235138, "learning_rate": 1.0336102763816918e-05, "loss": 0.001, "step": 335700 }, { "epoch": 39.61184521000472, "grad_norm": 0.01231370773166418, "learning_rate": 1.032479525383252e-05, "loss": 0.0007, "step": 335750 }, { "epoch": 39.61774421897121, "grad_norm": 0.002068756613880396, "learning_rate": 1.0313493220170017e-05, "loss": 0.0005, "step": 335800 }, { "epoch": 39.623643227937706, "grad_norm": 0.0189678855240345, "learning_rate": 1.030219666438943e-05, "loss": 0.0011, "step": 335850 }, { "epoch": 39.6295422369042, "grad_norm": 0.006769835017621517, "learning_rate": 1.0290905588049998e-05, "loss": 0.0008, "step": 335900 }, { "epoch": 39.635441245870695, "grad_norm": 0.03565560653805733, "learning_rate": 1.0279619992710221e-05, "loss": 0.0006, "step": 335950 }, { "epoch": 39.64134025483719, "grad_norm": 0.0004898161278106272, "learning_rate": 1.0268339879927836e-05, "loss": 0.0009, "step": 336000 }, { "epoch": 39.64134025483719, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002922388375736773, "eval_runtime": 2.0498, "eval_samples_per_second": 48.786, "eval_steps_per_second": 1.951, "eval_wer": 0.26, "step": 336000 }, { "epoch": 39.647239263803684, "grad_norm": 0.0017525521107017994, "learning_rate": 1.0257065251259823e-05, "loss": 0.0008, "step": 336050 }, { "epoch": 39.65313827277018, "grad_norm": 0.047493353486061096, "learning_rate": 1.0245796108262407e-05, "loss": 0.0012, "step": 336100 }, { "epoch": 39.659037281736666, "grad_norm": 0.002829104894772172, "learning_rate": 1.0234532452491047e-05, "loss": 0.0006, "step": 336150 }, { "epoch": 39.66493629070316, "grad_norm": 0.0008780458010733128, "learning_rate": 1.0223274285500468e-05, "loss": 0.0008, "step": 336200 }, { "epoch": 39.670835299669655, "grad_norm": 0.004161285236477852, "learning_rate": 1.0212021608844601e-05, "loss": 0.0008, "step": 336250 }, { "epoch": 39.67673430863615, "grad_norm": 0.18123669922351837, "learning_rate": 1.0200774424076648e-05, "loss": 0.0007, "step": 336300 }, { "epoch": 39.682633317602644, "grad_norm": 0.00046205916441977024, "learning_rate": 1.0189532732749057e-05, "loss": 0.0007, "step": 336350 }, { "epoch": 39.68853232656914, "grad_norm": 0.24733151495456696, "learning_rate": 1.0178296536413495e-05, "loss": 0.0009, "step": 336400 }, { "epoch": 39.69443133553563, "grad_norm": 0.031159335747361183, "learning_rate": 1.0167065836620882e-05, "loss": 0.0015, "step": 336450 }, { "epoch": 39.70033034450212, "grad_norm": 0.03549516946077347, "learning_rate": 1.0155840634921377e-05, "loss": 0.0006, "step": 336500 }, { "epoch": 39.706229353468615, "grad_norm": 0.010469136759638786, "learning_rate": 1.0144620932864386e-05, "loss": 0.0011, "step": 336550 }, { "epoch": 39.71212836243511, "grad_norm": 0.0009043080499395728, "learning_rate": 1.0133406731998546e-05, "loss": 0.0014, "step": 336600 }, { "epoch": 39.718027371401604, "grad_norm": 0.006010300945490599, "learning_rate": 1.0122198033871744e-05, "loss": 0.0009, "step": 336650 }, { "epoch": 39.7239263803681, "grad_norm": 0.0014632344245910645, "learning_rate": 1.0110994840031096e-05, "loss": 0.0009, "step": 336700 }, { "epoch": 39.72982538933459, "grad_norm": 0.013443312607705593, "learning_rate": 1.0099797152022978e-05, "loss": 0.0012, "step": 336750 }, { "epoch": 39.73572439830109, "grad_norm": 0.005301808472722769, "learning_rate": 1.0088604971392979e-05, "loss": 0.0006, "step": 336800 }, { "epoch": 39.74162340726758, "grad_norm": 0.17615030705928802, "learning_rate": 1.007741829968596e-05, "loss": 0.0011, "step": 336850 }, { "epoch": 39.74752241623407, "grad_norm": 0.004257189109921455, "learning_rate": 1.0066237138445971e-05, "loss": 0.0013, "step": 336900 }, { "epoch": 39.753421425200564, "grad_norm": 0.1907743364572525, "learning_rate": 1.0055061489216377e-05, "loss": 0.0013, "step": 336950 }, { "epoch": 39.75932043416706, "grad_norm": 0.0775420069694519, "learning_rate": 1.004389135353972e-05, "loss": 0.0007, "step": 337000 }, { "epoch": 39.75932043416706, "eval_cer": 0.08605851979345955, "eval_loss": 0.0003518399898894131, "eval_runtime": 2.0404, "eval_samples_per_second": 49.009, "eval_steps_per_second": 1.96, "eval_wer": 0.27, "step": 337000 }, { "epoch": 39.76521944313355, "grad_norm": 0.11953657865524292, "learning_rate": 1.0032726732957803e-05, "loss": 0.0011, "step": 337050 }, { "epoch": 39.77111845210005, "grad_norm": 0.16896753013134003, "learning_rate": 1.0021567629011646e-05, "loss": 0.0016, "step": 337100 }, { "epoch": 39.77701746106654, "grad_norm": 0.0663546696305275, "learning_rate": 1.0010414043241562e-05, "loss": 0.0008, "step": 337150 }, { "epoch": 39.78291647003304, "grad_norm": 0.06126606836915016, "learning_rate": 9.99926597718705e-06, "loss": 0.0005, "step": 337200 }, { "epoch": 39.78881547899953, "grad_norm": 0.13940326869487762, "learning_rate": 9.988123432386865e-06, "loss": 0.0009, "step": 337250 }, { "epoch": 39.79471448796602, "grad_norm": 0.008732050657272339, "learning_rate": 9.976986410378997e-06, "loss": 0.0006, "step": 337300 }, { "epoch": 39.80061349693251, "grad_norm": 0.07161232829093933, "learning_rate": 9.965854912700679e-06, "loss": 0.0012, "step": 337350 }, { "epoch": 39.80651250589901, "grad_norm": 0.005577186122536659, "learning_rate": 9.95472894088838e-06, "loss": 0.0011, "step": 337400 }, { "epoch": 39.8124115148655, "grad_norm": 0.0007587310974486172, "learning_rate": 9.943608496477802e-06, "loss": 0.0008, "step": 337450 }, { "epoch": 39.818310523832, "grad_norm": 0.01316419430077076, "learning_rate": 9.932493581003882e-06, "loss": 0.0008, "step": 337500 }, { "epoch": 39.82420953279849, "grad_norm": 0.0027483059093356133, "learning_rate": 9.921384196000811e-06, "loss": 0.001, "step": 337550 }, { "epoch": 39.830108541764986, "grad_norm": 0.0012443949235603213, "learning_rate": 9.910280343001993e-06, "loss": 0.0007, "step": 337600 }, { "epoch": 39.83600755073148, "grad_norm": 0.00362761365249753, "learning_rate": 9.899182023540071e-06, "loss": 0.0006, "step": 337650 }, { "epoch": 39.84190655969797, "grad_norm": 0.02029910311102867, "learning_rate": 9.888089239146963e-06, "loss": 0.001, "step": 337700 }, { "epoch": 39.84780556866446, "grad_norm": 0.0005201530293561518, "learning_rate": 9.877001991353773e-06, "loss": 0.0012, "step": 337750 }, { "epoch": 39.85370457763096, "grad_norm": 0.0391446053981781, "learning_rate": 9.865920281690866e-06, "loss": 0.0008, "step": 337800 }, { "epoch": 39.85960358659745, "grad_norm": 0.009537708945572376, "learning_rate": 9.854844111687833e-06, "loss": 0.0008, "step": 337850 }, { "epoch": 39.865502595563946, "grad_norm": 0.018755024299025536, "learning_rate": 9.843773482873508e-06, "loss": 0.0007, "step": 337900 }, { "epoch": 39.87140160453044, "grad_norm": 0.035946860909461975, "learning_rate": 9.832708396775959e-06, "loss": 0.0003, "step": 337950 }, { "epoch": 39.877300613496935, "grad_norm": 0.0028994663152843714, "learning_rate": 9.821648854922482e-06, "loss": 0.0011, "step": 338000 }, { "epoch": 39.877300613496935, "eval_cer": 0.08605851979345955, "eval_loss": 0.0003859636781271547, "eval_runtime": 2.0616, "eval_samples_per_second": 48.505, "eval_steps_per_second": 1.94, "eval_wer": 0.27, "step": 338000 }, { "epoch": 39.88319962246343, "grad_norm": 0.016520198434591293, "learning_rate": 9.810594858839617e-06, "loss": 0.0008, "step": 338050 }, { "epoch": 39.88909863142992, "grad_norm": 0.1477249711751938, "learning_rate": 9.799546410053128e-06, "loss": 0.0005, "step": 338100 }, { "epoch": 39.89499764039641, "grad_norm": 0.02405615895986557, "learning_rate": 9.788503510088021e-06, "loss": 0.0006, "step": 338150 }, { "epoch": 39.900896649362906, "grad_norm": 0.002697093179449439, "learning_rate": 9.77746616046854e-06, "loss": 0.0009, "step": 338200 }, { "epoch": 39.9067956583294, "grad_norm": 0.018410678952932358, "learning_rate": 9.766434362718141e-06, "loss": 0.0006, "step": 338250 }, { "epoch": 39.912694667295895, "grad_norm": 0.3518282175064087, "learning_rate": 9.755408118359554e-06, "loss": 0.0009, "step": 338300 }, { "epoch": 39.91859367626239, "grad_norm": 0.0014386505354195833, "learning_rate": 9.744387428914708e-06, "loss": 0.0019, "step": 338350 }, { "epoch": 39.924492685228884, "grad_norm": 0.0014505585422739387, "learning_rate": 9.733372295904774e-06, "loss": 0.0008, "step": 338400 }, { "epoch": 39.93039169419538, "grad_norm": 0.1488172858953476, "learning_rate": 9.722362720850164e-06, "loss": 0.0012, "step": 338450 }, { "epoch": 39.936290703161866, "grad_norm": 0.000672923750244081, "learning_rate": 9.711358705270506e-06, "loss": 0.0011, "step": 338500 }, { "epoch": 39.94218971212836, "grad_norm": 0.001731565804220736, "learning_rate": 9.700360250684681e-06, "loss": 0.0012, "step": 338550 }, { "epoch": 39.948088721094855, "grad_norm": 0.0067861382849514484, "learning_rate": 9.68936735861079e-06, "loss": 0.0006, "step": 338600 }, { "epoch": 39.95398773006135, "grad_norm": 0.006405560299754143, "learning_rate": 9.67838003056617e-06, "loss": 0.0009, "step": 338650 }, { "epoch": 39.959886739027844, "grad_norm": 0.02140147238969803, "learning_rate": 9.667398268067379e-06, "loss": 0.0008, "step": 338700 }, { "epoch": 39.96578574799434, "grad_norm": 0.0017259952146559954, "learning_rate": 9.656422072630217e-06, "loss": 0.0008, "step": 338750 }, { "epoch": 39.97168475696083, "grad_norm": 0.0049636997282505035, "learning_rate": 9.645451445769737e-06, "loss": 0.0011, "step": 338800 }, { "epoch": 39.97758376592732, "grad_norm": 0.001088836113922298, "learning_rate": 9.634486389000185e-06, "loss": 0.0009, "step": 338850 }, { "epoch": 39.983482774893815, "grad_norm": 0.06164191663265228, "learning_rate": 9.623526903835056e-06, "loss": 0.0008, "step": 338900 }, { "epoch": 39.98938178386031, "grad_norm": 0.0039424230344593525, "learning_rate": 9.612572991787083e-06, "loss": 0.001, "step": 338950 }, { "epoch": 39.995280792826804, "grad_norm": 0.003744878340512514, "learning_rate": 9.601624654368197e-06, "loss": 0.0007, "step": 339000 }, { "epoch": 39.995280792826804, "eval_cer": 0.08605851979345955, "eval_loss": 0.0006135509465821087, "eval_runtime": 2.0602, "eval_samples_per_second": 48.54, "eval_steps_per_second": 1.942, "eval_wer": 0.27, "step": 339000 }, { "epoch": 40.0011798017933, "grad_norm": 0.021265409886837006, "learning_rate": 9.590681893089614e-06, "loss": 0.0006, "step": 339050 }, { "epoch": 40.00707881075979, "grad_norm": 0.0031611016020178795, "learning_rate": 9.579744709461747e-06, "loss": 0.0005, "step": 339100 }, { "epoch": 40.01297781972629, "grad_norm": 0.07084979116916656, "learning_rate": 9.56881310499423e-06, "loss": 0.001, "step": 339150 }, { "epoch": 40.01887682869278, "grad_norm": 0.004224643111228943, "learning_rate": 9.557887081195938e-06, "loss": 0.0004, "step": 339200 }, { "epoch": 40.02477583765927, "grad_norm": 0.004857118707150221, "learning_rate": 9.546966639574989e-06, "loss": 0.0007, "step": 339250 }, { "epoch": 40.030674846625764, "grad_norm": 0.0003391981590539217, "learning_rate": 9.536051781638711e-06, "loss": 0.0006, "step": 339300 }, { "epoch": 40.03657385559226, "grad_norm": 0.0002848178264684975, "learning_rate": 9.525142508893676e-06, "loss": 0.0011, "step": 339350 }, { "epoch": 40.04247286455875, "grad_norm": 0.01080300286412239, "learning_rate": 9.514238822845667e-06, "loss": 0.0004, "step": 339400 }, { "epoch": 40.04837187352525, "grad_norm": 0.12346962094306946, "learning_rate": 9.503340724999715e-06, "loss": 0.0008, "step": 339450 }, { "epoch": 40.05427088249174, "grad_norm": 0.13773365318775177, "learning_rate": 9.492448216860073e-06, "loss": 0.0008, "step": 339500 }, { "epoch": 40.06016989145824, "grad_norm": 0.005911028012633324, "learning_rate": 9.481561299930214e-06, "loss": 0.0012, "step": 339550 }, { "epoch": 40.06606890042473, "grad_norm": 0.003390068421140313, "learning_rate": 9.470679975712837e-06, "loss": 0.001, "step": 339600 }, { "epoch": 40.07196790939122, "grad_norm": 0.0003758979437407106, "learning_rate": 9.459804245709907e-06, "loss": 0.001, "step": 339650 }, { "epoch": 40.077866918357714, "grad_norm": 0.006259907968342304, "learning_rate": 9.44893411142257e-06, "loss": 0.0006, "step": 339700 }, { "epoch": 40.08376592732421, "grad_norm": 0.12548547983169556, "learning_rate": 9.43806957435122e-06, "loss": 0.0005, "step": 339750 }, { "epoch": 40.0896649362907, "grad_norm": 0.0002877652004826814, "learning_rate": 9.427210635995482e-06, "loss": 0.0004, "step": 339800 }, { "epoch": 40.0955639452572, "grad_norm": 0.0011397217167541385, "learning_rate": 9.416357297854195e-06, "loss": 0.0009, "step": 339850 }, { "epoch": 40.10146295422369, "grad_norm": 0.0007907942053861916, "learning_rate": 9.405509561425434e-06, "loss": 0.0007, "step": 339900 }, { "epoch": 40.107361963190186, "grad_norm": 0.003474564291536808, "learning_rate": 9.3946674282065e-06, "loss": 0.0006, "step": 339950 }, { "epoch": 40.11326097215668, "grad_norm": 0.020159315317869186, "learning_rate": 9.383830899693924e-06, "loss": 0.0006, "step": 340000 }, { "epoch": 40.11326097215668, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005769513081759214, "eval_runtime": 2.045, "eval_samples_per_second": 48.901, "eval_steps_per_second": 1.956, "eval_wer": 0.27, "step": 340000 }, { "epoch": 40.11915998112317, "grad_norm": 0.3474354147911072, "learning_rate": 9.372999977383451e-06, "loss": 0.0007, "step": 340050 }, { "epoch": 40.12505899008966, "grad_norm": 0.09401071816682816, "learning_rate": 9.362174662770068e-06, "loss": 0.0008, "step": 340100 }, { "epoch": 40.13095799905616, "grad_norm": 0.03924385830760002, "learning_rate": 9.351354957347975e-06, "loss": 0.0004, "step": 340150 }, { "epoch": 40.13685700802265, "grad_norm": 0.02163039706647396, "learning_rate": 9.340540862610592e-06, "loss": 0.0007, "step": 340200 }, { "epoch": 40.142756016989146, "grad_norm": 0.010694344528019428, "learning_rate": 9.329732380050604e-06, "loss": 0.0009, "step": 340250 }, { "epoch": 40.14865502595564, "grad_norm": 0.021133525297045708, "learning_rate": 9.31892951115988e-06, "loss": 0.0008, "step": 340300 }, { "epoch": 40.154554034922135, "grad_norm": 0.05727230757474899, "learning_rate": 9.308132257429526e-06, "loss": 0.0006, "step": 340350 }, { "epoch": 40.16045304388863, "grad_norm": 0.00036737846676260233, "learning_rate": 9.297340620349854e-06, "loss": 0.0009, "step": 340400 }, { "epoch": 40.16635205285512, "grad_norm": 0.0007427487289533019, "learning_rate": 9.286554601410463e-06, "loss": 0.0007, "step": 340450 }, { "epoch": 40.17225106182161, "grad_norm": 0.005601045675575733, "learning_rate": 9.275774202100107e-06, "loss": 0.001, "step": 340500 }, { "epoch": 40.17815007078811, "grad_norm": 0.009058552794158459, "learning_rate": 9.2649994239068e-06, "loss": 0.0006, "step": 340550 }, { "epoch": 40.1840490797546, "grad_norm": 0.000693802023306489, "learning_rate": 9.25423026831777e-06, "loss": 0.0009, "step": 340600 }, { "epoch": 40.189948088721096, "grad_norm": 0.0009502402390353382, "learning_rate": 9.243466736819472e-06, "loss": 0.0008, "step": 340650 }, { "epoch": 40.19584709768759, "grad_norm": 0.0008953263750299811, "learning_rate": 9.23270883089758e-06, "loss": 0.0005, "step": 340700 }, { "epoch": 40.201746106654085, "grad_norm": 0.16284964978694916, "learning_rate": 9.221956552036992e-06, "loss": 0.0009, "step": 340750 }, { "epoch": 40.20764511562058, "grad_norm": 0.020426584407687187, "learning_rate": 9.211209901721845e-06, "loss": 0.0007, "step": 340800 }, { "epoch": 40.21354412458707, "grad_norm": 0.13417963683605194, "learning_rate": 9.200468881435475e-06, "loss": 0.0009, "step": 340850 }, { "epoch": 40.21944313355356, "grad_norm": 0.004649774637073278, "learning_rate": 9.189733492660457e-06, "loss": 0.0005, "step": 340900 }, { "epoch": 40.225342142520056, "grad_norm": 0.0925385057926178, "learning_rate": 9.179003736878572e-06, "loss": 0.0008, "step": 340950 }, { "epoch": 40.23124115148655, "grad_norm": 0.00013633607886731625, "learning_rate": 9.168279615570863e-06, "loss": 0.0006, "step": 341000 }, { "epoch": 40.23124115148655, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005538560217246413, "eval_runtime": 2.0752, "eval_samples_per_second": 48.187, "eval_steps_per_second": 1.927, "eval_wer": 0.27, "step": 341000 }, { "epoch": 40.237140160453045, "grad_norm": 0.14414438605308533, "learning_rate": 9.157561130217546e-06, "loss": 0.0009, "step": 341050 }, { "epoch": 40.24303916941954, "grad_norm": 0.011985127814114094, "learning_rate": 9.146848282298094e-06, "loss": 0.0012, "step": 341100 }, { "epoch": 40.248938178386034, "grad_norm": 0.00043885692139156163, "learning_rate": 9.136141073291177e-06, "loss": 0.001, "step": 341150 }, { "epoch": 40.25483718735252, "grad_norm": 0.0008909865864552557, "learning_rate": 9.125439504674699e-06, "loss": 0.0008, "step": 341200 }, { "epoch": 40.260736196319016, "grad_norm": 0.0028274415526539087, "learning_rate": 9.114743577925794e-06, "loss": 0.0006, "step": 341250 }, { "epoch": 40.26663520528551, "grad_norm": 0.16246889531612396, "learning_rate": 9.104053294520804e-06, "loss": 0.0008, "step": 341300 }, { "epoch": 40.272534214252005, "grad_norm": 0.007628168445080519, "learning_rate": 9.093368655935297e-06, "loss": 0.0006, "step": 341350 }, { "epoch": 40.2784332232185, "grad_norm": 0.008465082384645939, "learning_rate": 9.082689663644057e-06, "loss": 0.0007, "step": 341400 }, { "epoch": 40.284332232184994, "grad_norm": 0.0022780466824769974, "learning_rate": 9.072016319121095e-06, "loss": 0.0011, "step": 341450 }, { "epoch": 40.29023124115149, "grad_norm": 0.04208945482969284, "learning_rate": 9.061348623839644e-06, "loss": 0.001, "step": 341500 }, { "epoch": 40.29613025011798, "grad_norm": 0.010630182921886444, "learning_rate": 9.050686579272132e-06, "loss": 0.0006, "step": 341550 }, { "epoch": 40.30202925908447, "grad_norm": 0.07707159966230392, "learning_rate": 9.040030186890265e-06, "loss": 0.0007, "step": 341600 }, { "epoch": 40.307928268050965, "grad_norm": 0.0002779766800813377, "learning_rate": 9.02937944816491e-06, "loss": 0.0006, "step": 341650 }, { "epoch": 40.31382727701746, "grad_norm": 0.0020980066619813442, "learning_rate": 9.018734364566184e-06, "loss": 0.0007, "step": 341700 }, { "epoch": 40.319726285983954, "grad_norm": 0.012095690704882145, "learning_rate": 9.00809493756341e-06, "loss": 0.0007, "step": 341750 }, { "epoch": 40.32562529495045, "grad_norm": 0.008623924106359482, "learning_rate": 8.997461168625137e-06, "loss": 0.0009, "step": 341800 }, { "epoch": 40.33152430391694, "grad_norm": 0.1455872654914856, "learning_rate": 8.98683305921913e-06, "loss": 0.0008, "step": 341850 }, { "epoch": 40.33742331288344, "grad_norm": 0.40033382177352905, "learning_rate": 8.976210610812375e-06, "loss": 0.0009, "step": 341900 }, { "epoch": 40.34332232184993, "grad_norm": 0.008320626802742481, "learning_rate": 8.965593824871077e-06, "loss": 0.0006, "step": 341950 }, { "epoch": 40.34922133081642, "grad_norm": 0.014395048841834068, "learning_rate": 8.954982702860664e-06, "loss": 0.0005, "step": 342000 }, { "epoch": 40.34922133081642, "eval_cer": 0.08605851979345955, "eval_loss": 0.0004677116812672466, "eval_runtime": 2.0591, "eval_samples_per_second": 48.564, "eval_steps_per_second": 1.943, "eval_wer": 0.27, "step": 342000 }, { "epoch": 40.355120339782914, "grad_norm": 0.001199149526655674, "learning_rate": 8.944377246245755e-06, "loss": 0.0008, "step": 342050 }, { "epoch": 40.36101934874941, "grad_norm": 0.0015000278363004327, "learning_rate": 8.93377745649024e-06, "loss": 0.0011, "step": 342100 }, { "epoch": 40.3669183577159, "grad_norm": 0.005863214377313852, "learning_rate": 8.923183335057172e-06, "loss": 0.0006, "step": 342150 }, { "epoch": 40.3728173666824, "grad_norm": 0.038452811539173126, "learning_rate": 8.912594883408864e-06, "loss": 0.0006, "step": 342200 }, { "epoch": 40.37871637564889, "grad_norm": 0.017757238820195198, "learning_rate": 8.902012103006813e-06, "loss": 0.0009, "step": 342250 }, { "epoch": 40.38461538461539, "grad_norm": 0.1029340922832489, "learning_rate": 8.891434995311737e-06, "loss": 0.001, "step": 342300 }, { "epoch": 40.39051439358188, "grad_norm": 0.024354517459869385, "learning_rate": 8.880863561783615e-06, "loss": 0.0006, "step": 342350 }, { "epoch": 40.39641340254837, "grad_norm": 0.0017476839711889625, "learning_rate": 8.87029780388159e-06, "loss": 0.0011, "step": 342400 }, { "epoch": 40.40231241151486, "grad_norm": 0.01251863781362772, "learning_rate": 8.859737723064038e-06, "loss": 0.0013, "step": 342450 }, { "epoch": 40.40821142048136, "grad_norm": 0.05222976580262184, "learning_rate": 8.849183320788562e-06, "loss": 0.0005, "step": 342500 }, { "epoch": 40.41411042944785, "grad_norm": 0.0014152531512081623, "learning_rate": 8.83863459851197e-06, "loss": 0.0008, "step": 342550 }, { "epoch": 40.42000943841435, "grad_norm": 0.02141786925494671, "learning_rate": 8.828091557690289e-06, "loss": 0.0007, "step": 342600 }, { "epoch": 40.42590844738084, "grad_norm": 0.1563357561826706, "learning_rate": 8.817554199778766e-06, "loss": 0.0008, "step": 342650 }, { "epoch": 40.431807456347336, "grad_norm": 0.0002860864042304456, "learning_rate": 8.807022526231862e-06, "loss": 0.0005, "step": 342700 }, { "epoch": 40.43770646531383, "grad_norm": 0.0007911486900411546, "learning_rate": 8.796496538503241e-06, "loss": 0.0007, "step": 342750 }, { "epoch": 40.44360547428032, "grad_norm": 0.1646520495414734, "learning_rate": 8.7859762380458e-06, "loss": 0.0009, "step": 342800 }, { "epoch": 40.44950448324681, "grad_norm": 0.026700560003519058, "learning_rate": 8.77546162631165e-06, "loss": 0.0006, "step": 342850 }, { "epoch": 40.45540349221331, "grad_norm": 0.08959300816059113, "learning_rate": 8.764952704752083e-06, "loss": 0.0008, "step": 342900 }, { "epoch": 40.4613025011798, "grad_norm": 0.002916056662797928, "learning_rate": 8.754449474817672e-06, "loss": 0.0005, "step": 342950 }, { "epoch": 40.467201510146296, "grad_norm": 0.0021909507922828197, "learning_rate": 8.743951937958145e-06, "loss": 0.0006, "step": 343000 }, { "epoch": 40.467201510146296, "eval_cer": 0.08605851979345955, "eval_loss": 0.00047489433200098574, "eval_runtime": 2.0428, "eval_samples_per_second": 48.951, "eval_steps_per_second": 1.958, "eval_wer": 0.27, "step": 343000 }, { "epoch": 40.47310051911279, "grad_norm": 0.005162615794688463, "learning_rate": 8.733460095622465e-06, "loss": 0.0009, "step": 343050 }, { "epoch": 40.478999528079285, "grad_norm": 0.13253246247768402, "learning_rate": 8.722973949258811e-06, "loss": 0.0015, "step": 343100 }, { "epoch": 40.48489853704578, "grad_norm": 0.06939822435379028, "learning_rate": 8.712493500314572e-06, "loss": 0.001, "step": 343150 }, { "epoch": 40.49079754601227, "grad_norm": 0.0016507473774254322, "learning_rate": 8.702018750236356e-06, "loss": 0.001, "step": 343200 }, { "epoch": 40.49669655497876, "grad_norm": 0.1837117075920105, "learning_rate": 8.691549700469981e-06, "loss": 0.0007, "step": 343250 }, { "epoch": 40.502595563945256, "grad_norm": 0.004975460469722748, "learning_rate": 8.681086352460466e-06, "loss": 0.0011, "step": 343300 }, { "epoch": 40.50849457291175, "grad_norm": 0.0005997925763949752, "learning_rate": 8.670628707652068e-06, "loss": 0.0009, "step": 343350 }, { "epoch": 40.514393581878245, "grad_norm": 0.0586472786962986, "learning_rate": 8.660176767488237e-06, "loss": 0.0007, "step": 343400 }, { "epoch": 40.52029259084474, "grad_norm": 0.001601343508809805, "learning_rate": 8.649730533411643e-06, "loss": 0.0011, "step": 343450 }, { "epoch": 40.526191599811234, "grad_norm": 0.0005567051121033728, "learning_rate": 8.639290006864153e-06, "loss": 0.0005, "step": 343500 }, { "epoch": 40.53209060877772, "grad_norm": 0.13156141340732574, "learning_rate": 8.628855189286895e-06, "loss": 0.0012, "step": 343550 }, { "epoch": 40.537989617744216, "grad_norm": 0.0054061864502727985, "learning_rate": 8.618426082120146e-06, "loss": 0.001, "step": 343600 }, { "epoch": 40.54388862671071, "grad_norm": 0.0012466454645618796, "learning_rate": 8.60800268680344e-06, "loss": 0.0008, "step": 343650 }, { "epoch": 40.549787635677205, "grad_norm": 0.001379982102662325, "learning_rate": 8.597585004775483e-06, "loss": 0.0012, "step": 343700 }, { "epoch": 40.5556866446437, "grad_norm": 0.08048925548791885, "learning_rate": 8.58717303747424e-06, "loss": 0.0007, "step": 343750 }, { "epoch": 40.561585653610194, "grad_norm": 0.05233379080891609, "learning_rate": 8.576766786336854e-06, "loss": 0.0008, "step": 343800 }, { "epoch": 40.56748466257669, "grad_norm": 0.21644005179405212, "learning_rate": 8.56636625279969e-06, "loss": 0.0011, "step": 343850 }, { "epoch": 40.57338367154318, "grad_norm": 0.030899008736014366, "learning_rate": 8.555971438298316e-06, "loss": 0.0008, "step": 343900 }, { "epoch": 40.57928268050967, "grad_norm": 0.12103582173585892, "learning_rate": 8.545582344267521e-06, "loss": 0.0008, "step": 343950 }, { "epoch": 40.585181689476165, "grad_norm": 0.00673525407910347, "learning_rate": 8.535198972141295e-06, "loss": 0.0005, "step": 344000 }, { "epoch": 40.585181689476165, "eval_cer": 0.08605851979345955, "eval_loss": 0.0005221301107667387, "eval_runtime": 2.05, "eval_samples_per_second": 48.781, "eval_steps_per_second": 1.951, "eval_wer": 0.27, "step": 344000 }, { "epoch": 40.59108069844266, "grad_norm": 0.0019103308441117406, "learning_rate": 8.524821323352845e-06, "loss": 0.0007, "step": 344050 }, { "epoch": 40.596979707409155, "grad_norm": 0.003008270403370261, "learning_rate": 8.514449399334584e-06, "loss": 0.0007, "step": 344100 }, { "epoch": 40.60287871637565, "grad_norm": 0.001716292928904295, "learning_rate": 8.504083201518143e-06, "loss": 0.0009, "step": 344150 }, { "epoch": 40.608777725342144, "grad_norm": 0.05275936424732208, "learning_rate": 8.493722731334347e-06, "loss": 0.0009, "step": 344200 }, { "epoch": 40.61467673430864, "grad_norm": 0.07715974003076553, "learning_rate": 8.483367990213225e-06, "loss": 0.0006, "step": 344250 }, { "epoch": 40.62057574327513, "grad_norm": 0.002098744036629796, "learning_rate": 8.473018979584069e-06, "loss": 0.0008, "step": 344300 }, { "epoch": 40.62647475224162, "grad_norm": 0.0003161120694130659, "learning_rate": 8.462675700875316e-06, "loss": 0.0006, "step": 344350 }, { "epoch": 40.632373761208115, "grad_norm": 0.1323244422674179, "learning_rate": 8.452338155514644e-06, "loss": 0.0007, "step": 344400 }, { "epoch": 40.63827277017461, "grad_norm": 0.10139945894479752, "learning_rate": 8.442006344928927e-06, "loss": 0.0006, "step": 344450 }, { "epoch": 40.644171779141104, "grad_norm": 0.0018598485039547086, "learning_rate": 8.43168027054425e-06, "loss": 0.0004, "step": 344500 }, { "epoch": 40.6500707881076, "grad_norm": 0.00147426372859627, "learning_rate": 8.421359933785916e-06, "loss": 0.0008, "step": 344550 }, { "epoch": 40.65596979707409, "grad_norm": 0.09385184943675995, "learning_rate": 8.411045336078427e-06, "loss": 0.0006, "step": 344600 }, { "epoch": 40.66186880604059, "grad_norm": 0.03856248781085014, "learning_rate": 8.400736478845495e-06, "loss": 0.0009, "step": 344650 }, { "epoch": 40.66776781500708, "grad_norm": 0.0017411690205335617, "learning_rate": 8.390433363510036e-06, "loss": 0.0007, "step": 344700 }, { "epoch": 40.67366682397357, "grad_norm": 0.0012079813750460744, "learning_rate": 8.380135991494181e-06, "loss": 0.0006, "step": 344750 }, { "epoch": 40.679565832940064, "grad_norm": 0.007507613860070705, "learning_rate": 8.369844364219264e-06, "loss": 0.0009, "step": 344800 }, { "epoch": 40.68546484190656, "grad_norm": 0.052883248776197433, "learning_rate": 8.359558483105811e-06, "loss": 0.0012, "step": 344850 }, { "epoch": 40.69136385087305, "grad_norm": 0.0007358857546932995, "learning_rate": 8.349278349573597e-06, "loss": 0.0007, "step": 344900 }, { "epoch": 40.69726285983955, "grad_norm": 0.1305350363254547, "learning_rate": 8.339003965041565e-06, "loss": 0.0004, "step": 344950 }, { "epoch": 40.70316186880604, "grad_norm": 0.23796306550502777, "learning_rate": 8.328735330927873e-06, "loss": 0.0007, "step": 345000 }, { "epoch": 40.70316186880604, "eval_cer": 0.08605851979345955, "eval_loss": 0.00043597404146566987, "eval_runtime": 2.0762, "eval_samples_per_second": 48.165, "eval_steps_per_second": 1.927, "eval_wer": 0.27, "step": 345000 }, { "epoch": 40.70906087777254, "grad_norm": 0.000662688456941396, "learning_rate": 8.31847244864989e-06, "loss": 0.0009, "step": 345050 }, { "epoch": 40.71495988673903, "grad_norm": 0.0027264279779046774, "learning_rate": 8.308215319624185e-06, "loss": 0.0009, "step": 345100 }, { "epoch": 40.72085889570552, "grad_norm": 0.014350242912769318, "learning_rate": 8.297963945266551e-06, "loss": 0.0008, "step": 345150 }, { "epoch": 40.72675790467201, "grad_norm": 0.0040205963887274265, "learning_rate": 8.287718326991961e-06, "loss": 0.0013, "step": 345200 }, { "epoch": 40.73265691363851, "grad_norm": 0.23609386384487152, "learning_rate": 8.277478466214611e-06, "loss": 0.0012, "step": 345250 }, { "epoch": 40.738555922605, "grad_norm": 0.12378836423158646, "learning_rate": 8.267244364347893e-06, "loss": 0.0009, "step": 345300 }, { "epoch": 40.7444549315715, "grad_norm": 0.00019591966702137142, "learning_rate": 8.257016022804408e-06, "loss": 0.0005, "step": 345350 }, { "epoch": 40.75035394053799, "grad_norm": 0.03531147167086601, "learning_rate": 8.246793442995954e-06, "loss": 0.0007, "step": 345400 }, { "epoch": 40.756252949504486, "grad_norm": 0.0035045507829636335, "learning_rate": 8.236576626333569e-06, "loss": 0.0005, "step": 345450 }, { "epoch": 40.76215195847098, "grad_norm": 0.0003075193671975285, "learning_rate": 8.226365574227446e-06, "loss": 0.0011, "step": 345500 }, { "epoch": 40.76805096743747, "grad_norm": 0.03532494604587555, "learning_rate": 8.21616028808701e-06, "loss": 0.0005, "step": 345550 }, { "epoch": 40.77394997640396, "grad_norm": 0.029195912182331085, "learning_rate": 8.205960769320876e-06, "loss": 0.0009, "step": 345600 }, { "epoch": 40.77984898537046, "grad_norm": 0.005677587352693081, "learning_rate": 8.195767019336886e-06, "loss": 0.0006, "step": 345650 }, { "epoch": 40.78574799433695, "grad_norm": 0.057205680757761, "learning_rate": 8.185579039542069e-06, "loss": 0.0011, "step": 345700 }, { "epoch": 40.791647003303446, "grad_norm": 0.0012333921622484922, "learning_rate": 8.175396831342652e-06, "loss": 0.0006, "step": 345750 }, { "epoch": 40.79754601226994, "grad_norm": 0.0017479672096669674, "learning_rate": 8.165220396144085e-06, "loss": 0.0009, "step": 345800 }, { "epoch": 40.803445021236435, "grad_norm": 0.015316251665353775, "learning_rate": 8.155049735350994e-06, "loss": 0.001, "step": 345850 }, { "epoch": 40.80934403020293, "grad_norm": 0.020041022449731827, "learning_rate": 8.144884850367235e-06, "loss": 0.0005, "step": 345900 }, { "epoch": 40.81524303916942, "grad_norm": 0.010676142759621143, "learning_rate": 8.13472574259585e-06, "loss": 0.0008, "step": 345950 }, { "epoch": 40.82114204813591, "grad_norm": 0.03403623774647713, "learning_rate": 8.12457241343909e-06, "loss": 0.0006, "step": 346000 }, { "epoch": 40.82114204813591, "eval_cer": 0.08347676419965576, "eval_loss": 0.0002594063989818096, "eval_runtime": 2.019, "eval_samples_per_second": 49.529, "eval_steps_per_second": 1.981, "eval_wer": 0.26, "step": 346000 }, { "epoch": 40.827041057102406, "grad_norm": 0.28686395287513733, "learning_rate": 8.114424864298408e-06, "loss": 0.0011, "step": 346050 }, { "epoch": 40.8329400660689, "grad_norm": 0.0005770008428953588, "learning_rate": 8.104283096574461e-06, "loss": 0.0006, "step": 346100 }, { "epoch": 40.838839075035395, "grad_norm": 0.25658443570137024, "learning_rate": 8.094147111667094e-06, "loss": 0.001, "step": 346150 }, { "epoch": 40.84473808400189, "grad_norm": 0.003243648447096348, "learning_rate": 8.084016910975367e-06, "loss": 0.0009, "step": 346200 }, { "epoch": 40.850637092968384, "grad_norm": 0.06310776621103287, "learning_rate": 8.073892495897555e-06, "loss": 0.0007, "step": 346250 }, { "epoch": 40.85653610193488, "grad_norm": 0.009345601312816143, "learning_rate": 8.06377386783111e-06, "loss": 0.0006, "step": 346300 }, { "epoch": 40.862435110901366, "grad_norm": 0.0018104850314557552, "learning_rate": 8.053661028172692e-06, "loss": 0.0009, "step": 346350 }, { "epoch": 40.86833411986786, "grad_norm": 0.02610086090862751, "learning_rate": 8.043553978318169e-06, "loss": 0.0008, "step": 346400 }, { "epoch": 40.874233128834355, "grad_norm": 0.016195090487599373, "learning_rate": 8.033452719662598e-06, "loss": 0.0005, "step": 346450 }, { "epoch": 40.88013213780085, "grad_norm": 0.07078031450510025, "learning_rate": 8.02335725360025e-06, "loss": 0.0008, "step": 346500 }, { "epoch": 40.886031146767344, "grad_norm": 0.0005497426027432084, "learning_rate": 8.013267581524591e-06, "loss": 0.0007, "step": 346550 }, { "epoch": 40.89193015573384, "grad_norm": 0.005715323146432638, "learning_rate": 8.003183704828282e-06, "loss": 0.001, "step": 346600 }, { "epoch": 40.89782916470033, "grad_norm": 0.00016634933126624674, "learning_rate": 7.99310562490319e-06, "loss": 0.0005, "step": 346650 }, { "epoch": 40.90372817366682, "grad_norm": 0.0010642033303156495, "learning_rate": 7.983033343140389e-06, "loss": 0.0006, "step": 346700 }, { "epoch": 40.909627182633315, "grad_norm": 0.0054057687520980835, "learning_rate": 7.97296686093013e-06, "loss": 0.0015, "step": 346750 }, { "epoch": 40.91552619159981, "grad_norm": 0.001713036559522152, "learning_rate": 7.962906179661872e-06, "loss": 0.0011, "step": 346800 }, { "epoch": 40.921425200566304, "grad_norm": 0.07986187934875488, "learning_rate": 7.952851300724307e-06, "loss": 0.0012, "step": 346850 }, { "epoch": 40.9273242095328, "grad_norm": 0.020050855353474617, "learning_rate": 7.94280222550528e-06, "loss": 0.0005, "step": 346900 }, { "epoch": 40.93322321849929, "grad_norm": 0.0018899559509009123, "learning_rate": 7.932758955391856e-06, "loss": 0.0009, "step": 346950 }, { "epoch": 40.93912222746579, "grad_norm": 0.058086227625608444, "learning_rate": 7.922721491770296e-06, "loss": 0.0008, "step": 347000 }, { "epoch": 40.93912222746579, "eval_cer": 0.08347676419965576, "eval_loss": 0.00018142512999475002, "eval_runtime": 2.0413, "eval_samples_per_second": 48.988, "eval_steps_per_second": 1.96, "eval_wer": 0.26, "step": 347000 }, { "epoch": 40.94502123643228, "grad_norm": 0.010989953763782978, "learning_rate": 7.912689836026043e-06, "loss": 0.0009, "step": 347050 }, { "epoch": 40.95092024539877, "grad_norm": 0.005253213457763195, "learning_rate": 7.902663989543784e-06, "loss": 0.0011, "step": 347100 }, { "epoch": 40.956819254365264, "grad_norm": 0.0001426953385816887, "learning_rate": 7.892643953707362e-06, "loss": 0.0011, "step": 347150 }, { "epoch": 40.96271826333176, "grad_norm": 0.0060187336057424545, "learning_rate": 7.882629729899831e-06, "loss": 0.001, "step": 347200 }, { "epoch": 40.96861727229825, "grad_norm": 0.0025086035020649433, "learning_rate": 7.872621319503442e-06, "loss": 0.0006, "step": 347250 }, { "epoch": 40.97451628126475, "grad_norm": 0.014835893176496029, "learning_rate": 7.86261872389964e-06, "loss": 0.0013, "step": 347300 }, { "epoch": 40.98041529023124, "grad_norm": 0.030771812424063683, "learning_rate": 7.85262194446908e-06, "loss": 0.0008, "step": 347350 }, { "epoch": 40.98631429919774, "grad_norm": 0.003347455756738782, "learning_rate": 7.842630982591597e-06, "loss": 0.001, "step": 347400 }, { "epoch": 40.99221330816423, "grad_norm": 0.001966438489034772, "learning_rate": 7.832645839646235e-06, "loss": 0.0011, "step": 347450 }, { "epoch": 40.99811231713072, "grad_norm": 0.14203926920890808, "learning_rate": 7.822666517011234e-06, "loss": 0.0006, "step": 347500 }, { "epoch": 41.00401132609721, "grad_norm": 0.07534188777208328, "learning_rate": 7.812693016064015e-06, "loss": 0.0008, "step": 347550 }, { "epoch": 41.00991033506371, "grad_norm": 0.02944064885377884, "learning_rate": 7.802725338181232e-06, "loss": 0.0006, "step": 347600 }, { "epoch": 41.0158093440302, "grad_norm": 0.018587231636047363, "learning_rate": 7.792763484738697e-06, "loss": 0.0006, "step": 347650 }, { "epoch": 41.0217083529967, "grad_norm": 0.0019894689321517944, "learning_rate": 7.782807457111435e-06, "loss": 0.0006, "step": 347700 }, { "epoch": 41.02760736196319, "grad_norm": 0.009761249646544456, "learning_rate": 7.772857256673672e-06, "loss": 0.0007, "step": 347750 }, { "epoch": 41.033506370929686, "grad_norm": 0.005890064872801304, "learning_rate": 7.762912884798812e-06, "loss": 0.0007, "step": 347800 }, { "epoch": 41.03940537989618, "grad_norm": 0.003499580081552267, "learning_rate": 7.752974342859464e-06, "loss": 0.0009, "step": 347850 }, { "epoch": 41.04530438886267, "grad_norm": 0.10721154510974884, "learning_rate": 7.743041632227444e-06, "loss": 0.0005, "step": 347900 }, { "epoch": 41.05120339782916, "grad_norm": 0.20213428139686584, "learning_rate": 7.733114754273746e-06, "loss": 0.001, "step": 347950 }, { "epoch": 41.05710240679566, "grad_norm": 0.006317604798823595, "learning_rate": 7.723193710368565e-06, "loss": 0.0007, "step": 348000 }, { "epoch": 41.05710240679566, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010593948536552489, "eval_runtime": 2.077, "eval_samples_per_second": 48.147, "eval_steps_per_second": 1.926, "eval_wer": 0.26, "step": 348000 }, { "epoch": 41.06300141576215, "grad_norm": 0.07985728979110718, "learning_rate": 7.713278501881293e-06, "loss": 0.001, "step": 348050 }, { "epoch": 41.068900424728646, "grad_norm": 0.18512161076068878, "learning_rate": 7.703369130180516e-06, "loss": 0.0014, "step": 348100 }, { "epoch": 41.07479943369514, "grad_norm": 0.07518848031759262, "learning_rate": 7.693465596633992e-06, "loss": 0.0014, "step": 348150 }, { "epoch": 41.080698442661635, "grad_norm": 0.00015189015539363027, "learning_rate": 7.683567902608729e-06, "loss": 0.0007, "step": 348200 }, { "epoch": 41.08659745162813, "grad_norm": 0.14453740417957306, "learning_rate": 7.673676049470873e-06, "loss": 0.001, "step": 348250 }, { "epoch": 41.09249646059462, "grad_norm": 0.006626752205193043, "learning_rate": 7.663790038585793e-06, "loss": 0.0003, "step": 348300 }, { "epoch": 41.09839546956111, "grad_norm": 0.013808149844408035, "learning_rate": 7.653909871318044e-06, "loss": 0.0011, "step": 348350 }, { "epoch": 41.104294478527606, "grad_norm": 0.03369902819395065, "learning_rate": 7.644035549031365e-06, "loss": 0.0005, "step": 348400 }, { "epoch": 41.1101934874941, "grad_norm": 0.03938049077987671, "learning_rate": 7.6341670730887e-06, "loss": 0.0009, "step": 348450 }, { "epoch": 41.116092496460595, "grad_norm": 0.07442024350166321, "learning_rate": 7.624304444852188e-06, "loss": 0.0007, "step": 348500 }, { "epoch": 41.12199150542709, "grad_norm": 0.012037307024002075, "learning_rate": 7.61444766568315e-06, "loss": 0.0005, "step": 348550 }, { "epoch": 41.127890514393584, "grad_norm": 0.00304055237211287, "learning_rate": 7.604596736942116e-06, "loss": 0.001, "step": 348600 }, { "epoch": 41.13378952336008, "grad_norm": 0.04221036657691002, "learning_rate": 7.594751659988785e-06, "loss": 0.0009, "step": 348650 }, { "epoch": 41.139688532326566, "grad_norm": 0.05107981711626053, "learning_rate": 7.584912436182057e-06, "loss": 0.0007, "step": 348700 }, { "epoch": 41.14558754129306, "grad_norm": 0.010286866687238216, "learning_rate": 7.5750790668800475e-06, "loss": 0.0009, "step": 348750 }, { "epoch": 41.151486550259555, "grad_norm": 0.0017680932069197297, "learning_rate": 7.565251553440039e-06, "loss": 0.001, "step": 348800 }, { "epoch": 41.15738555922605, "grad_norm": 0.0016149056609719992, "learning_rate": 7.5554298972185074e-06, "loss": 0.0009, "step": 348850 }, { "epoch": 41.163284568192545, "grad_norm": 0.005089223384857178, "learning_rate": 7.5456140995711185e-06, "loss": 0.0007, "step": 348900 }, { "epoch": 41.16918357715904, "grad_norm": 0.050575654953718185, "learning_rate": 7.53580416185275e-06, "loss": 0.0004, "step": 348950 }, { "epoch": 41.175082586125534, "grad_norm": 0.05950643867254257, "learning_rate": 7.526000085417451e-06, "loss": 0.0007, "step": 349000 }, { "epoch": 41.175082586125534, "eval_cer": 0.08347676419965576, "eval_loss": 8.538734982721508e-05, "eval_runtime": 2.0952, "eval_samples_per_second": 47.728, "eval_steps_per_second": 1.909, "eval_wer": 0.26, "step": 349000 }, { "epoch": 41.18098159509202, "grad_norm": 0.09489646553993225, "learning_rate": 7.516201871618461e-06, "loss": 0.0005, "step": 349050 }, { "epoch": 41.186880604058516, "grad_norm": 0.5077330470085144, "learning_rate": 7.506409521808222e-06, "loss": 0.0006, "step": 349100 }, { "epoch": 41.19277961302501, "grad_norm": 0.001131537021137774, "learning_rate": 7.4966230373383574e-06, "loss": 0.0008, "step": 349150 }, { "epoch": 41.198678621991505, "grad_norm": 0.05739215388894081, "learning_rate": 7.486842419559681e-06, "loss": 0.0012, "step": 349200 }, { "epoch": 41.204577630958, "grad_norm": 0.01231149211525917, "learning_rate": 7.4770676698222076e-06, "loss": 0.001, "step": 349250 }, { "epoch": 41.210476639924494, "grad_norm": 0.007186174392700195, "learning_rate": 7.46729878947513e-06, "loss": 0.0004, "step": 349300 }, { "epoch": 41.21637564889099, "grad_norm": 0.011913393624126911, "learning_rate": 7.457535779866836e-06, "loss": 0.0008, "step": 349350 }, { "epoch": 41.22227465785748, "grad_norm": 0.0016977203777059913, "learning_rate": 7.4477786423448994e-06, "loss": 0.0007, "step": 349400 }, { "epoch": 41.22817366682397, "grad_norm": 0.032164718955755234, "learning_rate": 7.438027378256091e-06, "loss": 0.0011, "step": 349450 }, { "epoch": 41.234072675790465, "grad_norm": 0.08173463493585587, "learning_rate": 7.428281988946351e-06, "loss": 0.001, "step": 349500 }, { "epoch": 41.23997168475696, "grad_norm": 0.020709797739982605, "learning_rate": 7.4185424757608535e-06, "loss": 0.0005, "step": 349550 }, { "epoch": 41.245870693723454, "grad_norm": 0.0002839227963704616, "learning_rate": 7.408808840043913e-06, "loss": 0.001, "step": 349600 }, { "epoch": 41.25176970268995, "grad_norm": 0.0009718936635181308, "learning_rate": 7.399081083139053e-06, "loss": 0.0007, "step": 349650 }, { "epoch": 41.25766871165644, "grad_norm": 0.00018503711908124387, "learning_rate": 7.389359206388985e-06, "loss": 0.0006, "step": 349700 }, { "epoch": 41.26356772062294, "grad_norm": 0.01609683409333229, "learning_rate": 7.3796432111356114e-06, "loss": 0.0006, "step": 349750 }, { "epoch": 41.26946672958943, "grad_norm": 0.0028880739118903875, "learning_rate": 7.369933098720022e-06, "loss": 0.0009, "step": 349800 }, { "epoch": 41.27536573855592, "grad_norm": 0.0010156150674447417, "learning_rate": 7.360228870482483e-06, "loss": 0.0013, "step": 349850 }, { "epoch": 41.281264747522414, "grad_norm": 0.01131818350404501, "learning_rate": 7.3505305277624614e-06, "loss": 0.0006, "step": 349900 }, { "epoch": 41.28716375648891, "grad_norm": 0.2106054127216339, "learning_rate": 7.3408380718986145e-06, "loss": 0.0012, "step": 349950 }, { "epoch": 41.2930627654554, "grad_norm": 0.004090623464435339, "learning_rate": 7.331151504228767e-06, "loss": 0.0008, "step": 350000 }, { "epoch": 41.2930627654554, "eval_cer": 0.08347676419965576, "eval_loss": 7.234114309540018e-05, "eval_runtime": 2.0525, "eval_samples_per_second": 48.721, "eval_steps_per_second": 1.949, "eval_wer": 0.26, "step": 350000 }, { "epoch": 41.2989617744219, "grad_norm": 0.00350520433858037, "learning_rate": 7.32147082608996e-06, "loss": 0.0011, "step": 350050 }, { "epoch": 41.30486078338839, "grad_norm": 0.8324800133705139, "learning_rate": 7.311796038818386e-06, "loss": 0.0007, "step": 350100 }, { "epoch": 41.31075979235489, "grad_norm": 0.001768268528394401, "learning_rate": 7.302127143749466e-06, "loss": 0.001, "step": 350150 }, { "epoch": 41.31665880132138, "grad_norm": 0.003917486406862736, "learning_rate": 7.292464142217775e-06, "loss": 0.0012, "step": 350200 }, { "epoch": 41.32255781028787, "grad_norm": 0.004311677999794483, "learning_rate": 7.282807035557093e-06, "loss": 0.0004, "step": 350250 }, { "epoch": 41.32845681925436, "grad_norm": 0.001391459722071886, "learning_rate": 7.273155825100375e-06, "loss": 0.0008, "step": 350300 }, { "epoch": 41.33435582822086, "grad_norm": 0.0010481758508831263, "learning_rate": 7.26351051217975e-06, "loss": 0.0008, "step": 350350 }, { "epoch": 41.34025483718735, "grad_norm": 0.0005782524240203202, "learning_rate": 7.25387109812658e-06, "loss": 0.0005, "step": 350400 }, { "epoch": 41.34615384615385, "grad_norm": 0.000521628768183291, "learning_rate": 7.244237584271363e-06, "loss": 0.001, "step": 350450 }, { "epoch": 41.35205285512034, "grad_norm": 0.0017388396663591266, "learning_rate": 7.2346099719438065e-06, "loss": 0.0009, "step": 350500 }, { "epoch": 41.357951864086836, "grad_norm": 0.014179384335875511, "learning_rate": 7.224988262472798e-06, "loss": 0.0009, "step": 350550 }, { "epoch": 41.36385087305333, "grad_norm": 0.03202246502041817, "learning_rate": 7.215372457186415e-06, "loss": 0.0004, "step": 350600 }, { "epoch": 41.36974988201982, "grad_norm": 0.03103979304432869, "learning_rate": 7.205762557411905e-06, "loss": 0.0012, "step": 350650 }, { "epoch": 41.37564889098631, "grad_norm": 0.006808100268244743, "learning_rate": 7.196158564475719e-06, "loss": 0.0006, "step": 350700 }, { "epoch": 41.38154789995281, "grad_norm": 0.3058081865310669, "learning_rate": 7.186560479703481e-06, "loss": 0.0008, "step": 350750 }, { "epoch": 41.3874469089193, "grad_norm": 0.0014559067785739899, "learning_rate": 7.176968304420007e-06, "loss": 0.0007, "step": 350800 }, { "epoch": 41.393345917885796, "grad_norm": 0.008187053725123405, "learning_rate": 7.167382039949277e-06, "loss": 0.0006, "step": 350850 }, { "epoch": 41.39924492685229, "grad_norm": 0.0011359311174601316, "learning_rate": 7.157801687614501e-06, "loss": 0.0011, "step": 350900 }, { "epoch": 41.405143935818785, "grad_norm": 0.0009523017797619104, "learning_rate": 7.148227248738032e-06, "loss": 0.0005, "step": 350950 }, { "epoch": 41.41104294478528, "grad_norm": 0.01993557997047901, "learning_rate": 7.138658724641417e-06, "loss": 0.001, "step": 351000 }, { "epoch": 41.41104294478528, "eval_cer": 0.08347676419965576, "eval_loss": 6.95917769917287e-05, "eval_runtime": 2.0374, "eval_samples_per_second": 49.081, "eval_steps_per_second": 1.963, "eval_wer": 0.26, "step": 351000 }, { "epoch": 41.41694195375177, "grad_norm": 0.0054412586614489555, "learning_rate": 7.129096116645384e-06, "loss": 0.0007, "step": 351050 }, { "epoch": 41.42284096271826, "grad_norm": 0.004358604084700346, "learning_rate": 7.119539426069849e-06, "loss": 0.0005, "step": 351100 }, { "epoch": 41.428739971684756, "grad_norm": 0.005451158620417118, "learning_rate": 7.109988654233918e-06, "loss": 0.0009, "step": 351150 }, { "epoch": 41.43463898065125, "grad_norm": 0.0017486072611063719, "learning_rate": 7.10044380245587e-06, "loss": 0.0005, "step": 351200 }, { "epoch": 41.440537989617745, "grad_norm": 0.0003974183928221464, "learning_rate": 7.090904872053167e-06, "loss": 0.0007, "step": 351250 }, { "epoch": 41.44643699858424, "grad_norm": 0.010026858188211918, "learning_rate": 7.081371864342456e-06, "loss": 0.0004, "step": 351300 }, { "epoch": 41.452336007550734, "grad_norm": 0.08205172419548035, "learning_rate": 7.071844780639569e-06, "loss": 0.0007, "step": 351350 }, { "epoch": 41.45823501651722, "grad_norm": 0.021598976105451584, "learning_rate": 7.0623236222595144e-06, "loss": 0.0007, "step": 351400 }, { "epoch": 41.464134025483716, "grad_norm": 0.018877269700169563, "learning_rate": 7.05280839051648e-06, "loss": 0.0011, "step": 351450 }, { "epoch": 41.47003303445021, "grad_norm": 0.000168826591107063, "learning_rate": 7.0432990867238644e-06, "loss": 0.0011, "step": 351500 }, { "epoch": 41.475932043416705, "grad_norm": 0.0004840423061978072, "learning_rate": 7.033795712194214e-06, "loss": 0.001, "step": 351550 }, { "epoch": 41.4818310523832, "grad_norm": 0.04198811948299408, "learning_rate": 7.0242982682392655e-06, "loss": 0.0008, "step": 351600 }, { "epoch": 41.487730061349694, "grad_norm": 0.18169385194778442, "learning_rate": 7.014806756169945e-06, "loss": 0.0007, "step": 351650 }, { "epoch": 41.49362907031619, "grad_norm": 0.003833583788946271, "learning_rate": 7.005321177296348e-06, "loss": 0.0009, "step": 351700 }, { "epoch": 41.49952807928268, "grad_norm": 0.07688494026660919, "learning_rate": 6.995841532927761e-06, "loss": 0.0012, "step": 351750 }, { "epoch": 41.50542708824917, "grad_norm": 0.0023335956502705812, "learning_rate": 6.986367824372647e-06, "loss": 0.001, "step": 351800 }, { "epoch": 41.511326097215665, "grad_norm": 0.002733763074502349, "learning_rate": 6.976900052938656e-06, "loss": 0.0006, "step": 351850 }, { "epoch": 41.51722510618216, "grad_norm": 0.0031012187246233225, "learning_rate": 6.96743821993261e-06, "loss": 0.0004, "step": 351900 }, { "epoch": 41.523124115148654, "grad_norm": 0.0015807247254997492, "learning_rate": 6.957982326660512e-06, "loss": 0.0008, "step": 351950 }, { "epoch": 41.52902312411515, "grad_norm": 0.00017719513562042266, "learning_rate": 6.948532374427541e-06, "loss": 0.0012, "step": 352000 }, { "epoch": 41.52902312411515, "eval_cer": 0.08347676419965576, "eval_loss": 8.793516462901607e-05, "eval_runtime": 2.0989, "eval_samples_per_second": 47.644, "eval_steps_per_second": 1.906, "eval_wer": 0.26, "step": 352000 }, { "epoch": 41.53492213308164, "grad_norm": 0.14451883733272552, "learning_rate": 6.939088364538082e-06, "loss": 0.0006, "step": 352050 }, { "epoch": 41.54082114204814, "grad_norm": 0.13360495865345, "learning_rate": 6.92965029829567e-06, "loss": 0.0005, "step": 352100 }, { "epoch": 41.54672015101463, "grad_norm": 0.028464114293456078, "learning_rate": 6.920218177003035e-06, "loss": 0.0005, "step": 352150 }, { "epoch": 41.55261915998112, "grad_norm": 0.08045613020658493, "learning_rate": 6.9107920019620635e-06, "loss": 0.0009, "step": 352200 }, { "epoch": 41.558518168947614, "grad_norm": 0.014884008094668388, "learning_rate": 6.901371774473864e-06, "loss": 0.0006, "step": 352250 }, { "epoch": 41.56441717791411, "grad_norm": 0.03229975327849388, "learning_rate": 6.891957495838685e-06, "loss": 0.001, "step": 352300 }, { "epoch": 41.5703161868806, "grad_norm": 0.007127933204174042, "learning_rate": 6.882549167355978e-06, "loss": 0.0007, "step": 352350 }, { "epoch": 41.5762151958471, "grad_norm": 0.001682017114944756, "learning_rate": 6.8731467903243586e-06, "loss": 0.0008, "step": 352400 }, { "epoch": 41.58211420481359, "grad_norm": 0.0038364259526133537, "learning_rate": 6.8637503660416215e-06, "loss": 0.0007, "step": 352450 }, { "epoch": 41.58801321378009, "grad_norm": 0.024098053574562073, "learning_rate": 6.854359895804746e-06, "loss": 0.0007, "step": 352500 }, { "epoch": 41.59391222274658, "grad_norm": 0.035721659660339355, "learning_rate": 6.84497538090989e-06, "loss": 0.0008, "step": 352550 }, { "epoch": 41.59981123171307, "grad_norm": 6.508450314868242e-05, "learning_rate": 6.835596822652391e-06, "loss": 0.0005, "step": 352600 }, { "epoch": 41.605710240679564, "grad_norm": 0.032231416553258896, "learning_rate": 6.826224222326749e-06, "loss": 0.0007, "step": 352650 }, { "epoch": 41.61160924964606, "grad_norm": 0.0014910395257174969, "learning_rate": 6.816857581226665e-06, "loss": 0.0006, "step": 352700 }, { "epoch": 41.61750825861255, "grad_norm": 0.008076024241745472, "learning_rate": 6.807496900644999e-06, "loss": 0.0005, "step": 352750 }, { "epoch": 41.62340726757905, "grad_norm": 0.11915381997823715, "learning_rate": 6.7981421818737846e-06, "loss": 0.0007, "step": 352800 }, { "epoch": 41.62930627654554, "grad_norm": 0.011552863754332066, "learning_rate": 6.788793426204265e-06, "loss": 0.0008, "step": 352850 }, { "epoch": 41.635205285512036, "grad_norm": 0.021463483572006226, "learning_rate": 6.779450634926826e-06, "loss": 0.0008, "step": 352900 }, { "epoch": 41.64110429447853, "grad_norm": 0.0013977477792650461, "learning_rate": 6.770113809331041e-06, "loss": 0.0008, "step": 352950 }, { "epoch": 41.64700330344502, "grad_norm": 0.0018811410991474986, "learning_rate": 6.760782950705663e-06, "loss": 0.0011, "step": 353000 }, { "epoch": 41.64700330344502, "eval_cer": 0.08347676419965576, "eval_loss": 8.495435758959502e-05, "eval_runtime": 2.0609, "eval_samples_per_second": 48.523, "eval_steps_per_second": 1.941, "eval_wer": 0.26, "step": 353000 }, { "epoch": 41.65290231241151, "grad_norm": 0.03396933153271675, "learning_rate": 6.751458060338622e-06, "loss": 0.0006, "step": 353050 }, { "epoch": 41.65880132137801, "grad_norm": 0.00016813365800771862, "learning_rate": 6.742139139517023e-06, "loss": 0.0011, "step": 353100 }, { "epoch": 41.6647003303445, "grad_norm": 0.021483203396201134, "learning_rate": 6.732826189527136e-06, "loss": 0.0005, "step": 353150 }, { "epoch": 41.670599339310996, "grad_norm": 0.03014030121266842, "learning_rate": 6.723519211654422e-06, "loss": 0.0008, "step": 353200 }, { "epoch": 41.67649834827749, "grad_norm": 0.0007151723839342594, "learning_rate": 6.7142182071835145e-06, "loss": 0.0009, "step": 353250 }, { "epoch": 41.682397357243985, "grad_norm": 0.00224220659583807, "learning_rate": 6.70492317739822e-06, "loss": 0.0009, "step": 353300 }, { "epoch": 41.68829636621048, "grad_norm": 0.0016826341161504388, "learning_rate": 6.695634123581518e-06, "loss": 0.0009, "step": 353350 }, { "epoch": 41.69419537517697, "grad_norm": 0.008265402168035507, "learning_rate": 6.686351047015554e-06, "loss": 0.0007, "step": 353400 }, { "epoch": 41.70009438414346, "grad_norm": 0.0004506933328229934, "learning_rate": 6.677073948981682e-06, "loss": 0.0005, "step": 353450 }, { "epoch": 41.70599339310996, "grad_norm": 0.2706994414329529, "learning_rate": 6.6678028307604054e-06, "loss": 0.0007, "step": 353500 }, { "epoch": 41.71189240207645, "grad_norm": 0.009250911884009838, "learning_rate": 6.6585376936313946e-06, "loss": 0.0002, "step": 353550 }, { "epoch": 41.717791411042946, "grad_norm": 0.23959340155124664, "learning_rate": 6.649278538873515e-06, "loss": 0.0015, "step": 353600 }, { "epoch": 41.72369042000944, "grad_norm": 0.0015280463267117739, "learning_rate": 6.6400253677647794e-06, "loss": 0.0013, "step": 353650 }, { "epoch": 41.729589428975935, "grad_norm": 0.0005718116881325841, "learning_rate": 6.630778181582414e-06, "loss": 0.0008, "step": 353700 }, { "epoch": 41.73548843794243, "grad_norm": 0.0004086898115929216, "learning_rate": 6.621536981602794e-06, "loss": 0.0006, "step": 353750 }, { "epoch": 41.74138744690892, "grad_norm": 0.06607384979724884, "learning_rate": 6.6123017691014645e-06, "loss": 0.0006, "step": 353800 }, { "epoch": 41.74728645587541, "grad_norm": 0.010943342931568623, "learning_rate": 6.60307254535315e-06, "loss": 0.0006, "step": 353850 }, { "epoch": 41.753185464841906, "grad_norm": 0.12698519229888916, "learning_rate": 6.593849311631755e-06, "loss": 0.0005, "step": 353900 }, { "epoch": 41.7590844738084, "grad_norm": 0.010583851486444473, "learning_rate": 6.58463206921035e-06, "loss": 0.0011, "step": 353950 }, { "epoch": 41.764983482774895, "grad_norm": 0.023415882140398026, "learning_rate": 6.575420819361177e-06, "loss": 0.0007, "step": 354000 }, { "epoch": 41.764983482774895, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010429498797748238, "eval_runtime": 2.0415, "eval_samples_per_second": 48.983, "eval_steps_per_second": 1.959, "eval_wer": 0.26, "step": 354000 }, { "epoch": 41.77088249174139, "grad_norm": 0.06868886202573776, "learning_rate": 6.5662155633556545e-06, "loss": 0.0005, "step": 354050 }, { "epoch": 41.776781500707884, "grad_norm": 0.0012095881393179297, "learning_rate": 6.557016302464375e-06, "loss": 0.0009, "step": 354100 }, { "epoch": 41.78268050967438, "grad_norm": 0.0028435993008315563, "learning_rate": 6.54782303795709e-06, "loss": 0.0008, "step": 354150 }, { "epoch": 41.788579518640866, "grad_norm": 0.0017401310615241528, "learning_rate": 6.538635771102758e-06, "loss": 0.0008, "step": 354200 }, { "epoch": 41.79447852760736, "grad_norm": 0.0033562486059963703, "learning_rate": 6.529454503169474e-06, "loss": 0.0004, "step": 354250 }, { "epoch": 41.800377536573855, "grad_norm": 0.004526746459305286, "learning_rate": 6.5202792354245145e-06, "loss": 0.001, "step": 354300 }, { "epoch": 41.80627654554035, "grad_norm": 0.14899230003356934, "learning_rate": 6.511109969134338e-06, "loss": 0.0004, "step": 354350 }, { "epoch": 41.812175554506844, "grad_norm": 3.894860128639266e-05, "learning_rate": 6.501946705564565e-06, "loss": 0.0005, "step": 354400 }, { "epoch": 41.81807456347334, "grad_norm": 0.023896599188447, "learning_rate": 6.492789445979991e-06, "loss": 0.0007, "step": 354450 }, { "epoch": 41.82397357243983, "grad_norm": 0.003957816399633884, "learning_rate": 6.483638191644575e-06, "loss": 0.0008, "step": 354500 }, { "epoch": 41.82987258140632, "grad_norm": 0.13099540770053864, "learning_rate": 6.474492943821464e-06, "loss": 0.001, "step": 354550 }, { "epoch": 41.835771590372815, "grad_norm": 0.07097536325454712, "learning_rate": 6.4653537037729585e-06, "loss": 0.0011, "step": 354600 }, { "epoch": 41.84167059933931, "grad_norm": 0.0005154450773261487, "learning_rate": 6.456220472760544e-06, "loss": 0.0007, "step": 354650 }, { "epoch": 41.847569608305804, "grad_norm": 0.0027872039936482906, "learning_rate": 6.447093252044856e-06, "loss": 0.0007, "step": 354700 }, { "epoch": 41.8534686172723, "grad_norm": 0.011372909881174564, "learning_rate": 6.43797204288572e-06, "loss": 0.0007, "step": 354750 }, { "epoch": 41.85936762623879, "grad_norm": 0.009000582620501518, "learning_rate": 6.428856846542136e-06, "loss": 0.0009, "step": 354800 }, { "epoch": 41.86526663520529, "grad_norm": 0.1476360708475113, "learning_rate": 6.419747664272257e-06, "loss": 0.0004, "step": 354850 }, { "epoch": 41.87116564417178, "grad_norm": 0.05471205338835716, "learning_rate": 6.4106444973334125e-06, "loss": 0.001, "step": 354900 }, { "epoch": 41.87706465313827, "grad_norm": 0.01944637857377529, "learning_rate": 6.401547346982106e-06, "loss": 0.0008, "step": 354950 }, { "epoch": 41.882963662104764, "grad_norm": 0.09213699400424957, "learning_rate": 6.392456214473996e-06, "loss": 0.0008, "step": 355000 }, { "epoch": 41.882963662104764, "eval_cer": 0.08347676419965576, "eval_loss": 0.00013757171109318733, "eval_runtime": 2.0844, "eval_samples_per_second": 47.976, "eval_steps_per_second": 1.919, "eval_wer": 0.26, "step": 355000 }, { "epoch": 41.88886267107126, "grad_norm": 0.0010726008331403136, "learning_rate": 6.383371101063929e-06, "loss": 0.0006, "step": 355050 }, { "epoch": 41.89476168003775, "grad_norm": 0.0017097811214625835, "learning_rate": 6.3742920080059164e-06, "loss": 0.0007, "step": 355100 }, { "epoch": 41.90066068900425, "grad_norm": 0.18403509259223938, "learning_rate": 6.365218936553124e-06, "loss": 0.001, "step": 355150 }, { "epoch": 41.90655969797074, "grad_norm": 0.0889328271150589, "learning_rate": 6.356151887957901e-06, "loss": 0.0008, "step": 355200 }, { "epoch": 41.91245870693724, "grad_norm": 0.11759788542985916, "learning_rate": 6.347090863471766e-06, "loss": 0.0006, "step": 355250 }, { "epoch": 41.91835771590373, "grad_norm": 0.14871817827224731, "learning_rate": 6.338035864345382e-06, "loss": 0.0008, "step": 355300 }, { "epoch": 41.92425672487022, "grad_norm": 0.007972517050802708, "learning_rate": 6.328986891828631e-06, "loss": 0.0009, "step": 355350 }, { "epoch": 41.93015573383671, "grad_norm": 0.0059228516183793545, "learning_rate": 6.31994394717052e-06, "loss": 0.0006, "step": 355400 }, { "epoch": 41.93605474280321, "grad_norm": 0.006505509372800589, "learning_rate": 6.310907031619228e-06, "loss": 0.0007, "step": 355450 }, { "epoch": 41.9419537517697, "grad_norm": 0.005984521936625242, "learning_rate": 6.301876146422109e-06, "loss": 0.0007, "step": 355500 }, { "epoch": 41.9478527607362, "grad_norm": 0.026963844895362854, "learning_rate": 6.292851292825702e-06, "loss": 0.0005, "step": 355550 }, { "epoch": 41.95375176970269, "grad_norm": 0.014307653531432152, "learning_rate": 6.283832472075685e-06, "loss": 0.0009, "step": 355600 }, { "epoch": 41.959650778669186, "grad_norm": 0.0033476860262453556, "learning_rate": 6.274819685416916e-06, "loss": 0.0006, "step": 355650 }, { "epoch": 41.96554978763568, "grad_norm": 0.20679974555969238, "learning_rate": 6.265812934093418e-06, "loss": 0.0008, "step": 355700 }, { "epoch": 41.97144879660217, "grad_norm": 0.003203749656677246, "learning_rate": 6.25681221934839e-06, "loss": 0.0013, "step": 355750 }, { "epoch": 41.97734780556866, "grad_norm": 0.0013350594090297818, "learning_rate": 6.247817542424178e-06, "loss": 0.001, "step": 355800 }, { "epoch": 41.98324681453516, "grad_norm": 0.0020646289922297, "learning_rate": 6.238828904562316e-06, "loss": 0.0006, "step": 355850 }, { "epoch": 41.98914582350165, "grad_norm": 0.00820358470082283, "learning_rate": 6.229846307003495e-06, "loss": 0.0004, "step": 355900 }, { "epoch": 41.995044832468146, "grad_norm": 0.16177724301815033, "learning_rate": 6.220869750987568e-06, "loss": 0.0005, "step": 355950 }, { "epoch": 42.00094384143464, "grad_norm": 0.1288089156150818, "learning_rate": 6.21189923775356e-06, "loss": 0.0007, "step": 356000 }, { "epoch": 42.00094384143464, "eval_cer": 0.08347676419965576, "eval_loss": 0.0001126666393247433, "eval_runtime": 2.1004, "eval_samples_per_second": 47.61, "eval_steps_per_second": 1.904, "eval_wer": 0.26, "step": 356000 }, { "epoch": 42.006842850401135, "grad_norm": 0.0861755833029747, "learning_rate": 6.2029347685396585e-06, "loss": 0.0004, "step": 356050 }, { "epoch": 42.01274185936763, "grad_norm": 0.0002759938652161509, "learning_rate": 6.193976344583214e-06, "loss": 0.001, "step": 356100 }, { "epoch": 42.01864086833412, "grad_norm": 0.011736605316400528, "learning_rate": 6.185023967120762e-06, "loss": 0.0013, "step": 356150 }, { "epoch": 42.02453987730061, "grad_norm": 0.02971830777823925, "learning_rate": 6.1760776373879836e-06, "loss": 0.0005, "step": 356200 }, { "epoch": 42.030438886267106, "grad_norm": 0.006069419905543327, "learning_rate": 6.167137356619723e-06, "loss": 0.0006, "step": 356250 }, { "epoch": 42.0363378952336, "grad_norm": 0.041457049548625946, "learning_rate": 6.158203126050005e-06, "loss": 0.0008, "step": 356300 }, { "epoch": 42.042236904200095, "grad_norm": 0.0012610642006620765, "learning_rate": 6.149274946912004e-06, "loss": 0.0006, "step": 356350 }, { "epoch": 42.04813591316659, "grad_norm": 0.002452430548146367, "learning_rate": 6.140352820438067e-06, "loss": 0.0011, "step": 356400 }, { "epoch": 42.054034922133084, "grad_norm": 0.0017716914881020784, "learning_rate": 6.131436747859709e-06, "loss": 0.0007, "step": 356450 }, { "epoch": 42.05993393109958, "grad_norm": 0.00013154212501831353, "learning_rate": 6.122526730407602e-06, "loss": 0.0005, "step": 356500 }, { "epoch": 42.065832940066066, "grad_norm": 0.09932184964418411, "learning_rate": 6.113622769311589e-06, "loss": 0.0004, "step": 356550 }, { "epoch": 42.07173194903256, "grad_norm": 0.03032097779214382, "learning_rate": 6.104724865800665e-06, "loss": 0.0008, "step": 356600 }, { "epoch": 42.077630957999055, "grad_norm": 0.01459240261465311, "learning_rate": 6.095833021103004e-06, "loss": 0.0005, "step": 356650 }, { "epoch": 42.08352996696555, "grad_norm": 0.0023799468763172626, "learning_rate": 6.086947236445928e-06, "loss": 0.0007, "step": 356700 }, { "epoch": 42.089428975932044, "grad_norm": 0.0023096916265785694, "learning_rate": 6.078067513055946e-06, "loss": 0.0008, "step": 356750 }, { "epoch": 42.09532798489854, "grad_norm": 0.09687239676713943, "learning_rate": 6.069193852158711e-06, "loss": 0.001, "step": 356800 }, { "epoch": 42.10122699386503, "grad_norm": 0.10998747497797012, "learning_rate": 6.060326254979037e-06, "loss": 0.0009, "step": 356850 }, { "epoch": 42.10712600283152, "grad_norm": 0.0004130793095100671, "learning_rate": 6.0514647227409126e-06, "loss": 0.0007, "step": 356900 }, { "epoch": 42.113025011798015, "grad_norm": 0.0230236928910017, "learning_rate": 6.042609256667476e-06, "loss": 0.0005, "step": 356950 }, { "epoch": 42.11892402076451, "grad_norm": 0.024055741727352142, "learning_rate": 6.033759857981058e-06, "loss": 0.0008, "step": 357000 }, { "epoch": 42.11892402076451, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010053720325231552, "eval_runtime": 2.1112, "eval_samples_per_second": 47.367, "eval_steps_per_second": 1.895, "eval_wer": 0.26, "step": 357000 }, { "epoch": 42.124823029731004, "grad_norm": 0.0015384727157652378, "learning_rate": 6.0249165279031125e-06, "loss": 0.0006, "step": 357050 }, { "epoch": 42.1307220386975, "grad_norm": 0.22152434289455414, "learning_rate": 6.016079267654284e-06, "loss": 0.0007, "step": 357100 }, { "epoch": 42.13662104766399, "grad_norm": 0.026582298800349236, "learning_rate": 6.007248078454364e-06, "loss": 0.0008, "step": 357150 }, { "epoch": 42.14252005663049, "grad_norm": 0.05805143341422081, "learning_rate": 5.99842296152231e-06, "loss": 0.0006, "step": 357200 }, { "epoch": 42.14841906559698, "grad_norm": 0.0003207428671885282, "learning_rate": 5.989603918076242e-06, "loss": 0.0004, "step": 357250 }, { "epoch": 42.15431807456347, "grad_norm": 0.08255381137132645, "learning_rate": 5.980790949333448e-06, "loss": 0.0004, "step": 357300 }, { "epoch": 42.160217083529965, "grad_norm": 0.0007432557758875191, "learning_rate": 5.971984056510366e-06, "loss": 0.001, "step": 357350 }, { "epoch": 42.16611609249646, "grad_norm": 0.1502711921930313, "learning_rate": 5.963183240822606e-06, "loss": 0.0012, "step": 357400 }, { "epoch": 42.172015101462954, "grad_norm": 0.3338984549045563, "learning_rate": 5.95438850348492e-06, "loss": 0.0011, "step": 357450 }, { "epoch": 42.17791411042945, "grad_norm": 0.013228045776486397, "learning_rate": 5.945599845711258e-06, "loss": 0.0008, "step": 357500 }, { "epoch": 42.18381311939594, "grad_norm": 0.13961003720760345, "learning_rate": 5.936817268714695e-06, "loss": 0.0007, "step": 357550 }, { "epoch": 42.18971212836244, "grad_norm": 0.00018984002235811204, "learning_rate": 5.928040773707483e-06, "loss": 0.0005, "step": 357600 }, { "epoch": 42.19561113732893, "grad_norm": 0.0056249964982271194, "learning_rate": 5.919270361901025e-06, "loss": 0.0004, "step": 357650 }, { "epoch": 42.20151014629542, "grad_norm": 0.005267569795250893, "learning_rate": 5.910506034505902e-06, "loss": 0.0006, "step": 357700 }, { "epoch": 42.207409155261914, "grad_norm": 0.05472957342863083, "learning_rate": 5.901747792731832e-06, "loss": 0.0006, "step": 357750 }, { "epoch": 42.21330816422841, "grad_norm": 0.001422689063474536, "learning_rate": 5.892995637787713e-06, "loss": 0.0005, "step": 357800 }, { "epoch": 42.2192071731949, "grad_norm": 0.001616882043890655, "learning_rate": 5.884249570881595e-06, "loss": 0.0009, "step": 357850 }, { "epoch": 42.2251061821614, "grad_norm": 0.018962522968649864, "learning_rate": 5.8755095932206795e-06, "loss": 0.0007, "step": 357900 }, { "epoch": 42.23100519112789, "grad_norm": 0.046085093170404434, "learning_rate": 5.86677570601134e-06, "loss": 0.0002, "step": 357950 }, { "epoch": 42.236904200094386, "grad_norm": 0.0007042813231237233, "learning_rate": 5.858047910459108e-06, "loss": 0.0005, "step": 358000 }, { "epoch": 42.236904200094386, "eval_cer": 0.08347676419965576, "eval_loss": 0.00011562215513549745, "eval_runtime": 2.0644, "eval_samples_per_second": 48.44, "eval_steps_per_second": 1.938, "eval_wer": 0.26, "step": 358000 }, { "epoch": 42.24280320906088, "grad_norm": 0.01589987426996231, "learning_rate": 5.849326207768657e-06, "loss": 0.001, "step": 358050 }, { "epoch": 42.24870221802737, "grad_norm": 0.1023288294672966, "learning_rate": 5.840610599143853e-06, "loss": 0.0006, "step": 358100 }, { "epoch": 42.25460122699386, "grad_norm": 0.0003444976464379579, "learning_rate": 5.8319010857876945e-06, "loss": 0.0006, "step": 358150 }, { "epoch": 42.26050023596036, "grad_norm": 0.008526530116796494, "learning_rate": 5.823197668902342e-06, "loss": 0.0006, "step": 358200 }, { "epoch": 42.26639924492685, "grad_norm": 0.0016974257305264473, "learning_rate": 5.814500349689117e-06, "loss": 0.0005, "step": 358250 }, { "epoch": 42.27229825389335, "grad_norm": 0.3070315420627594, "learning_rate": 5.8058091293485e-06, "loss": 0.0007, "step": 358300 }, { "epoch": 42.27819726285984, "grad_norm": 0.001570550724864006, "learning_rate": 5.7971240090801345e-06, "loss": 0.0003, "step": 358350 }, { "epoch": 42.284096271826336, "grad_norm": 0.000132379776914604, "learning_rate": 5.78844499008282e-06, "loss": 0.0006, "step": 358400 }, { "epoch": 42.28999528079283, "grad_norm": 0.027816705405712128, "learning_rate": 5.779772073554502e-06, "loss": 0.0008, "step": 358450 }, { "epoch": 42.29589428975932, "grad_norm": 0.0024327028077095747, "learning_rate": 5.771105260692294e-06, "loss": 0.0004, "step": 358500 }, { "epoch": 42.30179329872581, "grad_norm": 0.0577823631465435, "learning_rate": 5.762444552692476e-06, "loss": 0.0008, "step": 358550 }, { "epoch": 42.30769230769231, "grad_norm": 0.029231639578938484, "learning_rate": 5.753789950750454e-06, "loss": 0.0006, "step": 358600 }, { "epoch": 42.3135913166588, "grad_norm": 0.009563441388309002, "learning_rate": 5.745141456060843e-06, "loss": 0.0005, "step": 358650 }, { "epoch": 42.319490325625296, "grad_norm": 0.008473214693367481, "learning_rate": 5.736499069817364e-06, "loss": 0.0005, "step": 358700 }, { "epoch": 42.32538933459179, "grad_norm": 0.0008540074340999126, "learning_rate": 5.727862793212924e-06, "loss": 0.0006, "step": 358750 }, { "epoch": 42.331288343558285, "grad_norm": 0.003339333925396204, "learning_rate": 5.719232627439558e-06, "loss": 0.0011, "step": 358800 }, { "epoch": 42.33718735252478, "grad_norm": 0.06561356782913208, "learning_rate": 5.710608573688509e-06, "loss": 0.0009, "step": 358850 }, { "epoch": 42.34308636149127, "grad_norm": 0.0015656081959605217, "learning_rate": 5.701990633150128e-06, "loss": 0.0008, "step": 358900 }, { "epoch": 42.34898537045776, "grad_norm": 0.042087242007255554, "learning_rate": 5.693378807013944e-06, "loss": 0.0008, "step": 358950 }, { "epoch": 42.354884379424256, "grad_norm": 2.781903458526358e-05, "learning_rate": 5.684773096468632e-06, "loss": 0.0005, "step": 359000 }, { "epoch": 42.354884379424256, "eval_cer": 0.08347676419965576, "eval_loss": 0.00012730751768685877, "eval_runtime": 2.0518, "eval_samples_per_second": 48.739, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 359000 }, { "epoch": 42.36078338839075, "grad_norm": 0.0006819998379796743, "learning_rate": 5.676173502702032e-06, "loss": 0.0006, "step": 359050 }, { "epoch": 42.366682397357245, "grad_norm": 0.19033296406269073, "learning_rate": 5.667580026901137e-06, "loss": 0.0004, "step": 359100 }, { "epoch": 42.37258140632374, "grad_norm": 0.03152892738580704, "learning_rate": 5.658992670252089e-06, "loss": 0.0008, "step": 359150 }, { "epoch": 42.378480415290234, "grad_norm": 0.00019063804938923568, "learning_rate": 5.650411433940189e-06, "loss": 0.001, "step": 359200 }, { "epoch": 42.38437942425672, "grad_norm": 0.00441023288294673, "learning_rate": 5.641836319149907e-06, "loss": 0.0008, "step": 359250 }, { "epoch": 42.390278433223216, "grad_norm": 0.10214363783597946, "learning_rate": 5.633267327064845e-06, "loss": 0.0007, "step": 359300 }, { "epoch": 42.39617744218971, "grad_norm": 0.0025906653609126806, "learning_rate": 5.624704458867775e-06, "loss": 0.0006, "step": 359350 }, { "epoch": 42.402076451156205, "grad_norm": 0.002141450298950076, "learning_rate": 5.616147715740611e-06, "loss": 0.0002, "step": 359400 }, { "epoch": 42.4079754601227, "grad_norm": 0.00017779225890990347, "learning_rate": 5.607597098864442e-06, "loss": 0.0003, "step": 359450 }, { "epoch": 42.413874469089194, "grad_norm": 0.003841706784442067, "learning_rate": 5.599052609419503e-06, "loss": 0.0006, "step": 359500 }, { "epoch": 42.41977347805569, "grad_norm": 0.00015428320330101997, "learning_rate": 5.590514248585171e-06, "loss": 0.001, "step": 359550 }, { "epoch": 42.42567248702218, "grad_norm": 0.09418334811925888, "learning_rate": 5.581982017539988e-06, "loss": 0.0007, "step": 359600 }, { "epoch": 42.43157149598867, "grad_norm": 0.13196565210819244, "learning_rate": 5.573455917461645e-06, "loss": 0.0006, "step": 359650 }, { "epoch": 42.437470504955165, "grad_norm": 0.34113994240760803, "learning_rate": 5.564935949526995e-06, "loss": 0.0006, "step": 359700 }, { "epoch": 42.44336951392166, "grad_norm": 0.000596985628362745, "learning_rate": 5.556422114912035e-06, "loss": 0.0008, "step": 359750 }, { "epoch": 42.449268522888154, "grad_norm": 0.0003904924087692052, "learning_rate": 5.5479144147919214e-06, "loss": 0.0006, "step": 359800 }, { "epoch": 42.45516753185465, "grad_norm": 0.00033851427724584937, "learning_rate": 5.539412850340958e-06, "loss": 0.0007, "step": 359850 }, { "epoch": 42.46106654082114, "grad_norm": 0.0046649472787976265, "learning_rate": 5.530917422732612e-06, "loss": 0.0008, "step": 359900 }, { "epoch": 42.46696554978764, "grad_norm": 0.025359585881233215, "learning_rate": 5.5224281331394955e-06, "loss": 0.0007, "step": 359950 }, { "epoch": 42.47286455875413, "grad_norm": 0.07757598161697388, "learning_rate": 5.51394498273336e-06, "loss": 0.0008, "step": 360000 }, { "epoch": 42.47286455875413, "eval_cer": 0.08347676419965576, "eval_loss": 9.460789442528039e-05, "eval_runtime": 2.0465, "eval_samples_per_second": 48.863, "eval_steps_per_second": 1.955, "eval_wer": 0.26, "step": 360000 }, { "epoch": 42.47876356772062, "grad_norm": 0.0018043607706204057, "learning_rate": 5.505467972685152e-06, "loss": 0.0006, "step": 360050 }, { "epoch": 42.484662576687114, "grad_norm": 0.0003246448468416929, "learning_rate": 5.496997104164931e-06, "loss": 0.0011, "step": 360100 }, { "epoch": 42.49056158565361, "grad_norm": 0.006570787634700537, "learning_rate": 5.4885323783419205e-06, "loss": 0.0006, "step": 360150 }, { "epoch": 42.4964605946201, "grad_norm": 0.2833688259124756, "learning_rate": 5.480073796384494e-06, "loss": 0.0006, "step": 360200 }, { "epoch": 42.5023596035866, "grad_norm": 0.0034370911307632923, "learning_rate": 5.471621359460177e-06, "loss": 0.0009, "step": 360250 }, { "epoch": 42.50825861255309, "grad_norm": 0.000473577092634514, "learning_rate": 5.463175068735659e-06, "loss": 0.0005, "step": 360300 }, { "epoch": 42.51415762151959, "grad_norm": 0.0012337856460362673, "learning_rate": 5.45473492537677e-06, "loss": 0.0005, "step": 360350 }, { "epoch": 42.52005663048608, "grad_norm": 0.021688729524612427, "learning_rate": 5.446300930548492e-06, "loss": 0.0016, "step": 360400 }, { "epoch": 42.52595563945257, "grad_norm": 0.014723514206707478, "learning_rate": 5.437873085414952e-06, "loss": 0.0011, "step": 360450 }, { "epoch": 42.53185464841906, "grad_norm": 0.0029089918825775385, "learning_rate": 5.429451391139445e-06, "loss": 0.0007, "step": 360500 }, { "epoch": 42.53775365738556, "grad_norm": 0.1746932417154312, "learning_rate": 5.421035848884404e-06, "loss": 0.001, "step": 360550 }, { "epoch": 42.54365266635205, "grad_norm": 0.0014847596175968647, "learning_rate": 5.412626459811415e-06, "loss": 0.0006, "step": 360600 }, { "epoch": 42.54955167531855, "grad_norm": 0.037100255489349365, "learning_rate": 5.4042232250812175e-06, "loss": 0.0013, "step": 360650 }, { "epoch": 42.55545068428504, "grad_norm": 0.003343473421409726, "learning_rate": 5.3958261458536995e-06, "loss": 0.0008, "step": 360700 }, { "epoch": 42.561349693251536, "grad_norm": 0.326979398727417, "learning_rate": 5.38743522328789e-06, "loss": 0.0009, "step": 360750 }, { "epoch": 42.56724870221803, "grad_norm": 0.04419347271323204, "learning_rate": 5.379050458541996e-06, "loss": 0.0006, "step": 360800 }, { "epoch": 42.57314771118452, "grad_norm": 0.0026144981384277344, "learning_rate": 5.370671852773351e-06, "loss": 0.0003, "step": 360850 }, { "epoch": 42.57904672015101, "grad_norm": 0.03918226808309555, "learning_rate": 5.362299407138444e-06, "loss": 0.0008, "step": 360900 }, { "epoch": 42.58494572911751, "grad_norm": 0.13008220493793488, "learning_rate": 5.3539331227929145e-06, "loss": 0.0006, "step": 360950 }, { "epoch": 42.590844738084, "grad_norm": 0.0016228220192715526, "learning_rate": 5.345573000891541e-06, "loss": 0.0006, "step": 361000 }, { "epoch": 42.590844738084, "eval_cer": 0.08347676419965576, "eval_loss": 9.575896547175944e-05, "eval_runtime": 2.0704, "eval_samples_per_second": 48.3, "eval_steps_per_second": 1.932, "eval_wer": 0.26, "step": 361000 }, { "epoch": 42.596743747050496, "grad_norm": 0.00034972329740412533, "learning_rate": 5.337219042588276e-06, "loss": 0.0009, "step": 361050 }, { "epoch": 42.60264275601699, "grad_norm": 0.037531349807977676, "learning_rate": 5.328871249036194e-06, "loss": 0.0006, "step": 361100 }, { "epoch": 42.608541764983485, "grad_norm": 0.0009596789604984224, "learning_rate": 5.320529621387543e-06, "loss": 0.0007, "step": 361150 }, { "epoch": 42.61444077394998, "grad_norm": 0.022505611181259155, "learning_rate": 5.312194160793693e-06, "loss": 0.0011, "step": 361200 }, { "epoch": 42.62033978291647, "grad_norm": 0.0010504414094612002, "learning_rate": 5.303864868405195e-06, "loss": 0.0006, "step": 361250 }, { "epoch": 42.62623879188296, "grad_norm": 0.003875792259350419, "learning_rate": 5.295541745371718e-06, "loss": 0.0007, "step": 361300 }, { "epoch": 42.632137800849456, "grad_norm": 0.0046899402514100075, "learning_rate": 5.287224792842088e-06, "loss": 0.0005, "step": 361350 }, { "epoch": 42.63803680981595, "grad_norm": 0.0007563629187643528, "learning_rate": 5.2789140119643035e-06, "loss": 0.0007, "step": 361400 }, { "epoch": 42.643935818782445, "grad_norm": 0.00020657585992012173, "learning_rate": 5.270609403885484e-06, "loss": 0.0011, "step": 361450 }, { "epoch": 42.64983482774894, "grad_norm": 0.07162259519100189, "learning_rate": 5.262310969751904e-06, "loss": 0.0009, "step": 361500 }, { "epoch": 42.655733836715434, "grad_norm": 0.1302977353334427, "learning_rate": 5.2540187107089855e-06, "loss": 0.0008, "step": 361550 }, { "epoch": 42.66163284568193, "grad_norm": 0.031395815312862396, "learning_rate": 5.245732627901301e-06, "loss": 0.0007, "step": 361600 }, { "epoch": 42.667531854648416, "grad_norm": 0.010132408700883389, "learning_rate": 5.2374527224725636e-06, "loss": 0.0004, "step": 361650 }, { "epoch": 42.67343086361491, "grad_norm": 0.004577077459543943, "learning_rate": 5.229178995565643e-06, "loss": 0.0007, "step": 361700 }, { "epoch": 42.679329872581405, "grad_norm": 0.07722111791372299, "learning_rate": 5.220911448322552e-06, "loss": 0.0007, "step": 361750 }, { "epoch": 42.6852288815479, "grad_norm": 0.20194481313228607, "learning_rate": 5.212650081884451e-06, "loss": 0.0006, "step": 361800 }, { "epoch": 42.691127890514394, "grad_norm": 0.0007905585807748139, "learning_rate": 5.204394897391645e-06, "loss": 0.0005, "step": 361850 }, { "epoch": 42.69702689948089, "grad_norm": 0.017640121281147003, "learning_rate": 5.196145895983578e-06, "loss": 0.0007, "step": 361900 }, { "epoch": 42.702925908447384, "grad_norm": 0.0257077906280756, "learning_rate": 5.187903078798867e-06, "loss": 0.0006, "step": 361950 }, { "epoch": 42.70882491741387, "grad_norm": 0.0016497302567586303, "learning_rate": 5.179666446975256e-06, "loss": 0.0009, "step": 362000 }, { "epoch": 42.70882491741387, "eval_cer": 0.08347676419965576, "eval_loss": 7.581377576570958e-05, "eval_runtime": 2.0445, "eval_samples_per_second": 48.913, "eval_steps_per_second": 1.957, "eval_wer": 0.26, "step": 362000 }, { "epoch": 42.714723926380366, "grad_norm": 0.004839599598199129, "learning_rate": 5.1714360016496325e-06, "loss": 0.0008, "step": 362050 }, { "epoch": 42.72062293534686, "grad_norm": 0.009075704962015152, "learning_rate": 5.163211743958024e-06, "loss": 0.0006, "step": 362100 }, { "epoch": 42.726521944313355, "grad_norm": 0.12963831424713135, "learning_rate": 5.154993675035641e-06, "loss": 0.001, "step": 362150 }, { "epoch": 42.73242095327985, "grad_norm": 0.03986460715532303, "learning_rate": 5.1467817960167975e-06, "loss": 0.0005, "step": 362200 }, { "epoch": 42.738319962246344, "grad_norm": 0.01779225841164589, "learning_rate": 5.13857610803497e-06, "loss": 0.0003, "step": 362250 }, { "epoch": 42.74421897121284, "grad_norm": 0.11989954113960266, "learning_rate": 5.130376612222782e-06, "loss": 0.001, "step": 362300 }, { "epoch": 42.75011798017933, "grad_norm": 0.014851143583655357, "learning_rate": 5.122183309712003e-06, "loss": 0.0009, "step": 362350 }, { "epoch": 42.75601698914582, "grad_norm": 0.0933140218257904, "learning_rate": 5.113996201633536e-06, "loss": 0.0005, "step": 362400 }, { "epoch": 42.761915998112315, "grad_norm": 0.186708003282547, "learning_rate": 5.105815289117449e-06, "loss": 0.0009, "step": 362450 }, { "epoch": 42.76781500707881, "grad_norm": 0.00036935595562681556, "learning_rate": 5.0976405732929385e-06, "loss": 0.0003, "step": 362500 }, { "epoch": 42.773714016045304, "grad_norm": 0.00022586819250136614, "learning_rate": 5.089472055288352e-06, "loss": 0.001, "step": 362550 }, { "epoch": 42.7796130250118, "grad_norm": 9.910711378324777e-05, "learning_rate": 5.081309736231177e-06, "loss": 0.0005, "step": 362600 }, { "epoch": 42.78551203397829, "grad_norm": 0.04791362211108208, "learning_rate": 5.07315361724805e-06, "loss": 0.0003, "step": 362650 }, { "epoch": 42.79141104294479, "grad_norm": 0.09789086878299713, "learning_rate": 5.065003699464743e-06, "loss": 0.0007, "step": 362700 }, { "epoch": 42.79731005191128, "grad_norm": 0.21433937549591064, "learning_rate": 5.0568599840062015e-06, "loss": 0.0007, "step": 362750 }, { "epoch": 42.80320906087777, "grad_norm": 0.06681317836046219, "learning_rate": 5.048722471996475e-06, "loss": 0.0009, "step": 362800 }, { "epoch": 42.809108069844264, "grad_norm": 0.0291910357773304, "learning_rate": 5.040591164558784e-06, "loss": 0.0005, "step": 362850 }, { "epoch": 42.81500707881076, "grad_norm": 0.021031051874160767, "learning_rate": 5.032466062815477e-06, "loss": 0.0008, "step": 362900 }, { "epoch": 42.82090608777725, "grad_norm": 0.020715132355690002, "learning_rate": 5.024347167888055e-06, "loss": 0.0008, "step": 362950 }, { "epoch": 42.82680509674375, "grad_norm": 0.0636184886097908, "learning_rate": 5.016234480897159e-06, "loss": 0.0008, "step": 363000 }, { "epoch": 42.82680509674375, "eval_cer": 0.08347676419965576, "eval_loss": 7.327509229071438e-05, "eval_runtime": 2.0336, "eval_samples_per_second": 49.175, "eval_steps_per_second": 1.967, "eval_wer": 0.26, "step": 363000 }, { "epoch": 42.83270410571024, "grad_norm": 0.02632753551006317, "learning_rate": 5.008128002962575e-06, "loss": 0.0008, "step": 363050 }, { "epoch": 42.83860311467674, "grad_norm": 0.0016935147577896714, "learning_rate": 5.000027735203222e-06, "loss": 0.0005, "step": 363100 }, { "epoch": 42.84450212364323, "grad_norm": 0.0006764804711565375, "learning_rate": 4.991933678737187e-06, "loss": 0.0006, "step": 363150 }, { "epoch": 42.85040113260972, "grad_norm": 0.03379947319626808, "learning_rate": 4.983845834681666e-06, "loss": 0.0008, "step": 363200 }, { "epoch": 42.85630014157621, "grad_norm": 0.0560733824968338, "learning_rate": 4.9757642041530275e-06, "loss": 0.0007, "step": 363250 }, { "epoch": 42.86219915054271, "grad_norm": 0.04013439640402794, "learning_rate": 4.9676887882667615e-06, "loss": 0.0008, "step": 363300 }, { "epoch": 42.8680981595092, "grad_norm": 0.002000299980863929, "learning_rate": 4.959619588137498e-06, "loss": 0.0004, "step": 363350 }, { "epoch": 42.8739971684757, "grad_norm": 0.03297140821814537, "learning_rate": 4.951556604879048e-06, "loss": 0.0004, "step": 363400 }, { "epoch": 42.87989617744219, "grad_norm": 0.21924565732479095, "learning_rate": 4.943499839604315e-06, "loss": 0.0004, "step": 363450 }, { "epoch": 42.885795186408686, "grad_norm": 0.0006898167775943875, "learning_rate": 4.935449293425371e-06, "loss": 0.0008, "step": 363500 }, { "epoch": 42.89169419537518, "grad_norm": 0.00047177085070870817, "learning_rate": 4.927404967453414e-06, "loss": 0.0007, "step": 363550 }, { "epoch": 42.89759320434167, "grad_norm": 0.0025894518475979567, "learning_rate": 4.919366862798807e-06, "loss": 0.0004, "step": 363600 }, { "epoch": 42.90349221330816, "grad_norm": 0.012542558833956718, "learning_rate": 4.911334980571036e-06, "loss": 0.0006, "step": 363650 }, { "epoch": 42.90939122227466, "grad_norm": 0.00434571597725153, "learning_rate": 4.903309321878729e-06, "loss": 0.0006, "step": 363700 }, { "epoch": 42.91529023124115, "grad_norm": 0.0037993472069501877, "learning_rate": 4.895289887829663e-06, "loss": 0.0008, "step": 363750 }, { "epoch": 42.921189240207646, "grad_norm": 0.12748533487319946, "learning_rate": 4.8872766795307445e-06, "loss": 0.0009, "step": 363800 }, { "epoch": 42.92708824917414, "grad_norm": 0.012844588607549667, "learning_rate": 4.87926969808803e-06, "loss": 0.0007, "step": 363850 }, { "epoch": 42.932987258140635, "grad_norm": 0.10049664229154587, "learning_rate": 4.8712689446067105e-06, "loss": 0.0006, "step": 363900 }, { "epoch": 42.93888626710713, "grad_norm": 0.0014984968584030867, "learning_rate": 4.8632744201911275e-06, "loss": 0.0004, "step": 363950 }, { "epoch": 42.94478527607362, "grad_norm": 4.2402392864460126e-05, "learning_rate": 4.855286125944752e-06, "loss": 0.0006, "step": 364000 }, { "epoch": 42.94478527607362, "eval_cer": 0.08347676419965576, "eval_loss": 8.948002505348995e-05, "eval_runtime": 2.0804, "eval_samples_per_second": 48.067, "eval_steps_per_second": 1.923, "eval_wer": 0.26, "step": 364000 }, { "epoch": 42.95068428504011, "grad_norm": 0.05031605437397957, "learning_rate": 4.847304062970193e-06, "loss": 0.0005, "step": 364050 }, { "epoch": 42.956583294006606, "grad_norm": 0.05379495397210121, "learning_rate": 4.839328232369217e-06, "loss": 0.0008, "step": 364100 }, { "epoch": 42.9624823029731, "grad_norm": 0.00607313634827733, "learning_rate": 4.8313586352427164e-06, "loss": 0.001, "step": 364150 }, { "epoch": 42.968381311939595, "grad_norm": 0.0016821997705847025, "learning_rate": 4.823395272690722e-06, "loss": 0.0006, "step": 364200 }, { "epoch": 42.97428032090609, "grad_norm": 0.013562419451773167, "learning_rate": 4.815438145812401e-06, "loss": 0.0006, "step": 364250 }, { "epoch": 42.980179329872584, "grad_norm": 0.0008957590907812119, "learning_rate": 4.8074872557060775e-06, "loss": 0.0006, "step": 364300 }, { "epoch": 42.98607833883908, "grad_norm": 0.008703524246811867, "learning_rate": 4.799542603469198e-06, "loss": 0.0008, "step": 364350 }, { "epoch": 42.991977347805566, "grad_norm": 0.0028339517302811146, "learning_rate": 4.791604190198357e-06, "loss": 0.0006, "step": 364400 }, { "epoch": 42.99787635677206, "grad_norm": 0.0027694515883922577, "learning_rate": 4.783672016989277e-06, "loss": 0.0007, "step": 364450 }, { "epoch": 43.003775365738555, "grad_norm": 0.005368968937546015, "learning_rate": 4.775746084936833e-06, "loss": 0.0008, "step": 364500 }, { "epoch": 43.00967437470505, "grad_norm": 0.002591226017102599, "learning_rate": 4.767826395135034e-06, "loss": 0.0011, "step": 364550 }, { "epoch": 43.015573383671544, "grad_norm": 0.0009027054766193032, "learning_rate": 4.759912948677015e-06, "loss": 0.0005, "step": 364600 }, { "epoch": 43.02147239263804, "grad_norm": 9.706212586024776e-05, "learning_rate": 4.752005746655064e-06, "loss": 0.0004, "step": 364650 }, { "epoch": 43.02737140160453, "grad_norm": 0.026958171278238297, "learning_rate": 4.744104790160608e-06, "loss": 0.0007, "step": 364700 }, { "epoch": 43.03327041057102, "grad_norm": 0.21342431008815765, "learning_rate": 4.736210080284204e-06, "loss": 0.0006, "step": 364750 }, { "epoch": 43.039169419537515, "grad_norm": 0.02766277827322483, "learning_rate": 4.728321618115555e-06, "loss": 0.0006, "step": 364800 }, { "epoch": 43.04506842850401, "grad_norm": 0.04070170968770981, "learning_rate": 4.720439404743487e-06, "loss": 0.0005, "step": 364850 }, { "epoch": 43.050967437470504, "grad_norm": 0.046365510672330856, "learning_rate": 4.712563441255974e-06, "loss": 0.0003, "step": 364900 }, { "epoch": 43.056866446437, "grad_norm": 0.005567751359194517, "learning_rate": 4.704693728740134e-06, "loss": 0.0008, "step": 364950 }, { "epoch": 43.06276545540349, "grad_norm": 0.004497955087572336, "learning_rate": 4.696830268282204e-06, "loss": 0.0004, "step": 365000 }, { "epoch": 43.06276545540349, "eval_cer": 0.08347676419965576, "eval_loss": 8.550368511350825e-05, "eval_runtime": 2.0595, "eval_samples_per_second": 48.556, "eval_steps_per_second": 1.942, "eval_wer": 0.26, "step": 365000 }, { "epoch": 43.06866446436999, "grad_norm": 0.0006970983231440187, "learning_rate": 4.688973060967572e-06, "loss": 0.0004, "step": 365050 }, { "epoch": 43.07456347333648, "grad_norm": 0.17589125037193298, "learning_rate": 4.681122107880764e-06, "loss": 0.0005, "step": 365100 }, { "epoch": 43.08046248230297, "grad_norm": 0.012033062987029552, "learning_rate": 4.673277410105431e-06, "loss": 0.0006, "step": 365150 }, { "epoch": 43.086361491269464, "grad_norm": 0.0003560251207090914, "learning_rate": 4.6654389687243616e-06, "loss": 0.0014, "step": 365200 }, { "epoch": 43.09226050023596, "grad_norm": 0.0015239043859764934, "learning_rate": 4.657606784819501e-06, "loss": 0.0004, "step": 365250 }, { "epoch": 43.09815950920245, "grad_norm": 0.10314656794071198, "learning_rate": 4.649780859471914e-06, "loss": 0.0006, "step": 365300 }, { "epoch": 43.10405851816895, "grad_norm": 0.011581871658563614, "learning_rate": 4.641961193761801e-06, "loss": 0.0009, "step": 365350 }, { "epoch": 43.10995752713544, "grad_norm": 0.21328362822532654, "learning_rate": 4.634147788768489e-06, "loss": 0.0012, "step": 365400 }, { "epoch": 43.11585653610194, "grad_norm": 0.09540670365095139, "learning_rate": 4.626340645570476e-06, "loss": 0.0005, "step": 365450 }, { "epoch": 43.12175554506843, "grad_norm": 0.00029280074522830546, "learning_rate": 4.618539765245361e-06, "loss": 0.0005, "step": 365500 }, { "epoch": 43.12765455403492, "grad_norm": 0.011564034037292004, "learning_rate": 4.610745148869888e-06, "loss": 0.0006, "step": 365550 }, { "epoch": 43.13355356300141, "grad_norm": 0.0008336169412359595, "learning_rate": 4.6029567975199415e-06, "loss": 0.0008, "step": 365600 }, { "epoch": 43.13945257196791, "grad_norm": 0.02462979406118393, "learning_rate": 4.595174712270539e-06, "loss": 0.0008, "step": 365650 }, { "epoch": 43.1453515809344, "grad_norm": 0.006237931549549103, "learning_rate": 4.587398894195838e-06, "loss": 0.001, "step": 365700 }, { "epoch": 43.1512505899009, "grad_norm": 0.03806406259536743, "learning_rate": 4.579629344369113e-06, "loss": 0.0008, "step": 365750 }, { "epoch": 43.15714959886739, "grad_norm": 0.18666818737983704, "learning_rate": 4.571866063862795e-06, "loss": 0.0005, "step": 365800 }, { "epoch": 43.163048607833886, "grad_norm": 0.009748422540724277, "learning_rate": 4.564109053748439e-06, "loss": 0.0007, "step": 365850 }, { "epoch": 43.16894761680038, "grad_norm": 0.0021367024164646864, "learning_rate": 4.556358315096732e-06, "loss": 0.0006, "step": 365900 }, { "epoch": 43.17484662576687, "grad_norm": 0.004846243653446436, "learning_rate": 4.548613848977501e-06, "loss": 0.0007, "step": 365950 }, { "epoch": 43.18074563473336, "grad_norm": 0.0015236904146149755, "learning_rate": 4.540875656459703e-06, "loss": 0.0007, "step": 366000 }, { "epoch": 43.18074563473336, "eval_cer": 0.08347676419965576, "eval_loss": 9.016906551551074e-05, "eval_runtime": 2.0486, "eval_samples_per_second": 48.814, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 366000 }, { "epoch": 43.18664464369986, "grad_norm": 0.0006718502263538539, "learning_rate": 4.533143738611439e-06, "loss": 0.0008, "step": 366050 }, { "epoch": 43.19254365266635, "grad_norm": 0.015550358220934868, "learning_rate": 4.5254180964999395e-06, "loss": 0.0004, "step": 366100 }, { "epoch": 43.198442661632846, "grad_norm": 0.23154912889003754, "learning_rate": 4.517698731191555e-06, "loss": 0.0007, "step": 366150 }, { "epoch": 43.20434167059934, "grad_norm": 0.003616697620600462, "learning_rate": 4.5099856437517855e-06, "loss": 0.0007, "step": 366200 }, { "epoch": 43.210240679565835, "grad_norm": 0.022630935534834862, "learning_rate": 4.5022788352452606e-06, "loss": 0.0008, "step": 366250 }, { "epoch": 43.21613968853233, "grad_norm": 0.0009520413586869836, "learning_rate": 4.494578306735736e-06, "loss": 0.0004, "step": 366300 }, { "epoch": 43.22203869749882, "grad_norm": 0.007869434542953968, "learning_rate": 4.486884059286118e-06, "loss": 0.0009, "step": 366350 }, { "epoch": 43.22793770646531, "grad_norm": 0.00030082016019150615, "learning_rate": 4.479196093958421e-06, "loss": 0.0005, "step": 366400 }, { "epoch": 43.233836715431806, "grad_norm": 0.030535515397787094, "learning_rate": 4.47151441181381e-06, "loss": 0.0006, "step": 366450 }, { "epoch": 43.2397357243983, "grad_norm": 0.0656963586807251, "learning_rate": 4.463839013912585e-06, "loss": 0.0004, "step": 366500 }, { "epoch": 43.245634733364795, "grad_norm": 0.0021118037402629852, "learning_rate": 4.456169901314167e-06, "loss": 0.0004, "step": 366550 }, { "epoch": 43.25153374233129, "grad_norm": 0.0006059024599380791, "learning_rate": 4.448507075077118e-06, "loss": 0.0007, "step": 366600 }, { "epoch": 43.257432751297785, "grad_norm": 0.00032162884599529207, "learning_rate": 4.440850536259111e-06, "loss": 0.0009, "step": 366650 }, { "epoch": 43.26333176026428, "grad_norm": 0.0016488892724737525, "learning_rate": 4.433200285917e-06, "loss": 0.0005, "step": 366700 }, { "epoch": 43.26923076923077, "grad_norm": 0.13497750461101532, "learning_rate": 4.425556325106722e-06, "loss": 0.0008, "step": 366750 }, { "epoch": 43.27512977819726, "grad_norm": 0.015585221350193024, "learning_rate": 4.417918654883363e-06, "loss": 0.0007, "step": 366800 }, { "epoch": 43.281028787163756, "grad_norm": 0.0015904228202998638, "learning_rate": 4.41028727630114e-06, "loss": 0.0007, "step": 366850 }, { "epoch": 43.28692779613025, "grad_norm": 0.07631365209817886, "learning_rate": 4.402662190413414e-06, "loss": 0.0005, "step": 366900 }, { "epoch": 43.292826805096745, "grad_norm": 0.16436418890953064, "learning_rate": 4.395043398272664e-06, "loss": 0.001, "step": 366950 }, { "epoch": 43.29872581406324, "grad_norm": 0.0008904424030333757, "learning_rate": 4.3874309009305e-06, "loss": 0.0008, "step": 367000 }, { "epoch": 43.29872581406324, "eval_cer": 0.08347676419965576, "eval_loss": 8.558564150007442e-05, "eval_runtime": 2.0738, "eval_samples_per_second": 48.221, "eval_steps_per_second": 1.929, "eval_wer": 0.26, "step": 367000 }, { "epoch": 43.304624823029734, "grad_norm": 0.1597326397895813, "learning_rate": 4.379824699437663e-06, "loss": 0.0007, "step": 367050 }, { "epoch": 43.31052383199622, "grad_norm": 0.008640331216156483, "learning_rate": 4.372224794844032e-06, "loss": 0.0007, "step": 367100 }, { "epoch": 43.316422840962716, "grad_norm": 0.02819150499999523, "learning_rate": 4.364631188198609e-06, "loss": 0.0007, "step": 367150 }, { "epoch": 43.32232184992921, "grad_norm": 0.0354984775185585, "learning_rate": 4.357043880549539e-06, "loss": 0.0009, "step": 367200 }, { "epoch": 43.328220858895705, "grad_norm": 0.013653476722538471, "learning_rate": 4.349462872944082e-06, "loss": 0.0009, "step": 367250 }, { "epoch": 43.3341198678622, "grad_norm": 0.05269848555326462, "learning_rate": 4.341888166428637e-06, "loss": 0.001, "step": 367300 }, { "epoch": 43.340018876828694, "grad_norm": 0.00782681629061699, "learning_rate": 4.334319762048722e-06, "loss": 0.0006, "step": 367350 }, { "epoch": 43.34591788579519, "grad_norm": 0.0020241173915565014, "learning_rate": 4.326757660849012e-06, "loss": 0.0008, "step": 367400 }, { "epoch": 43.35181689476168, "grad_norm": 0.00028534233570098877, "learning_rate": 4.31920186387329e-06, "loss": 0.0007, "step": 367450 }, { "epoch": 43.35771590372817, "grad_norm": 0.0016773717943578959, "learning_rate": 4.311652372164471e-06, "loss": 0.0006, "step": 367500 }, { "epoch": 43.363614912694665, "grad_norm": 0.07394498586654663, "learning_rate": 4.304109186764604e-06, "loss": 0.0009, "step": 367550 }, { "epoch": 43.36951392166116, "grad_norm": 0.0001377730368403718, "learning_rate": 4.296572308714863e-06, "loss": 0.0003, "step": 367600 }, { "epoch": 43.375412930627654, "grad_norm": 0.012744656763970852, "learning_rate": 4.28904173905556e-06, "loss": 0.0006, "step": 367650 }, { "epoch": 43.38131193959415, "grad_norm": 0.005356610752642155, "learning_rate": 4.2815174788261245e-06, "loss": 0.0008, "step": 367700 }, { "epoch": 43.38721094856064, "grad_norm": 0.06646151095628738, "learning_rate": 4.273999529065126e-06, "loss": 0.0007, "step": 367750 }, { "epoch": 43.39310995752714, "grad_norm": 0.018971383571624756, "learning_rate": 4.266487890810256e-06, "loss": 0.0004, "step": 367800 }, { "epoch": 43.39900896649363, "grad_norm": 0.013556561432778835, "learning_rate": 4.258982565098341e-06, "loss": 0.0007, "step": 367850 }, { "epoch": 43.40490797546012, "grad_norm": 0.0008428796427324414, "learning_rate": 4.251483552965324e-06, "loss": 0.0007, "step": 367900 }, { "epoch": 43.410806984426614, "grad_norm": 0.012772956863045692, "learning_rate": 4.243990855446289e-06, "loss": 0.0016, "step": 367950 }, { "epoch": 43.41670599339311, "grad_norm": 0.002530799712985754, "learning_rate": 4.2365044735754365e-06, "loss": 0.0004, "step": 368000 }, { "epoch": 43.41670599339311, "eval_cer": 0.08347676419965576, "eval_loss": 8.107546454994008e-05, "eval_runtime": 2.1038, "eval_samples_per_second": 47.533, "eval_steps_per_second": 1.901, "eval_wer": 0.26, "step": 368000 }, { "epoch": 43.4226050023596, "grad_norm": 0.08615703880786896, "learning_rate": 4.229024408386123e-06, "loss": 0.0007, "step": 368050 }, { "epoch": 43.4285040113261, "grad_norm": 0.00027054152451455593, "learning_rate": 4.2215506609108e-06, "loss": 0.0008, "step": 368100 }, { "epoch": 43.43440302029259, "grad_norm": 0.014209160581231117, "learning_rate": 4.214083232181059e-06, "loss": 0.0007, "step": 368150 }, { "epoch": 43.44030202925909, "grad_norm": 0.007585580460727215, "learning_rate": 4.206622123227627e-06, "loss": 0.0005, "step": 368200 }, { "epoch": 43.44620103822558, "grad_norm": 0.022733839228749275, "learning_rate": 4.1991673350803455e-06, "loss": 0.0007, "step": 368250 }, { "epoch": 43.45210004719207, "grad_norm": 0.0038666727486997843, "learning_rate": 4.1917188687681906e-06, "loss": 0.0005, "step": 368300 }, { "epoch": 43.45799905615856, "grad_norm": 0.0006589122931472957, "learning_rate": 4.18427672531927e-06, "loss": 0.0005, "step": 368350 }, { "epoch": 43.46389806512506, "grad_norm": 0.04169965907931328, "learning_rate": 4.176840905760815e-06, "loss": 0.0009, "step": 368400 }, { "epoch": 43.46979707409155, "grad_norm": 0.009682324714958668, "learning_rate": 4.169411411119173e-06, "loss": 0.0009, "step": 368450 }, { "epoch": 43.47569608305805, "grad_norm": 0.002504851436242461, "learning_rate": 4.161988242419829e-06, "loss": 0.0009, "step": 368500 }, { "epoch": 43.48159509202454, "grad_norm": 0.22025735676288605, "learning_rate": 4.1545714006874126e-06, "loss": 0.0009, "step": 368550 }, { "epoch": 43.487494100991036, "grad_norm": 0.018619000911712646, "learning_rate": 4.147160886945645e-06, "loss": 0.0006, "step": 368600 }, { "epoch": 43.49339310995753, "grad_norm": 2.7363823392079212e-05, "learning_rate": 4.139756702217396e-06, "loss": 0.0004, "step": 368650 }, { "epoch": 43.49929211892402, "grad_norm": 0.0226820670068264, "learning_rate": 4.132358847524642e-06, "loss": 0.0006, "step": 368700 }, { "epoch": 43.50519112789051, "grad_norm": 0.0009632749133743346, "learning_rate": 4.124967323888529e-06, "loss": 0.0008, "step": 368750 }, { "epoch": 43.51109013685701, "grad_norm": 0.009152178652584553, "learning_rate": 4.117582132329284e-06, "loss": 0.0004, "step": 368800 }, { "epoch": 43.5169891458235, "grad_norm": 0.01804252900183201, "learning_rate": 4.110203273866275e-06, "loss": 0.0011, "step": 368850 }, { "epoch": 43.522888154789996, "grad_norm": 0.009077264927327633, "learning_rate": 4.102830749518005e-06, "loss": 0.0007, "step": 368900 }, { "epoch": 43.52878716375649, "grad_norm": 5.2150688134133816e-05, "learning_rate": 4.095464560302081e-06, "loss": 0.0008, "step": 368950 }, { "epoch": 43.534686172722985, "grad_norm": 0.00438406877219677, "learning_rate": 4.088104707235263e-06, "loss": 0.0005, "step": 369000 }, { "epoch": 43.534686172722985, "eval_cer": 0.08347676419965576, "eval_loss": 8.772162982495502e-05, "eval_runtime": 2.0747, "eval_samples_per_second": 48.201, "eval_steps_per_second": 1.928, "eval_wer": 0.26, "step": 369000 }, { "epoch": 43.54058518168948, "grad_norm": 0.21830755472183228, "learning_rate": 4.080751191333421e-06, "loss": 0.001, "step": 369050 }, { "epoch": 43.54648419065597, "grad_norm": 0.19766345620155334, "learning_rate": 4.073404013611542e-06, "loss": 0.001, "step": 369100 }, { "epoch": 43.55238319962246, "grad_norm": 0.26718512177467346, "learning_rate": 4.066063175083756e-06, "loss": 0.0006, "step": 369150 }, { "epoch": 43.558282208588956, "grad_norm": 0.04274751991033554, "learning_rate": 4.058728676763312e-06, "loss": 0.0007, "step": 369200 }, { "epoch": 43.56418121755545, "grad_norm": 0.2222556322813034, "learning_rate": 4.0514005196625845e-06, "loss": 0.0005, "step": 369250 }, { "epoch": 43.570080226521945, "grad_norm": 0.0027285628020763397, "learning_rate": 4.044078704793047e-06, "loss": 0.0004, "step": 369300 }, { "epoch": 43.57597923548844, "grad_norm": 0.0034088140819221735, "learning_rate": 4.036763233165359e-06, "loss": 0.0004, "step": 369350 }, { "epoch": 43.581878244454934, "grad_norm": 0.0163972657173872, "learning_rate": 4.029454105789237e-06, "loss": 0.0006, "step": 369400 }, { "epoch": 43.58777725342142, "grad_norm": 0.03920627012848854, "learning_rate": 4.022151323673567e-06, "loss": 0.0004, "step": 369450 }, { "epoch": 43.593676262387916, "grad_norm": 0.11297088116407394, "learning_rate": 4.0148548878263355e-06, "loss": 0.0006, "step": 369500 }, { "epoch": 43.59957527135441, "grad_norm": 0.02286255918443203, "learning_rate": 4.007564799254665e-06, "loss": 0.0012, "step": 369550 }, { "epoch": 43.605474280320905, "grad_norm": 0.001353873172774911, "learning_rate": 4.000281058964794e-06, "loss": 0.0007, "step": 369600 }, { "epoch": 43.6113732892874, "grad_norm": 0.00582945067435503, "learning_rate": 3.993003667962092e-06, "loss": 0.0003, "step": 369650 }, { "epoch": 43.617272298253894, "grad_norm": 0.0018210418056696653, "learning_rate": 3.985732627251048e-06, "loss": 0.0006, "step": 369700 }, { "epoch": 43.62317130722039, "grad_norm": 0.0010342200985178351, "learning_rate": 3.978467937835273e-06, "loss": 0.0004, "step": 369750 }, { "epoch": 43.62907031618688, "grad_norm": 0.004255283623933792, "learning_rate": 3.971209600717507e-06, "loss": 0.0006, "step": 369800 }, { "epoch": 43.63496932515337, "grad_norm": 0.05256995931267738, "learning_rate": 3.963957616899611e-06, "loss": 0.0006, "step": 369850 }, { "epoch": 43.640868334119865, "grad_norm": 0.005502045154571533, "learning_rate": 3.956711987382555e-06, "loss": 0.0008, "step": 369900 }, { "epoch": 43.64676734308636, "grad_norm": 0.004448542837053537, "learning_rate": 3.949472713166452e-06, "loss": 0.0005, "step": 369950 }, { "epoch": 43.652666352052854, "grad_norm": 0.05227859318256378, "learning_rate": 3.942239795250546e-06, "loss": 0.0005, "step": 370000 }, { "epoch": 43.652666352052854, "eval_cer": 0.08347676419965576, "eval_loss": 8.335252641700208e-05, "eval_runtime": 2.0404, "eval_samples_per_second": 49.01, "eval_steps_per_second": 1.96, "eval_wer": 0.26, "step": 370000 }, { "epoch": 43.65856536101935, "grad_norm": 0.10448852181434631, "learning_rate": 3.935013234633167e-06, "loss": 0.0006, "step": 370050 }, { "epoch": 43.66446436998584, "grad_norm": 0.0009271327289752662, "learning_rate": 3.927793032311805e-06, "loss": 0.0006, "step": 370100 }, { "epoch": 43.67036337895234, "grad_norm": 0.04845656827092171, "learning_rate": 3.92057918928303e-06, "loss": 0.0009, "step": 370150 }, { "epoch": 43.67626238791883, "grad_norm": 0.0031983277294784784, "learning_rate": 3.913371706542596e-06, "loss": 0.0004, "step": 370200 }, { "epoch": 43.68216139688532, "grad_norm": 0.17826426029205322, "learning_rate": 3.906170585085323e-06, "loss": 0.0005, "step": 370250 }, { "epoch": 43.688060405851814, "grad_norm": 0.004102855455130339, "learning_rate": 3.898975825905171e-06, "loss": 0.0004, "step": 370300 }, { "epoch": 43.69395941481831, "grad_norm": 0.2299359291791916, "learning_rate": 3.8917874299952304e-06, "loss": 0.0005, "step": 370350 }, { "epoch": 43.699858423784804, "grad_norm": 0.012315557338297367, "learning_rate": 3.884605398347707e-06, "loss": 0.0007, "step": 370400 }, { "epoch": 43.7057574327513, "grad_norm": 0.038811709731817245, "learning_rate": 3.87742973195393e-06, "loss": 0.0006, "step": 370450 }, { "epoch": 43.71165644171779, "grad_norm": 0.004556043539196253, "learning_rate": 3.87026043180434e-06, "loss": 0.0006, "step": 370500 }, { "epoch": 43.71755545068429, "grad_norm": 8.580350549891591e-05, "learning_rate": 3.8630974988885184e-06, "loss": 0.0006, "step": 370550 }, { "epoch": 43.72345445965078, "grad_norm": 0.05743179842829704, "learning_rate": 3.855940934195146e-06, "loss": 0.0005, "step": 370600 }, { "epoch": 43.72935346861727, "grad_norm": 0.001270546461455524, "learning_rate": 3.848790738712027e-06, "loss": 0.0005, "step": 370650 }, { "epoch": 43.735252477583764, "grad_norm": 0.00027007897733710706, "learning_rate": 3.841646913426122e-06, "loss": 0.0008, "step": 370700 }, { "epoch": 43.74115148655026, "grad_norm": 0.07925485074520111, "learning_rate": 3.834509459323466e-06, "loss": 0.0004, "step": 370750 }, { "epoch": 43.74705049551675, "grad_norm": 0.04332619532942772, "learning_rate": 3.82737837738924e-06, "loss": 0.0006, "step": 370800 }, { "epoch": 43.75294950448325, "grad_norm": 0.13706117868423462, "learning_rate": 3.8202536686077315e-06, "loss": 0.0006, "step": 370850 }, { "epoch": 43.75884851344974, "grad_norm": 0.0006502308533526957, "learning_rate": 3.8131353339623642e-06, "loss": 0.0011, "step": 370900 }, { "epoch": 43.764747522416236, "grad_norm": 0.10281660407781601, "learning_rate": 3.8060233744356633e-06, "loss": 0.0007, "step": 370950 }, { "epoch": 43.77064653138273, "grad_norm": 0.008260001428425312, "learning_rate": 3.7989177910092932e-06, "loss": 0.0007, "step": 371000 }, { "epoch": 43.77064653138273, "eval_cer": 0.08347676419965576, "eval_loss": 8.167148189386353e-05, "eval_runtime": 2.1032, "eval_samples_per_second": 47.546, "eval_steps_per_second": 1.902, "eval_wer": 0.26, "step": 371000 }, { "epoch": 43.77654554034922, "grad_norm": 0.019845934584736824, "learning_rate": 3.791818584664031e-06, "loss": 0.0005, "step": 371050 }, { "epoch": 43.78244454931571, "grad_norm": 0.00238299323245883, "learning_rate": 3.784725756379759e-06, "loss": 0.0006, "step": 371100 }, { "epoch": 43.78834355828221, "grad_norm": 0.00018891994841396809, "learning_rate": 3.7776393071355054e-06, "loss": 0.0009, "step": 371150 }, { "epoch": 43.7942425672487, "grad_norm": 4.087808702024631e-05, "learning_rate": 3.7705592379093934e-06, "loss": 0.0005, "step": 371200 }, { "epoch": 43.800141576215196, "grad_norm": 0.031961627304553986, "learning_rate": 3.763485549678686e-06, "loss": 0.0007, "step": 371250 }, { "epoch": 43.80604058518169, "grad_norm": 0.0006565938238054514, "learning_rate": 3.756418243419746e-06, "loss": 0.0005, "step": 371300 }, { "epoch": 43.811939594148186, "grad_norm": 0.14058001339435577, "learning_rate": 3.7493573201080766e-06, "loss": 0.0005, "step": 371350 }, { "epoch": 43.81783860311468, "grad_norm": 0.06902094930410385, "learning_rate": 3.7423027807182874e-06, "loss": 0.0003, "step": 371400 }, { "epoch": 43.82373761208117, "grad_norm": 0.024913964793086052, "learning_rate": 3.7352546262240995e-06, "loss": 0.0009, "step": 371450 }, { "epoch": 43.82963662104766, "grad_norm": 0.0023117633536458015, "learning_rate": 3.7282128575983734e-06, "loss": 0.0009, "step": 371500 }, { "epoch": 43.83553563001416, "grad_norm": 0.20165321230888367, "learning_rate": 3.7211774758130646e-06, "loss": 0.0004, "step": 371550 }, { "epoch": 43.84143463898065, "grad_norm": 0.10264545679092407, "learning_rate": 3.7141484818392637e-06, "loss": 0.001, "step": 371600 }, { "epoch": 43.847333647947146, "grad_norm": 0.0014711181866005063, "learning_rate": 3.7071258766471716e-06, "loss": 0.0004, "step": 371650 }, { "epoch": 43.85323265691364, "grad_norm": 0.00025294182705692947, "learning_rate": 3.7001096612061193e-06, "loss": 0.0004, "step": 371700 }, { "epoch": 43.859131665880135, "grad_norm": 0.0005179871805012226, "learning_rate": 3.6930998364845313e-06, "loss": 0.0007, "step": 371750 }, { "epoch": 43.86503067484663, "grad_norm": 0.00024129674420692027, "learning_rate": 3.6860964034499735e-06, "loss": 0.001, "step": 371800 }, { "epoch": 43.87092968381312, "grad_norm": 0.11477266252040863, "learning_rate": 3.679099363069127e-06, "loss": 0.0009, "step": 371850 }, { "epoch": 43.87682869277961, "grad_norm": 0.012793152593076229, "learning_rate": 3.6721087163077815e-06, "loss": 0.0007, "step": 371900 }, { "epoch": 43.882727701746106, "grad_norm": 0.006895734928548336, "learning_rate": 3.665124464130848e-06, "loss": 0.0008, "step": 371950 }, { "epoch": 43.8886267107126, "grad_norm": 0.04955296218395233, "learning_rate": 3.6581466075023443e-06, "loss": 0.0005, "step": 372000 }, { "epoch": 43.8886267107126, "eval_cer": 0.08347676419965576, "eval_loss": 8.085439912974834e-05, "eval_runtime": 2.1042, "eval_samples_per_second": 47.524, "eval_steps_per_second": 1.901, "eval_wer": 0.26, "step": 372000 }, { "epoch": 43.894525719679095, "grad_norm": 0.07650383561849594, "learning_rate": 3.6511751473854326e-06, "loss": 0.0005, "step": 372050 }, { "epoch": 43.90042472864559, "grad_norm": 0.008918561972677708, "learning_rate": 3.6442100847423667e-06, "loss": 0.0011, "step": 372100 }, { "epoch": 43.906323737612084, "grad_norm": 0.008704498410224915, "learning_rate": 3.6372514205345263e-06, "loss": 0.001, "step": 372150 }, { "epoch": 43.91222274657858, "grad_norm": 0.001986871240660548, "learning_rate": 3.630299155722411e-06, "loss": 0.0009, "step": 372200 }, { "epoch": 43.918121755545066, "grad_norm": 0.10076872259378433, "learning_rate": 3.6233532912656244e-06, "loss": 0.0008, "step": 372250 }, { "epoch": 43.92402076451156, "grad_norm": 0.0024116714484989643, "learning_rate": 3.616413828122911e-06, "loss": 0.0005, "step": 372300 }, { "epoch": 43.929919773478055, "grad_norm": 0.0028584341052919626, "learning_rate": 3.6094807672521047e-06, "loss": 0.0008, "step": 372350 }, { "epoch": 43.93581878244455, "grad_norm": 0.021300433203577995, "learning_rate": 3.6025541096101677e-06, "loss": 0.0004, "step": 372400 }, { "epoch": 43.941717791411044, "grad_norm": 0.0017061458202078938, "learning_rate": 3.5956338561531854e-06, "loss": 0.0005, "step": 372450 }, { "epoch": 43.94761680037754, "grad_norm": 0.0010841258335858583, "learning_rate": 3.588720007836349e-06, "loss": 0.0008, "step": 372500 }, { "epoch": 43.95351580934403, "grad_norm": 0.0004974070470780134, "learning_rate": 3.581812565613968e-06, "loss": 0.0006, "step": 372550 }, { "epoch": 43.95941481831052, "grad_norm": 0.030979448929429054, "learning_rate": 3.5749115304394733e-06, "loss": 0.0008, "step": 372600 }, { "epoch": 43.965313827277015, "grad_norm": 0.0014949231408536434, "learning_rate": 3.568016903265392e-06, "loss": 0.0006, "step": 372650 }, { "epoch": 43.97121283624351, "grad_norm": 0.00041085618431679904, "learning_rate": 3.5611286850433967e-06, "loss": 0.0009, "step": 372700 }, { "epoch": 43.977111845210004, "grad_norm": 0.00013981496158521622, "learning_rate": 3.5542468767242598e-06, "loss": 0.0009, "step": 372750 }, { "epoch": 43.9830108541765, "grad_norm": 0.09250283241271973, "learning_rate": 3.5473714792578607e-06, "loss": 0.0005, "step": 372800 }, { "epoch": 43.98890986314299, "grad_norm": 0.00012720527593046427, "learning_rate": 3.5405024935932118e-06, "loss": 0.0004, "step": 372850 }, { "epoch": 43.99480887210949, "grad_norm": 0.0015308576403185725, "learning_rate": 3.533639920678422e-06, "loss": 0.0008, "step": 372900 }, { "epoch": 44.00070788107598, "grad_norm": 0.0007834278512746096, "learning_rate": 3.526783761460728e-06, "loss": 0.0006, "step": 372950 }, { "epoch": 44.00660689004247, "grad_norm": 0.1084897443652153, "learning_rate": 3.519934016886478e-06, "loss": 0.0008, "step": 373000 }, { "epoch": 44.00660689004247, "eval_cer": 0.08347676419965576, "eval_loss": 8.083045395324007e-05, "eval_runtime": 2.0776, "eval_samples_per_second": 48.133, "eval_steps_per_second": 1.925, "eval_wer": 0.26, "step": 373000 }, { "epoch": 44.012505899008964, "grad_norm": 0.030902260914444923, "learning_rate": 3.5130906879011326e-06, "loss": 0.0005, "step": 373050 }, { "epoch": 44.01840490797546, "grad_norm": 0.009651189669966698, "learning_rate": 3.506253775449275e-06, "loss": 0.0005, "step": 373100 }, { "epoch": 44.02430391694195, "grad_norm": 0.0031389554496854544, "learning_rate": 3.499423280474584e-06, "loss": 0.0007, "step": 373150 }, { "epoch": 44.03020292590845, "grad_norm": 0.0014628426870331168, "learning_rate": 3.4925992039198774e-06, "loss": 0.0005, "step": 373200 }, { "epoch": 44.03610193487494, "grad_norm": 0.0759911760687828, "learning_rate": 3.485781546727057e-06, "loss": 0.0007, "step": 373250 }, { "epoch": 44.04200094384144, "grad_norm": 0.008536137640476227, "learning_rate": 3.4789703098371763e-06, "loss": 0.0003, "step": 373300 }, { "epoch": 44.04789995280793, "grad_norm": 0.17444881796836853, "learning_rate": 3.4721654941903715e-06, "loss": 0.001, "step": 373350 }, { "epoch": 44.05379896177442, "grad_norm": 0.004635790828615427, "learning_rate": 3.465367100725908e-06, "loss": 0.0006, "step": 373400 }, { "epoch": 44.05969797074091, "grad_norm": 0.0027770595625042915, "learning_rate": 3.4585751303821465e-06, "loss": 0.0008, "step": 373450 }, { "epoch": 44.06559697970741, "grad_norm": 0.12474976480007172, "learning_rate": 3.4517895840965986e-06, "loss": 0.0011, "step": 373500 }, { "epoch": 44.0714959886739, "grad_norm": 0.007882867008447647, "learning_rate": 3.4450104628058478e-06, "loss": 0.0006, "step": 373550 }, { "epoch": 44.0773949976404, "grad_norm": 0.10098337382078171, "learning_rate": 3.438237767445618e-06, "loss": 0.0004, "step": 373600 }, { "epoch": 44.08329400660689, "grad_norm": 0.01246635988354683, "learning_rate": 3.431471498950728e-06, "loss": 0.0006, "step": 373650 }, { "epoch": 44.089193015573386, "grad_norm": 0.0007132140453904867, "learning_rate": 3.4247116582551197e-06, "loss": 0.0006, "step": 373700 }, { "epoch": 44.09509202453988, "grad_norm": 0.03937361761927605, "learning_rate": 3.417958246291847e-06, "loss": 0.0005, "step": 373750 }, { "epoch": 44.10099103350637, "grad_norm": 0.014902769587934017, "learning_rate": 3.4112112639930804e-06, "loss": 0.0004, "step": 373800 }, { "epoch": 44.10689004247286, "grad_norm": 0.07239362597465515, "learning_rate": 3.404470712290092e-06, "loss": 0.0007, "step": 373850 }, { "epoch": 44.11278905143936, "grad_norm": 0.04315505549311638, "learning_rate": 3.397736592113271e-06, "loss": 0.0007, "step": 373900 }, { "epoch": 44.11868806040585, "grad_norm": 0.0034259359817951918, "learning_rate": 3.3910089043921177e-06, "loss": 0.0005, "step": 373950 }, { "epoch": 44.124587069372346, "grad_norm": 0.29671552777290344, "learning_rate": 3.384287650055257e-06, "loss": 0.0008, "step": 374000 }, { "epoch": 44.124587069372346, "eval_cer": 0.08347676419965576, "eval_loss": 8.513601642334834e-05, "eval_runtime": 2.0599, "eval_samples_per_second": 48.547, "eval_steps_per_second": 1.942, "eval_wer": 0.26, "step": 374000 }, { "epoch": 44.13048607833884, "grad_norm": 0.00025735373492352664, "learning_rate": 3.3775728300304123e-06, "loss": 0.0006, "step": 374050 }, { "epoch": 44.136385087305335, "grad_norm": 0.03657231479883194, "learning_rate": 3.3708644452444205e-06, "loss": 0.0005, "step": 374100 }, { "epoch": 44.14228409627183, "grad_norm": 0.0024653919972479343, "learning_rate": 3.3641624966232297e-06, "loss": 0.0004, "step": 374150 }, { "epoch": 44.14818310523832, "grad_norm": 0.004949683789163828, "learning_rate": 3.357466985091906e-06, "loss": 0.0007, "step": 374200 }, { "epoch": 44.15408211420481, "grad_norm": 0.02930867113173008, "learning_rate": 3.3507779115746206e-06, "loss": 0.0007, "step": 374250 }, { "epoch": 44.159981123171306, "grad_norm": 0.12344911694526672, "learning_rate": 3.3440952769946633e-06, "loss": 0.0009, "step": 374300 }, { "epoch": 44.1658801321378, "grad_norm": 0.002090041758492589, "learning_rate": 3.3374190822744234e-06, "loss": 0.0011, "step": 374350 }, { "epoch": 44.171779141104295, "grad_norm": 0.0013619392411783338, "learning_rate": 3.330749328335414e-06, "loss": 0.0006, "step": 374400 }, { "epoch": 44.17767815007079, "grad_norm": 0.07975800335407257, "learning_rate": 3.3240860160982433e-06, "loss": 0.0006, "step": 374450 }, { "epoch": 44.183577159037284, "grad_norm": 0.0025874795392155647, "learning_rate": 3.3174291464826536e-06, "loss": 0.0007, "step": 374500 }, { "epoch": 44.18947616800378, "grad_norm": 0.0007085720426402986, "learning_rate": 3.3107787204074813e-06, "loss": 0.0007, "step": 374550 }, { "epoch": 44.195375176970266, "grad_norm": 0.0012917949352413416, "learning_rate": 3.30413473879066e-06, "loss": 0.0002, "step": 374600 }, { "epoch": 44.20127418593676, "grad_norm": 0.0003980888577643782, "learning_rate": 3.2974972025492767e-06, "loss": 0.0007, "step": 374650 }, { "epoch": 44.207173194903255, "grad_norm": 0.027861451730132103, "learning_rate": 3.2908661125994944e-06, "loss": 0.0004, "step": 374700 }, { "epoch": 44.21307220386975, "grad_norm": 0.08969929814338684, "learning_rate": 3.284241469856586e-06, "loss": 0.0001, "step": 374750 }, { "epoch": 44.218971212836244, "grad_norm": 0.11932247877120972, "learning_rate": 3.2776232752349534e-06, "loss": 0.0007, "step": 374800 }, { "epoch": 44.22487022180274, "grad_norm": 2.5531666324241087e-05, "learning_rate": 3.271011529648088e-06, "loss": 0.0006, "step": 374850 }, { "epoch": 44.23076923076923, "grad_norm": 0.0014923012349754572, "learning_rate": 3.26440623400861e-06, "loss": 0.0005, "step": 374900 }, { "epoch": 44.23666823973572, "grad_norm": 0.000828639545943588, "learning_rate": 3.25780738922824e-06, "loss": 0.0008, "step": 374950 }, { "epoch": 44.242567248702215, "grad_norm": 0.0019516339525580406, "learning_rate": 3.2512149962177998e-06, "loss": 0.0006, "step": 375000 }, { "epoch": 44.242567248702215, "eval_cer": 0.08347676419965576, "eval_loss": 9.629131091060117e-05, "eval_runtime": 2.0597, "eval_samples_per_second": 48.55, "eval_steps_per_second": 1.942, "eval_wer": 0.26, "step": 375000 }, { "epoch": 44.24846625766871, "grad_norm": 0.0028024485800415277, "learning_rate": 3.2446290558872385e-06, "loss": 0.0007, "step": 375050 }, { "epoch": 44.254365266635205, "grad_norm": 0.00057672627735883, "learning_rate": 3.238049569145596e-06, "loss": 0.001, "step": 375100 }, { "epoch": 44.2602642756017, "grad_norm": 0.02755916491150856, "learning_rate": 3.2314765369010515e-06, "loss": 0.0006, "step": 375150 }, { "epoch": 44.266163284568194, "grad_norm": 0.0016735087847337127, "learning_rate": 3.2249099600608512e-06, "loss": 0.0011, "step": 375200 }, { "epoch": 44.27206229353469, "grad_norm": 0.00025753056979738176, "learning_rate": 3.218349839531387e-06, "loss": 0.0008, "step": 375250 }, { "epoch": 44.27796130250118, "grad_norm": 0.001362983719445765, "learning_rate": 3.2117961762181237e-06, "loss": 0.0004, "step": 375300 }, { "epoch": 44.28386031146767, "grad_norm": 0.002516302280128002, "learning_rate": 3.2052489710256817e-06, "loss": 0.0008, "step": 375350 }, { "epoch": 44.289759320434165, "grad_norm": 0.005732832010835409, "learning_rate": 3.198708224857755e-06, "loss": 0.0006, "step": 375400 }, { "epoch": 44.29565832940066, "grad_norm": 0.008156060241162777, "learning_rate": 3.192173938617149e-06, "loss": 0.0012, "step": 375450 }, { "epoch": 44.301557338367154, "grad_norm": 0.002545031486079097, "learning_rate": 3.185646113205787e-06, "loss": 0.001, "step": 375500 }, { "epoch": 44.30745634733365, "grad_norm": 0.12904773652553558, "learning_rate": 3.1791247495247035e-06, "loss": 0.0004, "step": 375550 }, { "epoch": 44.31335535630014, "grad_norm": 0.017314434051513672, "learning_rate": 3.172609848474023e-06, "loss": 0.0007, "step": 375600 }, { "epoch": 44.31925436526664, "grad_norm": 0.0061763799749314785, "learning_rate": 3.166101410952993e-06, "loss": 0.0005, "step": 375650 }, { "epoch": 44.32515337423313, "grad_norm": 0.13812342286109924, "learning_rate": 3.1595994378599724e-06, "loss": 0.0004, "step": 375700 }, { "epoch": 44.33105238319962, "grad_norm": 0.003629809245467186, "learning_rate": 3.153103930092416e-06, "loss": 0.0003, "step": 375750 }, { "epoch": 44.336951392166114, "grad_norm": 0.025036465376615524, "learning_rate": 3.1466148885468892e-06, "loss": 0.0005, "step": 375800 }, { "epoch": 44.34285040113261, "grad_norm": 0.039734866470098495, "learning_rate": 3.1401323141190652e-06, "loss": 0.0003, "step": 375850 }, { "epoch": 44.3487494100991, "grad_norm": 0.03959456831216812, "learning_rate": 3.133656207703739e-06, "loss": 0.0005, "step": 375900 }, { "epoch": 44.3546484190656, "grad_norm": 0.0019842975307255983, "learning_rate": 3.127186570194773e-06, "loss": 0.0006, "step": 375950 }, { "epoch": 44.36054742803209, "grad_norm": 0.07054618746042252, "learning_rate": 3.1207234024851983e-06, "loss": 0.0006, "step": 376000 }, { "epoch": 44.36054742803209, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010423904313938692, "eval_runtime": 2.049, "eval_samples_per_second": 48.805, "eval_steps_per_second": 1.952, "eval_wer": 0.26, "step": 376000 }, { "epoch": 44.36644643699859, "grad_norm": 0.00017983568250201643, "learning_rate": 3.114266705467095e-06, "loss": 0.0005, "step": 376050 }, { "epoch": 44.37234544596508, "grad_norm": 0.14883635938167572, "learning_rate": 3.1078164800316835e-06, "loss": 0.0009, "step": 376100 }, { "epoch": 44.37824445493157, "grad_norm": 0.0011939260875806212, "learning_rate": 3.1013727270692737e-06, "loss": 0.0004, "step": 376150 }, { "epoch": 44.38414346389806, "grad_norm": 0.00020451107411645353, "learning_rate": 3.0949354474692937e-06, "loss": 0.0007, "step": 376200 }, { "epoch": 44.39004247286456, "grad_norm": 0.007420320063829422, "learning_rate": 3.0885046421202713e-06, "loss": 0.0005, "step": 376250 }, { "epoch": 44.39594148183105, "grad_norm": 0.0009701527887955308, "learning_rate": 3.0820803119098463e-06, "loss": 0.0005, "step": 376300 }, { "epoch": 44.40184049079755, "grad_norm": 0.18370875716209412, "learning_rate": 3.0756624577247596e-06, "loss": 0.0006, "step": 376350 }, { "epoch": 44.40773949976404, "grad_norm": 0.0005620031151920557, "learning_rate": 3.0692510804508633e-06, "loss": 0.0007, "step": 376400 }, { "epoch": 44.413638508730536, "grad_norm": 0.0016104409005492926, "learning_rate": 3.0628461809731057e-06, "loss": 0.0003, "step": 376450 }, { "epoch": 44.41953751769703, "grad_norm": 0.00024063163436949253, "learning_rate": 3.0564477601755515e-06, "loss": 0.0008, "step": 376500 }, { "epoch": 44.42543652666352, "grad_norm": 0.05468415468931198, "learning_rate": 3.0500558189413554e-06, "loss": 0.0004, "step": 376550 }, { "epoch": 44.43133553563001, "grad_norm": 0.0007590539753437042, "learning_rate": 3.0436703581528115e-06, "loss": 0.0005, "step": 376600 }, { "epoch": 44.43723454459651, "grad_norm": 0.017488988116383553, "learning_rate": 3.0372913786912873e-06, "loss": 0.0007, "step": 376650 }, { "epoch": 44.443133553563, "grad_norm": 0.035248272120952606, "learning_rate": 3.030918881437261e-06, "loss": 0.0006, "step": 376700 }, { "epoch": 44.449032562529496, "grad_norm": 0.01850847899913788, "learning_rate": 3.0245528672703196e-06, "loss": 0.0007, "step": 376750 }, { "epoch": 44.45493157149599, "grad_norm": 0.004803983960300684, "learning_rate": 3.0181933370691696e-06, "loss": 0.0007, "step": 376800 }, { "epoch": 44.460830580462485, "grad_norm": 0.006841767113655806, "learning_rate": 3.011840291711604e-06, "loss": 0.0007, "step": 376850 }, { "epoch": 44.46672958942898, "grad_norm": 0.0005936798406764865, "learning_rate": 3.0054937320745215e-06, "loss": 0.0005, "step": 376900 }, { "epoch": 44.47262859839547, "grad_norm": 0.07825291901826859, "learning_rate": 2.9991536590339374e-06, "loss": 0.0008, "step": 376950 }, { "epoch": 44.47852760736196, "grad_norm": 0.003278691554442048, "learning_rate": 2.9928200734649527e-06, "loss": 0.0006, "step": 377000 }, { "epoch": 44.47852760736196, "eval_cer": 0.08347676419965576, "eval_loss": 9.687611600384116e-05, "eval_runtime": 2.0381, "eval_samples_per_second": 49.065, "eval_steps_per_second": 1.963, "eval_wer": 0.26, "step": 377000 }, { "epoch": 44.484426616328456, "grad_norm": 0.0006034767138771713, "learning_rate": 2.9864929762418004e-06, "loss": 0.0004, "step": 377050 }, { "epoch": 44.49032562529495, "grad_norm": 0.0022470024414360523, "learning_rate": 2.980172368237788e-06, "loss": 0.0011, "step": 377100 }, { "epoch": 44.496224634261445, "grad_norm": 0.000474826549179852, "learning_rate": 2.973858250325351e-06, "loss": 0.0006, "step": 377150 }, { "epoch": 44.50212364322794, "grad_norm": 0.081855408847332, "learning_rate": 2.9675506233760143e-06, "loss": 0.0006, "step": 377200 }, { "epoch": 44.508022652194434, "grad_norm": 0.0031288249883800745, "learning_rate": 2.9612494882604093e-06, "loss": 0.0006, "step": 377250 }, { "epoch": 44.51392166116092, "grad_norm": 0.11443479359149933, "learning_rate": 2.954954845848279e-06, "loss": 0.0005, "step": 377300 }, { "epoch": 44.519820670127416, "grad_norm": 0.0023491610772907734, "learning_rate": 2.9486666970084674e-06, "loss": 0.0002, "step": 377350 }, { "epoch": 44.52571967909391, "grad_norm": 0.0004484114469960332, "learning_rate": 2.942385042608925e-06, "loss": 0.0005, "step": 377400 }, { "epoch": 44.531618688060405, "grad_norm": 0.0030345036648213863, "learning_rate": 2.936109883516691e-06, "loss": 0.0003, "step": 377450 }, { "epoch": 44.5375176970269, "grad_norm": 0.0355311781167984, "learning_rate": 2.929841220597923e-06, "loss": 0.0006, "step": 377500 }, { "epoch": 44.543416705993394, "grad_norm": 0.0025132291484624147, "learning_rate": 2.923579054717873e-06, "loss": 0.0011, "step": 377550 }, { "epoch": 44.54931571495989, "grad_norm": 0.11446583271026611, "learning_rate": 2.9173233867409056e-06, "loss": 0.001, "step": 377600 }, { "epoch": 44.55521472392638, "grad_norm": 0.0004217594105284661, "learning_rate": 2.9110742175304852e-06, "loss": 0.0007, "step": 377650 }, { "epoch": 44.56111373289287, "grad_norm": 0.000600805738940835, "learning_rate": 2.904831547949166e-06, "loss": 0.0008, "step": 377700 }, { "epoch": 44.567012741859365, "grad_norm": 0.0053298030979931355, "learning_rate": 2.898595378858632e-06, "loss": 0.0005, "step": 377750 }, { "epoch": 44.57291175082586, "grad_norm": 0.003547846572473645, "learning_rate": 2.8923657111196378e-06, "loss": 0.0005, "step": 377800 }, { "epoch": 44.578810759792354, "grad_norm": 0.0007265795138664544, "learning_rate": 2.8861425455920687e-06, "loss": 0.0007, "step": 377850 }, { "epoch": 44.58470976875885, "grad_norm": 0.004149621818214655, "learning_rate": 2.879925883134893e-06, "loss": 0.0005, "step": 377900 }, { "epoch": 44.59060877772534, "grad_norm": 0.009246665984392166, "learning_rate": 2.8737157246061965e-06, "loss": 0.0007, "step": 377950 }, { "epoch": 44.59650778669184, "grad_norm": 0.0032136207446455956, "learning_rate": 2.8675120708631596e-06, "loss": 0.0004, "step": 378000 }, { "epoch": 44.59650778669184, "eval_cer": 0.08347676419965576, "eval_loss": 8.51048607728444e-05, "eval_runtime": 2.1071, "eval_samples_per_second": 47.46, "eval_steps_per_second": 1.898, "eval_wer": 0.26, "step": 378000 }, { "epoch": 44.60240679565833, "grad_norm": 0.0007079975330270827, "learning_rate": 2.861314922762065e-06, "loss": 0.0005, "step": 378050 }, { "epoch": 44.60830580462482, "grad_norm": 0.0023529953323304653, "learning_rate": 2.8551242811582947e-06, "loss": 0.0005, "step": 378100 }, { "epoch": 44.614204813591314, "grad_norm": 0.10090924054384232, "learning_rate": 2.848940146906337e-06, "loss": 0.0004, "step": 378150 }, { "epoch": 44.62010382255781, "grad_norm": 0.0007363816257566214, "learning_rate": 2.8427625208597762e-06, "loss": 0.0008, "step": 378200 }, { "epoch": 44.6260028315243, "grad_norm": 7.367832586169243e-05, "learning_rate": 2.836591403871314e-06, "loss": 0.0007, "step": 378250 }, { "epoch": 44.6319018404908, "grad_norm": 0.0010187389561906457, "learning_rate": 2.830426796792729e-06, "loss": 0.0004, "step": 378300 }, { "epoch": 44.63780084945729, "grad_norm": 0.3027372360229492, "learning_rate": 2.8242687004749248e-06, "loss": 0.0005, "step": 378350 }, { "epoch": 44.64369985842379, "grad_norm": 0.0015283836983144283, "learning_rate": 2.8181171157678876e-06, "loss": 0.0006, "step": 378400 }, { "epoch": 44.64959886739028, "grad_norm": 0.024129187688231468, "learning_rate": 2.811972043520722e-06, "loss": 0.001, "step": 378450 }, { "epoch": 44.65549787635677, "grad_norm": 0.008870191872119904, "learning_rate": 2.8058334845816213e-06, "loss": 0.0009, "step": 378500 }, { "epoch": 44.66139688532326, "grad_norm": 0.001423164620064199, "learning_rate": 2.799701439797886e-06, "loss": 0.0003, "step": 378550 }, { "epoch": 44.66729589428976, "grad_norm": 0.001797966775484383, "learning_rate": 2.7935759100159055e-06, "loss": 0.0002, "step": 378600 }, { "epoch": 44.67319490325625, "grad_norm": 0.009015189483761787, "learning_rate": 2.7874568960811973e-06, "loss": 0.0006, "step": 378650 }, { "epoch": 44.67909391222275, "grad_norm": 0.0006962902843952179, "learning_rate": 2.781344398838348e-06, "loss": 0.0003, "step": 378700 }, { "epoch": 44.68499292118924, "grad_norm": 0.0008224067278206348, "learning_rate": 2.77523841913106e-06, "loss": 0.0005, "step": 378750 }, { "epoch": 44.690891930155736, "grad_norm": 0.27990156412124634, "learning_rate": 2.7691389578021364e-06, "loss": 0.0009, "step": 378800 }, { "epoch": 44.69679093912223, "grad_norm": 0.054947059601545334, "learning_rate": 2.763046015693482e-06, "loss": 0.0007, "step": 378850 }, { "epoch": 44.70268994808872, "grad_norm": 0.03504790738224983, "learning_rate": 2.756959593646091e-06, "loss": 0.0003, "step": 378900 }, { "epoch": 44.70858895705521, "grad_norm": 0.010073438286781311, "learning_rate": 2.750879692500069e-06, "loss": 0.0007, "step": 378950 }, { "epoch": 44.71448796602171, "grad_norm": 0.008721857331693172, "learning_rate": 2.7448063130946224e-06, "loss": 0.0006, "step": 379000 }, { "epoch": 44.71448796602171, "eval_cer": 0.08347676419965576, "eval_loss": 8.841666567604989e-05, "eval_runtime": 2.0989, "eval_samples_per_second": 47.645, "eval_steps_per_second": 1.906, "eval_wer": 0.26, "step": 379000 }, { "epoch": 44.7203869749882, "grad_norm": 0.021313656121492386, "learning_rate": 2.738739456268041e-06, "loss": 0.0004, "step": 379050 }, { "epoch": 44.726285983954696, "grad_norm": 0.05704435333609581, "learning_rate": 2.73267912285774e-06, "loss": 0.0005, "step": 379100 }, { "epoch": 44.73218499292119, "grad_norm": 0.0008270292310044169, "learning_rate": 2.7266253137002096e-06, "loss": 0.0007, "step": 379150 }, { "epoch": 44.738084001887685, "grad_norm": 0.005519216880202293, "learning_rate": 2.7205780296310544e-06, "loss": 0.0007, "step": 379200 }, { "epoch": 44.74398301085418, "grad_norm": 0.014831465668976307, "learning_rate": 2.7145372714849626e-06, "loss": 0.0004, "step": 379250 }, { "epoch": 44.74988201982067, "grad_norm": 0.013009740971028805, "learning_rate": 2.70850304009575e-06, "loss": 0.0009, "step": 379300 }, { "epoch": 44.75578102878716, "grad_norm": 0.0012234095484018326, "learning_rate": 2.702475336296312e-06, "loss": 0.0004, "step": 379350 }, { "epoch": 44.761680037753656, "grad_norm": 0.0007467932882718742, "learning_rate": 2.696454160918638e-06, "loss": 0.0003, "step": 379400 }, { "epoch": 44.76757904672015, "grad_norm": 0.07766613364219666, "learning_rate": 2.6904395147938245e-06, "loss": 0.0005, "step": 379450 }, { "epoch": 44.773478055686645, "grad_norm": 0.010947728529572487, "learning_rate": 2.684431398752074e-06, "loss": 0.0008, "step": 379500 }, { "epoch": 44.77937706465314, "grad_norm": 0.0011026932625100017, "learning_rate": 2.678429813622668e-06, "loss": 0.0012, "step": 379550 }, { "epoch": 44.785276073619634, "grad_norm": 0.004535157233476639, "learning_rate": 2.6724347602340106e-06, "loss": 0.0006, "step": 379600 }, { "epoch": 44.79117508258613, "grad_norm": 0.06872441619634628, "learning_rate": 2.6664462394135834e-06, "loss": 0.0007, "step": 379650 }, { "epoch": 44.797074091552616, "grad_norm": 0.034662358462810516, "learning_rate": 2.6604642519879765e-06, "loss": 0.0007, "step": 379700 }, { "epoch": 44.80297310051911, "grad_norm": 0.07564420253038406, "learning_rate": 2.6544887987828782e-06, "loss": 0.0006, "step": 379750 }, { "epoch": 44.808872109485606, "grad_norm": 0.02991127036511898, "learning_rate": 2.6485198806230684e-06, "loss": 0.001, "step": 379800 }, { "epoch": 44.8147711184521, "grad_norm": 0.004068123642355204, "learning_rate": 2.6425574983324317e-06, "loss": 0.0005, "step": 379850 }, { "epoch": 44.820670127418595, "grad_norm": 0.00038821896305307746, "learning_rate": 2.6366016527339553e-06, "loss": 0.0005, "step": 379900 }, { "epoch": 44.82656913638509, "grad_norm": 0.042983196675777435, "learning_rate": 2.6306523446497145e-06, "loss": 0.0008, "step": 379950 }, { "epoch": 44.832468145351584, "grad_norm": 0.2851257622241974, "learning_rate": 2.62470957490088e-06, "loss": 0.0007, "step": 380000 }, { "epoch": 44.832468145351584, "eval_cer": 0.08347676419965576, "eval_loss": 7.665443990845233e-05, "eval_runtime": 2.1233, "eval_samples_per_second": 47.097, "eval_steps_per_second": 1.884, "eval_wer": 0.26, "step": 380000 }, { "epoch": 44.83836715431807, "grad_norm": 0.13924524188041687, "learning_rate": 2.6187733443077234e-06, "loss": 0.0009, "step": 380050 }, { "epoch": 44.844266163284566, "grad_norm": 0.00020436881459318101, "learning_rate": 2.612843653689634e-06, "loss": 0.001, "step": 380100 }, { "epoch": 44.85016517225106, "grad_norm": 0.0004859567270614207, "learning_rate": 2.6069205038650624e-06, "loss": 0.0008, "step": 380150 }, { "epoch": 44.856064181217555, "grad_norm": 0.23775695264339447, "learning_rate": 2.6010038956515826e-06, "loss": 0.0011, "step": 380200 }, { "epoch": 44.86196319018405, "grad_norm": 0.004085718188434839, "learning_rate": 2.5950938298658522e-06, "loss": 0.001, "step": 380250 }, { "epoch": 44.867862199150544, "grad_norm": 0.0028680383693426847, "learning_rate": 2.58919030732363e-06, "loss": 0.0008, "step": 380300 }, { "epoch": 44.87376120811704, "grad_norm": 0.005890652537345886, "learning_rate": 2.5832933288397754e-06, "loss": 0.0004, "step": 380350 }, { "epoch": 44.87966021708353, "grad_norm": 0.10811260342597961, "learning_rate": 2.5774028952282425e-06, "loss": 0.0006, "step": 380400 }, { "epoch": 44.88555922605002, "grad_norm": 0.11968667060136795, "learning_rate": 2.5715190073020755e-06, "loss": 0.001, "step": 380450 }, { "epoch": 44.891458235016515, "grad_norm": 0.3064870536327362, "learning_rate": 2.565641665873425e-06, "loss": 0.0003, "step": 380500 }, { "epoch": 44.89735724398301, "grad_norm": 0.01420088391751051, "learning_rate": 2.5597708717535306e-06, "loss": 0.0006, "step": 380550 }, { "epoch": 44.903256252949504, "grad_norm": 0.02644484117627144, "learning_rate": 2.5539066257527277e-06, "loss": 0.0003, "step": 380600 }, { "epoch": 44.909155261916, "grad_norm": 0.09039627760648727, "learning_rate": 2.548048928680463e-06, "loss": 0.001, "step": 380650 }, { "epoch": 44.91505427088249, "grad_norm": 0.0013114424655213952, "learning_rate": 2.5421977813452624e-06, "loss": 0.0003, "step": 380700 }, { "epoch": 44.92095327984899, "grad_norm": 0.0005029579042457044, "learning_rate": 2.5363531845547515e-06, "loss": 0.0006, "step": 380750 }, { "epoch": 44.92685228881548, "grad_norm": 0.00529295951128006, "learning_rate": 2.5305151391156523e-06, "loss": 0.0007, "step": 380800 }, { "epoch": 44.93275129778197, "grad_norm": 0.0029999883845448494, "learning_rate": 2.5246836458337862e-06, "loss": 0.0011, "step": 380850 }, { "epoch": 44.938650306748464, "grad_norm": 0.0003104041388723999, "learning_rate": 2.51885870551406e-06, "loss": 0.0006, "step": 380900 }, { "epoch": 44.94454931571496, "grad_norm": 0.22418153285980225, "learning_rate": 2.513040318960491e-06, "loss": 0.001, "step": 380950 }, { "epoch": 44.95044832468145, "grad_norm": 0.00019033100397791713, "learning_rate": 2.5072284869761874e-06, "loss": 0.0006, "step": 381000 }, { "epoch": 44.95044832468145, "eval_cer": 0.08347676419965576, "eval_loss": 9.082109318114817e-05, "eval_runtime": 2.0724, "eval_samples_per_second": 48.254, "eval_steps_per_second": 1.93, "eval_wer": 0.26, "step": 381000 }, { "epoch": 44.95634733364795, "grad_norm": 0.0012879977002739906, "learning_rate": 2.5014232103633352e-06, "loss": 0.0008, "step": 381050 }, { "epoch": 44.96224634261444, "grad_norm": 0.0004880302876699716, "learning_rate": 2.4956244899232427e-06, "loss": 0.0004, "step": 381100 }, { "epoch": 44.96814535158094, "grad_norm": 0.0009384313016198575, "learning_rate": 2.4898323264562984e-06, "loss": 0.0009, "step": 381150 }, { "epoch": 44.97404436054743, "grad_norm": 0.024271396920084953, "learning_rate": 2.484046720761979e-06, "loss": 0.0006, "step": 381200 }, { "epoch": 44.97994336951392, "grad_norm": 0.0043169348500669, "learning_rate": 2.478267673638879e-06, "loss": 0.0004, "step": 381250 }, { "epoch": 44.98584237848041, "grad_norm": 0.0033148922957479954, "learning_rate": 2.4724951858846656e-06, "loss": 0.0008, "step": 381300 }, { "epoch": 44.99174138744691, "grad_norm": 0.006027838680893183, "learning_rate": 2.4667292582961075e-06, "loss": 0.0004, "step": 381350 }, { "epoch": 44.9976403964134, "grad_norm": 0.00021539459703490138, "learning_rate": 2.4609698916690683e-06, "loss": 0.0007, "step": 381400 }, { "epoch": 45.0035394053799, "grad_norm": 0.015359053388237953, "learning_rate": 2.4552170867985112e-06, "loss": 0.0003, "step": 381450 }, { "epoch": 45.00943841434639, "grad_norm": 1.2853063344955444, "learning_rate": 2.4494708444784907e-06, "loss": 0.001, "step": 381500 }, { "epoch": 45.015337423312886, "grad_norm": 0.007389581296592951, "learning_rate": 2.44373116550215e-06, "loss": 0.0008, "step": 381550 }, { "epoch": 45.02123643227938, "grad_norm": 0.10072583705186844, "learning_rate": 2.4379980506617272e-06, "loss": 0.0002, "step": 381600 }, { "epoch": 45.02713544124587, "grad_norm": 0.0018980354070663452, "learning_rate": 2.4322715007485564e-06, "loss": 0.0002, "step": 381650 }, { "epoch": 45.03303445021236, "grad_norm": 0.0003603186341933906, "learning_rate": 2.4265515165530715e-06, "loss": 0.0007, "step": 381700 }, { "epoch": 45.03893345917886, "grad_norm": 0.003052011365070939, "learning_rate": 2.420838098864797e-06, "loss": 0.0009, "step": 381750 }, { "epoch": 45.04483246814535, "grad_norm": 0.07429304718971252, "learning_rate": 2.4151312484723465e-06, "loss": 0.0004, "step": 381800 }, { "epoch": 45.050731477111846, "grad_norm": 0.007091850973665714, "learning_rate": 2.409430966163434e-06, "loss": 0.0005, "step": 381850 }, { "epoch": 45.05663048607834, "grad_norm": 3.1992334697861224e-05, "learning_rate": 2.4037372527248527e-06, "loss": 0.0004, "step": 381900 }, { "epoch": 45.062529495044835, "grad_norm": 0.013953405432403088, "learning_rate": 2.398050108942501e-06, "loss": 0.0007, "step": 381950 }, { "epoch": 45.06842850401133, "grad_norm": 0.11733491718769073, "learning_rate": 2.3923695356013797e-06, "loss": 0.0004, "step": 382000 }, { "epoch": 45.06842850401133, "eval_cer": 0.08347676419965576, "eval_loss": 0.00010140923404833302, "eval_runtime": 2.0462, "eval_samples_per_second": 48.871, "eval_steps_per_second": 1.955, "eval_wer": 0.26, "step": 382000 }, { "epoch": 45.07432751297782, "grad_norm": 0.008608397096395493, "learning_rate": 2.386695533485567e-06, "loss": 0.001, "step": 382050 }, { "epoch": 45.08022652194431, "grad_norm": 0.003051695879548788, "learning_rate": 2.3810281033782355e-06, "loss": 0.0008, "step": 382100 }, { "epoch": 45.086125530910806, "grad_norm": 0.0023970995098352432, "learning_rate": 2.375367246061655e-06, "loss": 0.0003, "step": 382150 }, { "epoch": 45.0920245398773, "grad_norm": 0.09841691702604294, "learning_rate": 2.369712962317183e-06, "loss": 0.0008, "step": 382200 }, { "epoch": 45.097923548843795, "grad_norm": 0.0003557931340765208, "learning_rate": 2.3640652529252794e-06, "loss": 0.0005, "step": 382250 }, { "epoch": 45.10382255781029, "grad_norm": 0.002910632872954011, "learning_rate": 2.358424118665492e-06, "loss": 0.0006, "step": 382300 }, { "epoch": 45.109721566776784, "grad_norm": 0.009079230017960072, "learning_rate": 2.3527895603164595e-06, "loss": 0.0003, "step": 382350 }, { "epoch": 45.11562057574328, "grad_norm": 0.14338849484920502, "learning_rate": 2.347161578655904e-06, "loss": 0.0006, "step": 382400 }, { "epoch": 45.121519584709766, "grad_norm": 0.00010328411008231342, "learning_rate": 2.3415401744606603e-06, "loss": 0.0006, "step": 382450 }, { "epoch": 45.12741859367626, "grad_norm": 0.03694716468453407, "learning_rate": 2.33592534850664e-06, "loss": 0.0003, "step": 382500 }, { "epoch": 45.133317602642755, "grad_norm": 0.001297657028771937, "learning_rate": 2.3303171015688407e-06, "loss": 0.0007, "step": 382550 }, { "epoch": 45.13921661160925, "grad_norm": 0.003305834950879216, "learning_rate": 2.3247154344213818e-06, "loss": 0.0002, "step": 382600 }, { "epoch": 45.145115620575744, "grad_norm": 0.0016203010454773903, "learning_rate": 2.3191203478374447e-06, "loss": 0.0007, "step": 382650 }, { "epoch": 45.15101462954224, "grad_norm": 0.004614623729139566, "learning_rate": 2.3135318425893114e-06, "loss": 0.0007, "step": 382700 }, { "epoch": 45.15691363850873, "grad_norm": 0.003901686519384384, "learning_rate": 2.3079499194483658e-06, "loss": 0.0009, "step": 382750 }, { "epoch": 45.16281264747522, "grad_norm": 0.0006473986431956291, "learning_rate": 2.3023745791850627e-06, "loss": 0.0005, "step": 382800 }, { "epoch": 45.168711656441715, "grad_norm": 0.03272537514567375, "learning_rate": 2.296805822568959e-06, "loss": 0.0007, "step": 382850 }, { "epoch": 45.17461066540821, "grad_norm": 0.002429455751553178, "learning_rate": 2.2912436503687184e-06, "loss": 0.0003, "step": 382900 }, { "epoch": 45.180509674374704, "grad_norm": 0.001566107734106481, "learning_rate": 2.285688063352065e-06, "loss": 0.0005, "step": 382950 }, { "epoch": 45.1864086833412, "grad_norm": 0.1672799438238144, "learning_rate": 2.2801390622858355e-06, "loss": 0.0004, "step": 383000 }, { "epoch": 45.1864086833412, "eval_cer": 0.08347676419965576, "eval_loss": 9.23091865843162e-05, "eval_runtime": 2.0509, "eval_samples_per_second": 48.759, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 383000 }, { "epoch": 45.19230769230769, "grad_norm": 0.00039820020901970565, "learning_rate": 2.2745966479359557e-06, "loss": 0.0011, "step": 383050 }, { "epoch": 45.19820670127419, "grad_norm": 0.2578714191913605, "learning_rate": 2.269060821067437e-06, "loss": 0.0005, "step": 383100 }, { "epoch": 45.20410571024068, "grad_norm": 0.015037360601127148, "learning_rate": 2.2635315824443726e-06, "loss": 0.0007, "step": 383150 }, { "epoch": 45.21000471920717, "grad_norm": 0.00022892783454153687, "learning_rate": 2.2580089328299746e-06, "loss": 0.0006, "step": 383200 }, { "epoch": 45.215903728173664, "grad_norm": 0.4172196388244629, "learning_rate": 2.252492872986517e-06, "loss": 0.001, "step": 383250 }, { "epoch": 45.22180273714016, "grad_norm": 5.043917553848587e-05, "learning_rate": 2.246983403675379e-06, "loss": 0.0008, "step": 383300 }, { "epoch": 45.22770174610665, "grad_norm": 0.0029292788822203875, "learning_rate": 2.241480525657014e-06, "loss": 0.0005, "step": 383350 }, { "epoch": 45.23360075507315, "grad_norm": 0.045443419367074966, "learning_rate": 2.2359842396909967e-06, "loss": 0.0007, "step": 383400 }, { "epoch": 45.23949976403964, "grad_norm": 0.00044381944462656975, "learning_rate": 2.230494546535966e-06, "loss": 0.0005, "step": 383450 }, { "epoch": 45.24539877300614, "grad_norm": 0.12078763544559479, "learning_rate": 2.2250114469496596e-06, "loss": 0.0005, "step": 383500 }, { "epoch": 45.25129778197263, "grad_norm": 8.461080142296851e-05, "learning_rate": 2.2195349416888945e-06, "loss": 0.0005, "step": 383550 }, { "epoch": 45.25719679093912, "grad_norm": 0.0020661952439695597, "learning_rate": 2.2140650315095932e-06, "loss": 0.0009, "step": 383600 }, { "epoch": 45.263095799905614, "grad_norm": 0.00016400762251578271, "learning_rate": 2.2086017171667584e-06, "loss": 0.0008, "step": 383650 }, { "epoch": 45.26899480887211, "grad_norm": 0.01529918983578682, "learning_rate": 2.203144999414486e-06, "loss": 0.0006, "step": 383700 }, { "epoch": 45.2748938178386, "grad_norm": 0.0016944087110459805, "learning_rate": 2.1976948790059626e-06, "loss": 0.0007, "step": 383750 }, { "epoch": 45.2807928268051, "grad_norm": 0.00011913665366591886, "learning_rate": 2.192251356693459e-06, "loss": 0.0006, "step": 383800 }, { "epoch": 45.28669183577159, "grad_norm": 0.11617850512266159, "learning_rate": 2.1868144332283403e-06, "loss": 0.0006, "step": 383850 }, { "epoch": 45.292590844738086, "grad_norm": 0.001698676380328834, "learning_rate": 2.181384109361051e-06, "loss": 0.0002, "step": 383900 }, { "epoch": 45.29848985370458, "grad_norm": 0.0033664428628981113, "learning_rate": 2.1759603858411416e-06, "loss": 0.001, "step": 383950 }, { "epoch": 45.30438886267107, "grad_norm": 0.0036396540235728025, "learning_rate": 2.170543263417246e-06, "loss": 0.0004, "step": 384000 }, { "epoch": 45.30438886267107, "eval_cer": 0.08347676419965576, "eval_loss": 9.552405390422791e-05, "eval_runtime": 2.0988, "eval_samples_per_second": 47.646, "eval_steps_per_second": 1.906, "eval_wer": 0.26, "step": 384000 }, { "epoch": 45.31028787163756, "grad_norm": 0.005042297299951315, "learning_rate": 2.165132742837067e-06, "loss": 0.0005, "step": 384050 }, { "epoch": 45.31618688060406, "grad_norm": 0.08063346147537231, "learning_rate": 2.159728824847429e-06, "loss": 0.0004, "step": 384100 }, { "epoch": 45.32208588957055, "grad_norm": 0.0037951935082674026, "learning_rate": 2.1543315101942183e-06, "loss": 0.0007, "step": 384150 }, { "epoch": 45.327984898537046, "grad_norm": 0.18736542761325836, "learning_rate": 2.1489407996224288e-06, "loss": 0.0005, "step": 384200 }, { "epoch": 45.33388390750354, "grad_norm": 0.04190235584974289, "learning_rate": 2.1435566938761197e-06, "loss": 0.0008, "step": 384250 }, { "epoch": 45.339782916470035, "grad_norm": 0.008869574405252934, "learning_rate": 2.1381791936984697e-06, "loss": 0.0007, "step": 384300 }, { "epoch": 45.34568192543653, "grad_norm": 0.20369355380535126, "learning_rate": 2.132808299831712e-06, "loss": 0.0007, "step": 384350 }, { "epoch": 45.35158093440302, "grad_norm": 0.06789351254701614, "learning_rate": 2.127444013017199e-06, "loss": 0.0007, "step": 384400 }, { "epoch": 45.35747994336951, "grad_norm": 0.0754002034664154, "learning_rate": 2.122086333995349e-06, "loss": 0.0009, "step": 384450 }, { "epoch": 45.36337895233601, "grad_norm": 0.003770796349272132, "learning_rate": 2.1167352635056703e-06, "loss": 0.0003, "step": 384500 }, { "epoch": 45.3692779613025, "grad_norm": 0.0017412127926945686, "learning_rate": 2.1113908022867834e-06, "loss": 0.001, "step": 384550 }, { "epoch": 45.375176970268996, "grad_norm": 0.010832671076059341, "learning_rate": 2.106052951076365e-06, "loss": 0.0009, "step": 384600 }, { "epoch": 45.38107597923549, "grad_norm": 0.0008429831359535456, "learning_rate": 2.100721710611192e-06, "loss": 0.0005, "step": 384650 }, { "epoch": 45.386974988201985, "grad_norm": 0.004247075412422419, "learning_rate": 2.0953970816271375e-06, "loss": 0.0008, "step": 384700 }, { "epoch": 45.39287399716848, "grad_norm": 0.19710196554660797, "learning_rate": 2.0900790648591463e-06, "loss": 0.0007, "step": 384750 }, { "epoch": 45.39877300613497, "grad_norm": 0.009056998416781425, "learning_rate": 2.0847676610412592e-06, "loss": 0.0007, "step": 384800 }, { "epoch": 45.40467201510146, "grad_norm": 0.1229257583618164, "learning_rate": 2.079462870906607e-06, "loss": 0.0008, "step": 384850 }, { "epoch": 45.410571024067956, "grad_norm": 0.0003162536886520684, "learning_rate": 2.074164695187397e-06, "loss": 0.0007, "step": 384900 }, { "epoch": 45.41647003303445, "grad_norm": 7.190783799160272e-05, "learning_rate": 2.068873134614935e-06, "loss": 0.0005, "step": 384950 }, { "epoch": 45.422369042000945, "grad_norm": 0.13578251004219055, "learning_rate": 2.0635881899195964e-06, "loss": 0.0008, "step": 385000 }, { "epoch": 45.422369042000945, "eval_cer": 0.08347676419965576, "eval_loss": 9.772593330126256e-05, "eval_runtime": 2.1233, "eval_samples_per_second": 47.096, "eval_steps_per_second": 1.884, "eval_wer": 0.26, "step": 385000 }, { "epoch": 45.42826805096744, "grad_norm": 0.005674295127391815, "learning_rate": 2.0583098618308817e-06, "loss": 0.0004, "step": 385050 }, { "epoch": 45.434167059933934, "grad_norm": 0.1725594848394394, "learning_rate": 2.0530381510773355e-06, "loss": 0.001, "step": 385100 }, { "epoch": 45.44006606890042, "grad_norm": 0.1460387259721756, "learning_rate": 2.047773058386604e-06, "loss": 0.0012, "step": 385150 }, { "epoch": 45.445965077866916, "grad_norm": 0.000925138418097049, "learning_rate": 2.0425145844854277e-06, "loss": 0.0006, "step": 385200 }, { "epoch": 45.45186408683341, "grad_norm": 0.03128691017627716, "learning_rate": 2.0372627300996205e-06, "loss": 0.0006, "step": 385250 }, { "epoch": 45.457763095799905, "grad_norm": 0.0021764931734651327, "learning_rate": 2.032017495954103e-06, "loss": 0.0008, "step": 385300 }, { "epoch": 45.4636621047664, "grad_norm": 0.0009663484524935484, "learning_rate": 2.0267788827728562e-06, "loss": 0.0005, "step": 385350 }, { "epoch": 45.469561113732894, "grad_norm": 0.017846213653683662, "learning_rate": 2.0215468912789694e-06, "loss": 0.0004, "step": 385400 }, { "epoch": 45.47546012269939, "grad_norm": 0.07489661872386932, "learning_rate": 2.0163215221945974e-06, "loss": 0.0009, "step": 385450 }, { "epoch": 45.48135913166588, "grad_norm": 0.11626558750867844, "learning_rate": 2.011102776241003e-06, "loss": 0.0005, "step": 385500 }, { "epoch": 45.48725814063237, "grad_norm": 0.0006657298654317856, "learning_rate": 2.0058906541385148e-06, "loss": 0.0003, "step": 385550 }, { "epoch": 45.493157149598865, "grad_norm": 0.0036484398879110813, "learning_rate": 2.0006851566065574e-06, "loss": 0.0007, "step": 385600 }, { "epoch": 45.49905615856536, "grad_norm": 0.0008816429181024432, "learning_rate": 1.9954862843636456e-06, "loss": 0.0009, "step": 385650 }, { "epoch": 45.504955167531854, "grad_norm": 0.006688814610242844, "learning_rate": 1.9902940381273662e-06, "loss": 0.001, "step": 385700 }, { "epoch": 45.51085417649835, "grad_norm": 0.0018368404125794768, "learning_rate": 1.985108418614401e-06, "loss": 0.0007, "step": 385750 }, { "epoch": 45.51675318546484, "grad_norm": 0.0044574555940926075, "learning_rate": 1.9799294265405164e-06, "loss": 0.0006, "step": 385800 }, { "epoch": 45.52265219443134, "grad_norm": 0.09442447125911713, "learning_rate": 1.974757062620558e-06, "loss": 0.0005, "step": 385850 }, { "epoch": 45.52855120339783, "grad_norm": 0.0011511550983414054, "learning_rate": 1.9695913275684706e-06, "loss": 0.0004, "step": 385900 }, { "epoch": 45.53445021236432, "grad_norm": 0.009877748787403107, "learning_rate": 1.9644322220972677e-06, "loss": 0.0005, "step": 385950 }, { "epoch": 45.540349221330814, "grad_norm": 0.013989625498652458, "learning_rate": 1.959279746919057e-06, "loss": 0.0009, "step": 386000 }, { "epoch": 45.540349221330814, "eval_cer": 0.08347676419965576, "eval_loss": 8.352837903657928e-05, "eval_runtime": 2.1038, "eval_samples_per_second": 47.532, "eval_steps_per_second": 1.901, "eval_wer": 0.26, "step": 386000 }, { "epoch": 45.54624823029731, "grad_norm": 0.18159092962741852, "learning_rate": 1.9541339027450256e-06, "loss": 0.0006, "step": 386050 }, { "epoch": 45.5521472392638, "grad_norm": 0.0122260432690382, "learning_rate": 1.9489946902854496e-06, "loss": 0.0008, "step": 386100 }, { "epoch": 45.5580462482303, "grad_norm": 0.0046470267698168755, "learning_rate": 1.943862110249689e-06, "loss": 0.0002, "step": 386150 }, { "epoch": 45.56394525719679, "grad_norm": 0.045718926936388016, "learning_rate": 1.9387361633461944e-06, "loss": 0.0008, "step": 386200 }, { "epoch": 45.56984426616329, "grad_norm": 0.011409120634198189, "learning_rate": 1.9336168502824825e-06, "loss": 0.0005, "step": 386250 }, { "epoch": 45.57574327512978, "grad_norm": 0.01023282390087843, "learning_rate": 1.928504171765172e-06, "loss": 0.0008, "step": 386300 }, { "epoch": 45.58164228409627, "grad_norm": 0.0004954442265443504, "learning_rate": 1.923398128499959e-06, "loss": 0.0005, "step": 386350 }, { "epoch": 45.58754129306276, "grad_norm": 0.0879334807395935, "learning_rate": 1.918298721191625e-06, "loss": 0.0005, "step": 386400 }, { "epoch": 45.59344030202926, "grad_norm": 0.011669166386127472, "learning_rate": 1.913205950544028e-06, "loss": 0.0008, "step": 386450 }, { "epoch": 45.59933931099575, "grad_norm": 0.006675116252154112, "learning_rate": 1.908119817260129e-06, "loss": 0.001, "step": 386500 }, { "epoch": 45.60523831996225, "grad_norm": 0.13258545100688934, "learning_rate": 1.90304032204196e-06, "loss": 0.0003, "step": 386550 }, { "epoch": 45.61113732892874, "grad_norm": 0.12161311507225037, "learning_rate": 1.8979674655906332e-06, "loss": 0.0006, "step": 386600 }, { "epoch": 45.617036337895236, "grad_norm": 0.08838234096765518, "learning_rate": 1.892901248606338e-06, "loss": 0.0004, "step": 386650 }, { "epoch": 45.62293534686173, "grad_norm": 0.09464873373508453, "learning_rate": 1.8878416717883819e-06, "loss": 0.001, "step": 386700 }, { "epoch": 45.62883435582822, "grad_norm": 0.0006735546630807221, "learning_rate": 1.8827887358351225e-06, "loss": 0.0004, "step": 386750 }, { "epoch": 45.63473336479471, "grad_norm": 0.02134411409497261, "learning_rate": 1.8777424414440025e-06, "loss": 0.0005, "step": 386800 }, { "epoch": 45.64063237376121, "grad_norm": 0.0022409935481846333, "learning_rate": 1.8727027893115645e-06, "loss": 0.0007, "step": 386850 }, { "epoch": 45.6465313827277, "grad_norm": 0.0021883887238800526, "learning_rate": 1.8676697801334242e-06, "loss": 0.0005, "step": 386900 }, { "epoch": 45.652430391694196, "grad_norm": 0.09758642315864563, "learning_rate": 1.8626434146042815e-06, "loss": 0.0004, "step": 386950 }, { "epoch": 45.65832940066069, "grad_norm": 0.0002843897382263094, "learning_rate": 1.8576236934179202e-06, "loss": 0.0003, "step": 387000 }, { "epoch": 45.65832940066069, "eval_cer": 0.08347676419965576, "eval_loss": 8.376423647860065e-05, "eval_runtime": 2.0627, "eval_samples_per_second": 48.48, "eval_steps_per_second": 1.939, "eval_wer": 0.26, "step": 387000 }, { "epoch": 45.664228409627185, "grad_norm": 0.0016739044804126024, "learning_rate": 1.8526106172672085e-06, "loss": 0.0007, "step": 387050 }, { "epoch": 45.67012741859368, "grad_norm": 0.0015523339388892055, "learning_rate": 1.8476041868440874e-06, "loss": 0.0004, "step": 387100 }, { "epoch": 45.67602642756017, "grad_norm": 0.07297275215387344, "learning_rate": 1.8426044028395929e-06, "loss": 0.0008, "step": 387150 }, { "epoch": 45.68192543652666, "grad_norm": 0.0015619603218510747, "learning_rate": 1.8376112659438394e-06, "loss": 0.0005, "step": 387200 }, { "epoch": 45.687824445493156, "grad_norm": 0.0024305623956024647, "learning_rate": 1.832624776846026e-06, "loss": 0.0009, "step": 387250 }, { "epoch": 45.69372345445965, "grad_norm": 0.014788770116865635, "learning_rate": 1.827644936234435e-06, "loss": 0.0009, "step": 387300 }, { "epoch": 45.699622463426145, "grad_norm": 0.0012181248748674989, "learning_rate": 1.822671744796417e-06, "loss": 0.0009, "step": 387350 }, { "epoch": 45.70552147239264, "grad_norm": 0.00910109467804432, "learning_rate": 1.8177052032184283e-06, "loss": 0.0004, "step": 387400 }, { "epoch": 45.711420481359134, "grad_norm": 0.00011454449850134552, "learning_rate": 1.8127453121859871e-06, "loss": 0.0006, "step": 387450 }, { "epoch": 45.71731949032562, "grad_norm": 0.000179273120011203, "learning_rate": 1.8077920723837016e-06, "loss": 0.0003, "step": 387500 }, { "epoch": 45.723218499292116, "grad_norm": 0.10524773597717285, "learning_rate": 1.8028454844952635e-06, "loss": 0.0004, "step": 387550 }, { "epoch": 45.72911750825861, "grad_norm": 0.045051541179418564, "learning_rate": 1.7979055492034436e-06, "loss": 0.0008, "step": 387600 }, { "epoch": 45.735016517225105, "grad_norm": 0.0016303882002830505, "learning_rate": 1.7929722671900961e-06, "loss": 0.0007, "step": 387650 }, { "epoch": 45.7409155261916, "grad_norm": 0.0022094431333243847, "learning_rate": 1.78804563913616e-06, "loss": 0.0006, "step": 387700 }, { "epoch": 45.746814535158094, "grad_norm": 0.11834200471639633, "learning_rate": 1.783125665721641e-06, "loss": 0.0005, "step": 387750 }, { "epoch": 45.75271354412459, "grad_norm": 0.13334111869335175, "learning_rate": 1.7782123476256407e-06, "loss": 0.0008, "step": 387800 }, { "epoch": 45.75861255309108, "grad_norm": 0.28246229887008667, "learning_rate": 1.7733056855263497e-06, "loss": 0.0008, "step": 387850 }, { "epoch": 45.76451156205757, "grad_norm": 0.030528558418154716, "learning_rate": 1.7684056801010207e-06, "loss": 0.0006, "step": 387900 }, { "epoch": 45.770410571024065, "grad_norm": 0.00018503212777432054, "learning_rate": 1.763512332025996e-06, "loss": 0.0008, "step": 387950 }, { "epoch": 45.77630957999056, "grad_norm": 0.0013249729527160525, "learning_rate": 1.7586256419766967e-06, "loss": 0.0004, "step": 388000 }, { "epoch": 45.77630957999056, "eval_cer": 0.08347676419965576, "eval_loss": 7.873445429140702e-05, "eval_runtime": 2.0474, "eval_samples_per_second": 48.842, "eval_steps_per_second": 1.954, "eval_wer": 0.26, "step": 388000 }, { "epoch": 45.782208588957054, "grad_norm": 0.00011604304745560512, "learning_rate": 1.7537456106276329e-06, "loss": 0.0004, "step": 388050 }, { "epoch": 45.78810759792355, "grad_norm": 0.018161877989768982, "learning_rate": 1.7488722386523826e-06, "loss": 0.0006, "step": 388100 }, { "epoch": 45.79400660689004, "grad_norm": 0.0075698187574744225, "learning_rate": 1.7440055267236133e-06, "loss": 0.0005, "step": 388150 }, { "epoch": 45.79990561585654, "grad_norm": 0.011884881183505058, "learning_rate": 1.7391454755130766e-06, "loss": 0.0007, "step": 388200 }, { "epoch": 45.80580462482303, "grad_norm": 0.05312211811542511, "learning_rate": 1.7342920856915967e-06, "loss": 0.0004, "step": 388250 }, { "epoch": 45.81170363378952, "grad_norm": 0.0015120262978598475, "learning_rate": 1.7294453579290715e-06, "loss": 0.0004, "step": 388300 }, { "epoch": 45.817602642756015, "grad_norm": 0.17148366570472717, "learning_rate": 1.7246052928945044e-06, "loss": 0.0006, "step": 388350 }, { "epoch": 45.82350165172251, "grad_norm": 0.019620563834905624, "learning_rate": 1.7197718912559558e-06, "loss": 0.0006, "step": 388400 }, { "epoch": 45.829400660689004, "grad_norm": 0.026161201298236847, "learning_rate": 1.7149451536805805e-06, "loss": 0.0004, "step": 388450 }, { "epoch": 45.8352996696555, "grad_norm": 0.0060072122141718864, "learning_rate": 1.7101250808346013e-06, "loss": 0.0006, "step": 388500 }, { "epoch": 45.84119867862199, "grad_norm": 0.0009958007140085101, "learning_rate": 1.7053116733833253e-06, "loss": 0.0009, "step": 388550 }, { "epoch": 45.84709768758849, "grad_norm": 0.007259554695338011, "learning_rate": 1.700504931991148e-06, "loss": 0.0007, "step": 388600 }, { "epoch": 45.85299669655498, "grad_norm": 0.005232810042798519, "learning_rate": 1.6957048573215395e-06, "loss": 0.001, "step": 388650 }, { "epoch": 45.85889570552147, "grad_norm": 0.002788910875096917, "learning_rate": 1.6909114500370416e-06, "loss": 0.0005, "step": 388700 }, { "epoch": 45.864794714487964, "grad_norm": 0.07412877678871155, "learning_rate": 1.6861247107992862e-06, "loss": 0.0008, "step": 388750 }, { "epoch": 45.87069372345446, "grad_norm": 1.3181828260421753, "learning_rate": 1.6813446402689781e-06, "loss": 0.0005, "step": 388800 }, { "epoch": 45.87659273242095, "grad_norm": 0.2237810343503952, "learning_rate": 1.6765712391059119e-06, "loss": 0.0004, "step": 388850 }, { "epoch": 45.88249174138745, "grad_norm": 0.046479128301143646, "learning_rate": 1.6718045079689493e-06, "loss": 0.0012, "step": 388900 }, { "epoch": 45.88839075035394, "grad_norm": 0.012683531269431114, "learning_rate": 1.6670444475160363e-06, "loss": 0.0012, "step": 388950 }, { "epoch": 45.894289759320436, "grad_norm": 0.14465802907943726, "learning_rate": 1.6622910584041973e-06, "loss": 0.0006, "step": 389000 }, { "epoch": 45.894289759320436, "eval_cer": 0.08347676419965576, "eval_loss": 7.165929855545983e-05, "eval_runtime": 2.1118, "eval_samples_per_second": 47.353, "eval_steps_per_second": 1.894, "eval_wer": 0.26, "step": 389000 }, { "epoch": 45.90018876828693, "grad_norm": 0.015005979686975479, "learning_rate": 1.6575443412895409e-06, "loss": 0.0006, "step": 389050 }, { "epoch": 45.90608777725342, "grad_norm": 0.14954079687595367, "learning_rate": 1.6528042968272539e-06, "loss": 0.0006, "step": 389100 }, { "epoch": 45.91198678621991, "grad_norm": 0.04655086249113083, "learning_rate": 1.6480709256715798e-06, "loss": 0.0004, "step": 389150 }, { "epoch": 45.91788579518641, "grad_norm": 0.024312524124979973, "learning_rate": 1.6433442284758904e-06, "loss": 0.0014, "step": 389200 }, { "epoch": 45.9237848041529, "grad_norm": 0.13477455079555511, "learning_rate": 1.638624205892586e-06, "loss": 0.0007, "step": 389250 }, { "epoch": 45.9296838131194, "grad_norm": 0.0026848679408431053, "learning_rate": 1.6339108585731676e-06, "loss": 0.0004, "step": 389300 }, { "epoch": 45.93558282208589, "grad_norm": 0.011975702829658985, "learning_rate": 1.6292041871682206e-06, "loss": 0.0011, "step": 389350 }, { "epoch": 45.941481831052386, "grad_norm": 0.0022704650182276964, "learning_rate": 1.6245041923273918e-06, "loss": 0.0008, "step": 389400 }, { "epoch": 45.94738084001888, "grad_norm": 0.0005291531560942531, "learning_rate": 1.6198108746994235e-06, "loss": 0.0007, "step": 389450 }, { "epoch": 45.95327984898537, "grad_norm": 0.18324129283428192, "learning_rate": 1.6151242349321249e-06, "loss": 0.0006, "step": 389500 }, { "epoch": 45.95917885795186, "grad_norm": 0.07395844906568527, "learning_rate": 1.61044427367239e-06, "loss": 0.0006, "step": 389550 }, { "epoch": 45.96507786691836, "grad_norm": 0.0009711016900837421, "learning_rate": 1.6057709915661857e-06, "loss": 0.0008, "step": 389600 }, { "epoch": 45.97097687588485, "grad_norm": 0.0016047768294811249, "learning_rate": 1.601104389258562e-06, "loss": 0.0004, "step": 389650 }, { "epoch": 45.976875884851346, "grad_norm": 0.06194920837879181, "learning_rate": 1.596444467393643e-06, "loss": 0.0002, "step": 389700 }, { "epoch": 45.98277489381784, "grad_norm": 0.00598002877086401, "learning_rate": 1.5917912266146307e-06, "loss": 0.0003, "step": 389750 }, { "epoch": 45.988673902784335, "grad_norm": 0.001485988381318748, "learning_rate": 1.5871446675638058e-06, "loss": 0.0008, "step": 389800 }, { "epoch": 45.99457291175083, "grad_norm": 0.005398478824645281, "learning_rate": 1.5825047908825386e-06, "loss": 0.0005, "step": 389850 }, { "epoch": 46.00047192071732, "grad_norm": 0.0006816668901592493, "learning_rate": 1.5778715972112502e-06, "loss": 0.0004, "step": 389900 }, { "epoch": 46.00637092968381, "grad_norm": 0.05094901844859123, "learning_rate": 1.5732450871894567e-06, "loss": 0.0007, "step": 389950 }, { "epoch": 46.012269938650306, "grad_norm": 0.0002615033008623868, "learning_rate": 1.568625261455764e-06, "loss": 0.0006, "step": 390000 }, { "epoch": 46.012269938650306, "eval_cer": 0.08347676419965576, "eval_loss": 7.06866048858501e-05, "eval_runtime": 2.0511, "eval_samples_per_second": 48.755, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 390000 }, { "epoch": 46.0181689476168, "grad_norm": 0.0001900464849313721, "learning_rate": 1.5640121206478342e-06, "loss": 0.0009, "step": 390050 }, { "epoch": 46.024067956583295, "grad_norm": 0.015352548100054264, "learning_rate": 1.5594056654024082e-06, "loss": 0.0007, "step": 390100 }, { "epoch": 46.02996696554979, "grad_norm": 0.022068530321121216, "learning_rate": 1.5548058963553103e-06, "loss": 0.0007, "step": 390150 }, { "epoch": 46.035865974516284, "grad_norm": 0.00021673388255294412, "learning_rate": 1.5502128141414495e-06, "loss": 0.0007, "step": 390200 }, { "epoch": 46.04176498348277, "grad_norm": 0.008888046257197857, "learning_rate": 1.5456264193947966e-06, "loss": 0.0008, "step": 390250 }, { "epoch": 46.047663992449266, "grad_norm": 0.06052935868501663, "learning_rate": 1.5410467127484063e-06, "loss": 0.0006, "step": 390300 }, { "epoch": 46.05356300141576, "grad_norm": 0.0030067008920013905, "learning_rate": 1.5364736948344116e-06, "loss": 0.0006, "step": 390350 }, { "epoch": 46.059462010382255, "grad_norm": 0.06808806210756302, "learning_rate": 1.531907366284019e-06, "loss": 0.0005, "step": 390400 }, { "epoch": 46.06536101934875, "grad_norm": 0.295801043510437, "learning_rate": 1.5273477277275183e-06, "loss": 0.0007, "step": 390450 }, { "epoch": 46.071260028315244, "grad_norm": 0.11741465330123901, "learning_rate": 1.522794779794262e-06, "loss": 0.0007, "step": 390500 }, { "epoch": 46.07715903728174, "grad_norm": 0.0008174305548891425, "learning_rate": 1.5182485231126976e-06, "loss": 0.0007, "step": 390550 }, { "epoch": 46.08305804624823, "grad_norm": 0.001445303438231349, "learning_rate": 1.5137089583103392e-06, "loss": 0.0014, "step": 390600 }, { "epoch": 46.08895705521472, "grad_norm": 0.00040536493179388344, "learning_rate": 1.5091760860137748e-06, "loss": 0.0006, "step": 390650 }, { "epoch": 46.094856064181215, "grad_norm": 0.015994533896446228, "learning_rate": 1.5046499068486653e-06, "loss": 0.0006, "step": 390700 }, { "epoch": 46.10075507314771, "grad_norm": 0.039069194346666336, "learning_rate": 1.500130421439766e-06, "loss": 0.0002, "step": 390750 }, { "epoch": 46.106654082114204, "grad_norm": 0.05621403455734253, "learning_rate": 1.4956176304108893e-06, "loss": 0.0003, "step": 390800 }, { "epoch": 46.1125530910807, "grad_norm": 0.002774950349703431, "learning_rate": 1.4911115343849313e-06, "loss": 0.0004, "step": 390850 }, { "epoch": 46.11845210004719, "grad_norm": 0.00010172995825996622, "learning_rate": 1.4866121339838613e-06, "loss": 0.0006, "step": 390900 }, { "epoch": 46.12435110901369, "grad_norm": 0.006889644544571638, "learning_rate": 1.4821194298287267e-06, "loss": 0.0007, "step": 390950 }, { "epoch": 46.13025011798018, "grad_norm": 0.0011137161636725068, "learning_rate": 1.4776334225396482e-06, "loss": 0.0008, "step": 391000 }, { "epoch": 46.13025011798018, "eval_cer": 0.08347676419965576, "eval_loss": 7.111430750228465e-05, "eval_runtime": 2.0702, "eval_samples_per_second": 48.304, "eval_steps_per_second": 1.932, "eval_wer": 0.26, "step": 391000 }, { "epoch": 46.13614912694667, "grad_norm": 0.06319254636764526, "learning_rate": 1.473154112735836e-06, "loss": 0.0009, "step": 391050 }, { "epoch": 46.142048135913164, "grad_norm": 0.002480181399732828, "learning_rate": 1.4686815010355404e-06, "loss": 0.0008, "step": 391100 }, { "epoch": 46.14794714487966, "grad_norm": 0.022684216499328613, "learning_rate": 1.4642155880561393e-06, "loss": 0.0005, "step": 391150 }, { "epoch": 46.15384615384615, "grad_norm": 0.08549611270427704, "learning_rate": 1.4597563744140396e-06, "loss": 0.0005, "step": 391200 }, { "epoch": 46.15974516281265, "grad_norm": 0.042842939496040344, "learning_rate": 1.4553038607247437e-06, "loss": 0.0009, "step": 391250 }, { "epoch": 46.16564417177914, "grad_norm": 0.0001239198027178645, "learning_rate": 1.4508580476028322e-06, "loss": 0.0003, "step": 391300 }, { "epoch": 46.17154318074564, "grad_norm": 0.0121125727891922, "learning_rate": 1.4464189356619529e-06, "loss": 0.0008, "step": 391350 }, { "epoch": 46.17744218971213, "grad_norm": 0.003173399716615677, "learning_rate": 1.441986525514827e-06, "loss": 0.0007, "step": 391400 }, { "epoch": 46.18334119867862, "grad_norm": 0.13290593028068542, "learning_rate": 1.4375608177732535e-06, "loss": 0.0005, "step": 391450 }, { "epoch": 46.18924020764511, "grad_norm": 0.00872031506150961, "learning_rate": 1.4331418130481223e-06, "loss": 0.0007, "step": 391500 }, { "epoch": 46.19513921661161, "grad_norm": 0.0004921623622067273, "learning_rate": 1.428729511949367e-06, "loss": 0.0006, "step": 391550 }, { "epoch": 46.2010382255781, "grad_norm": 0.051685743033885956, "learning_rate": 1.424323915086012e-06, "loss": 0.0004, "step": 391600 }, { "epoch": 46.2069372345446, "grad_norm": 0.002509061712771654, "learning_rate": 1.4199250230661708e-06, "loss": 0.0003, "step": 391650 }, { "epoch": 46.21283624351109, "grad_norm": 0.138572558760643, "learning_rate": 1.4155328364970077e-06, "loss": 0.0005, "step": 391700 }, { "epoch": 46.218735252477586, "grad_norm": 0.055472634732723236, "learning_rate": 1.4111473559847765e-06, "loss": 0.0012, "step": 391750 }, { "epoch": 46.22463426144408, "grad_norm": 0.008775428868830204, "learning_rate": 1.4067685821347932e-06, "loss": 0.0008, "step": 391800 }, { "epoch": 46.23053327041057, "grad_norm": 0.0038905772380530834, "learning_rate": 1.4023965155514573e-06, "loss": 0.0003, "step": 391850 }, { "epoch": 46.23643227937706, "grad_norm": 0.0033210753463208675, "learning_rate": 1.398031156838242e-06, "loss": 0.0008, "step": 391900 }, { "epoch": 46.24233128834356, "grad_norm": 0.007711754646152258, "learning_rate": 1.393672506597693e-06, "loss": 0.001, "step": 391950 }, { "epoch": 46.24823029731005, "grad_norm": 0.0014485782012343407, "learning_rate": 1.389320565431429e-06, "loss": 0.0009, "step": 392000 }, { "epoch": 46.24823029731005, "eval_cer": 0.08347676419965576, "eval_loss": 7.799053855706006e-05, "eval_runtime": 2.1195, "eval_samples_per_second": 47.181, "eval_steps_per_second": 1.887, "eval_wer": 0.26, "step": 392000 }, { "epoch": 46.254129306276546, "grad_norm": 0.013108226470649242, "learning_rate": 1.3849753339401416e-06, "loss": 0.001, "step": 392050 }, { "epoch": 46.26002831524304, "grad_norm": 0.10880839824676514, "learning_rate": 1.3806368127236013e-06, "loss": 0.0007, "step": 392100 }, { "epoch": 46.265927324209535, "grad_norm": 0.0001872375578386709, "learning_rate": 1.37630500238064e-06, "loss": 0.0007, "step": 392150 }, { "epoch": 46.27182633317603, "grad_norm": 0.0007025608792901039, "learning_rate": 1.3719799035091852e-06, "loss": 0.0004, "step": 392200 }, { "epoch": 46.27772534214252, "grad_norm": 0.06750424951314926, "learning_rate": 1.3676615167062146e-06, "loss": 0.0006, "step": 392250 }, { "epoch": 46.28362435110901, "grad_norm": 0.0005307629471644759, "learning_rate": 1.3633498425678014e-06, "loss": 0.0007, "step": 392300 }, { "epoch": 46.289523360075506, "grad_norm": 0.15668360888957977, "learning_rate": 1.3590448816890644e-06, "loss": 0.0004, "step": 392350 }, { "epoch": 46.295422369042, "grad_norm": 0.07677529752254486, "learning_rate": 1.3547466346642279e-06, "loss": 0.0007, "step": 392400 }, { "epoch": 46.301321378008495, "grad_norm": 0.04374021291732788, "learning_rate": 1.350455102086562e-06, "loss": 0.0005, "step": 392450 }, { "epoch": 46.30722038697499, "grad_norm": 0.00016423314809799194, "learning_rate": 1.346170284548437e-06, "loss": 0.0001, "step": 392500 }, { "epoch": 46.313119395941484, "grad_norm": 2.191722705902066e-05, "learning_rate": 1.3418921826412633e-06, "loss": 0.0008, "step": 392550 }, { "epoch": 46.31901840490798, "grad_norm": 0.017242446541786194, "learning_rate": 1.3376207969555577e-06, "loss": 0.0009, "step": 392600 }, { "epoch": 46.324917413874466, "grad_norm": 0.02739832177758217, "learning_rate": 1.333356128080887e-06, "loss": 0.0006, "step": 392650 }, { "epoch": 46.33081642284096, "grad_norm": 0.0025929943658411503, "learning_rate": 1.3290981766058973e-06, "loss": 0.0002, "step": 392700 }, { "epoch": 46.336715431807455, "grad_norm": 0.0008913087076507509, "learning_rate": 1.324846943118313e-06, "loss": 0.0003, "step": 392750 }, { "epoch": 46.34261444077395, "grad_norm": 0.0018385617295280099, "learning_rate": 1.3206024282049202e-06, "loss": 0.0009, "step": 392800 }, { "epoch": 46.348513449740445, "grad_norm": 0.0006433789385482669, "learning_rate": 1.3163646324515889e-06, "loss": 0.0002, "step": 392850 }, { "epoch": 46.35441245870694, "grad_norm": 0.04524719715118408, "learning_rate": 1.3121335564432624e-06, "loss": 0.0008, "step": 392900 }, { "epoch": 46.360311467673434, "grad_norm": 0.001203825231641531, "learning_rate": 1.3079092007639404e-06, "loss": 0.0004, "step": 392950 }, { "epoch": 46.36621047663992, "grad_norm": 0.038983408361673355, "learning_rate": 1.303691565996712e-06, "loss": 0.0008, "step": 393000 }, { "epoch": 46.36621047663992, "eval_cer": 0.08347676419965576, "eval_loss": 7.83731447882019e-05, "eval_runtime": 2.094, "eval_samples_per_second": 47.755, "eval_steps_per_second": 1.91, "eval_wer": 0.26, "step": 393000 }, { "epoch": 46.372109485606416, "grad_norm": 0.3528129458427429, "learning_rate": 1.299480652723728e-06, "loss": 0.0005, "step": 393050 }, { "epoch": 46.37800849457291, "grad_norm": 0.07956081628799438, "learning_rate": 1.2952764615262236e-06, "loss": 0.0008, "step": 393100 }, { "epoch": 46.383907503539405, "grad_norm": 0.014481359161436558, "learning_rate": 1.2910789929844958e-06, "loss": 0.0005, "step": 393150 }, { "epoch": 46.3898065125059, "grad_norm": 0.006478253286331892, "learning_rate": 1.2868882476779088e-06, "loss": 0.0007, "step": 393200 }, { "epoch": 46.395705521472394, "grad_norm": 0.22849948704242706, "learning_rate": 1.282704226184911e-06, "loss": 0.0009, "step": 393250 }, { "epoch": 46.40160453043889, "grad_norm": 0.0006055666599422693, "learning_rate": 1.2785269290830237e-06, "loss": 0.0005, "step": 393300 }, { "epoch": 46.40750353940538, "grad_norm": 0.0003715317288879305, "learning_rate": 1.2743563569488248e-06, "loss": 0.0008, "step": 393350 }, { "epoch": 46.41340254837187, "grad_norm": 0.004104341845959425, "learning_rate": 1.2701925103579814e-06, "loss": 0.0008, "step": 393400 }, { "epoch": 46.419301557338365, "grad_norm": 0.003010945627465844, "learning_rate": 1.2660353898852229e-06, "loss": 0.0007, "step": 393450 }, { "epoch": 46.42520056630486, "grad_norm": 0.2242160588502884, "learning_rate": 1.2618849961043455e-06, "loss": 0.0005, "step": 393500 }, { "epoch": 46.431099575271354, "grad_norm": 0.001329541439190507, "learning_rate": 1.25774132958823e-06, "loss": 0.0006, "step": 393550 }, { "epoch": 46.43699858423785, "grad_norm": 0.004228598438203335, "learning_rate": 1.2536043909088191e-06, "loss": 0.0005, "step": 393600 }, { "epoch": 46.44289759320434, "grad_norm": 0.0011707848170772195, "learning_rate": 1.2494741806371335e-06, "loss": 0.0011, "step": 393650 }, { "epoch": 46.44879660217084, "grad_norm": 0.0012706458801403642, "learning_rate": 1.2453506993432507e-06, "loss": 0.0006, "step": 393700 }, { "epoch": 46.45469561113733, "grad_norm": 0.022395219653844833, "learning_rate": 1.241233947596343e-06, "loss": 0.0004, "step": 393750 }, { "epoch": 46.46059462010382, "grad_norm": 0.0021447886247187853, "learning_rate": 1.2371239259646228e-06, "loss": 0.0006, "step": 393800 }, { "epoch": 46.466493629070314, "grad_norm": 0.004747351631522179, "learning_rate": 1.233020635015414e-06, "loss": 0.0006, "step": 393850 }, { "epoch": 46.47239263803681, "grad_norm": 0.010122711770236492, "learning_rate": 1.2289240753150744e-06, "loss": 0.0007, "step": 393900 }, { "epoch": 46.4782916470033, "grad_norm": 0.07545987516641617, "learning_rate": 1.2248342474290575e-06, "loss": 0.0005, "step": 393950 }, { "epoch": 46.4841906559698, "grad_norm": 0.009103691205382347, "learning_rate": 1.2207511519218672e-06, "loss": 0.0005, "step": 394000 }, { "epoch": 46.4841906559698, "eval_cer": 0.08347676419965576, "eval_loss": 7.85375596024096e-05, "eval_runtime": 2.0831, "eval_samples_per_second": 48.004, "eval_steps_per_second": 1.92, "eval_wer": 0.26, "step": 394000 }, { "epoch": 46.49008966493629, "grad_norm": 0.011026897467672825, "learning_rate": 1.2166747893570918e-06, "loss": 0.0004, "step": 394050 }, { "epoch": 46.49598867390279, "grad_norm": 0.01984744518995285, "learning_rate": 1.2126051602973864e-06, "loss": 0.0006, "step": 394100 }, { "epoch": 46.50188768286928, "grad_norm": 0.013987681828439236, "learning_rate": 1.2085422653044798e-06, "loss": 0.0005, "step": 394150 }, { "epoch": 46.50778669183577, "grad_norm": 0.038661226630210876, "learning_rate": 1.2044861049391675e-06, "loss": 0.0009, "step": 394200 }, { "epoch": 46.51368570080226, "grad_norm": 0.0067283399403095245, "learning_rate": 1.2004366797613187e-06, "loss": 0.0004, "step": 394250 }, { "epoch": 46.51958470976876, "grad_norm": 0.022669371217489243, "learning_rate": 1.1963939903298637e-06, "loss": 0.0003, "step": 394300 }, { "epoch": 46.52548371873525, "grad_norm": 0.15252558887004852, "learning_rate": 1.1923580372028175e-06, "loss": 0.0006, "step": 394350 }, { "epoch": 46.53138272770175, "grad_norm": 0.3445530831813812, "learning_rate": 1.1883288209372511e-06, "loss": 0.0006, "step": 394400 }, { "epoch": 46.53728173666824, "grad_norm": 0.00471095833927393, "learning_rate": 1.1843063420893196e-06, "loss": 0.0004, "step": 394450 }, { "epoch": 46.543180745634736, "grad_norm": 0.06844998896121979, "learning_rate": 1.18029060121424e-06, "loss": 0.0007, "step": 394500 }, { "epoch": 46.54907975460123, "grad_norm": 0.0014950049808248878, "learning_rate": 1.1762815988662911e-06, "loss": 0.0006, "step": 394550 }, { "epoch": 46.55497876356772, "grad_norm": 0.010344707407057285, "learning_rate": 1.172279335598847e-06, "loss": 0.0006, "step": 394600 }, { "epoch": 46.56087777253421, "grad_norm": 0.0027624047361314297, "learning_rate": 1.1682838119643213e-06, "loss": 0.0004, "step": 394650 }, { "epoch": 46.56677678150071, "grad_norm": 0.0003670313744805753, "learning_rate": 1.1642950285142174e-06, "loss": 0.0007, "step": 394700 }, { "epoch": 46.5726757904672, "grad_norm": 0.3361494839191437, "learning_rate": 1.1603129857991001e-06, "loss": 0.0008, "step": 394750 }, { "epoch": 46.578574799433696, "grad_norm": 0.0010857325978577137, "learning_rate": 1.1563376843686137e-06, "loss": 0.0004, "step": 394800 }, { "epoch": 46.58447380840019, "grad_norm": 0.012506934814155102, "learning_rate": 1.152369124771452e-06, "loss": 0.001, "step": 394850 }, { "epoch": 46.590372817366685, "grad_norm": 0.05481722578406334, "learning_rate": 1.1484073075553936e-06, "loss": 0.0008, "step": 394900 }, { "epoch": 46.59627182633318, "grad_norm": 0.000964194186963141, "learning_rate": 1.1444522332672902e-06, "loss": 0.0007, "step": 394950 }, { "epoch": 46.60217083529967, "grad_norm": 0.1298053115606308, "learning_rate": 1.1405039024530552e-06, "loss": 0.0005, "step": 395000 }, { "epoch": 46.60217083529967, "eval_cer": 0.08347676419965576, "eval_loss": 8.243937918450683e-05, "eval_runtime": 2.1318, "eval_samples_per_second": 46.91, "eval_steps_per_second": 1.876, "eval_wer": 0.26, "step": 395000 }, { "epoch": 46.60806984426616, "grad_norm": 0.0005065826699137688, "learning_rate": 1.1365623156576688e-06, "loss": 0.0006, "step": 395050 }, { "epoch": 46.613968853232656, "grad_norm": 0.0005801382358185947, "learning_rate": 1.1326274734251796e-06, "loss": 0.0007, "step": 395100 }, { "epoch": 46.61986786219915, "grad_norm": 0.003915139473974705, "learning_rate": 1.1286993762987142e-06, "loss": 0.0007, "step": 395150 }, { "epoch": 46.625766871165645, "grad_norm": 0.017039868980646133, "learning_rate": 1.1247780248204665e-06, "loss": 0.0006, "step": 395200 }, { "epoch": 46.63166588013214, "grad_norm": 0.0013291159411892295, "learning_rate": 1.1208634195316924e-06, "loss": 0.0004, "step": 395250 }, { "epoch": 46.637564889098634, "grad_norm": 0.01110841054469347, "learning_rate": 1.1169555609727155e-06, "loss": 0.0006, "step": 395300 }, { "epoch": 46.64346389806512, "grad_norm": 0.000782645947765559, "learning_rate": 1.1130544496829432e-06, "loss": 0.0009, "step": 395350 }, { "epoch": 46.649362907031616, "grad_norm": 0.045940134674310684, "learning_rate": 1.1091600862008334e-06, "loss": 0.0012, "step": 395400 }, { "epoch": 46.65526191599811, "grad_norm": 0.012423469685018063, "learning_rate": 1.1052724710639283e-06, "loss": 0.0004, "step": 395450 }, { "epoch": 46.661160924964605, "grad_norm": 0.02332165464758873, "learning_rate": 1.1013916048088214e-06, "loss": 0.0008, "step": 395500 }, { "epoch": 46.6670599339311, "grad_norm": 0.04938233643770218, "learning_rate": 1.0975174879711835e-06, "loss": 0.0008, "step": 395550 }, { "epoch": 46.672958942897594, "grad_norm": 0.0001516212068963796, "learning_rate": 1.0936501210857653e-06, "loss": 0.0008, "step": 395600 }, { "epoch": 46.67885795186409, "grad_norm": 0.002234744606539607, "learning_rate": 1.089789504686367e-06, "loss": 0.0004, "step": 395650 }, { "epoch": 46.68475696083058, "grad_norm": 0.002521910471841693, "learning_rate": 1.0859356393058628e-06, "loss": 0.0007, "step": 395700 }, { "epoch": 46.69065596979707, "grad_norm": 0.13635483384132385, "learning_rate": 1.0820885254762049e-06, "loss": 0.0009, "step": 395750 }, { "epoch": 46.696554978763565, "grad_norm": 0.011489064432680607, "learning_rate": 1.0782481637284013e-06, "loss": 0.0004, "step": 395800 }, { "epoch": 46.70245398773006, "grad_norm": 5.18241613463033e-05, "learning_rate": 1.0744145545925289e-06, "loss": 0.0007, "step": 395850 }, { "epoch": 46.708352996696554, "grad_norm": 0.00012703514948952943, "learning_rate": 1.070587698597747e-06, "loss": 0.0006, "step": 395900 }, { "epoch": 46.71425200566305, "grad_norm": 0.012630414217710495, "learning_rate": 1.0667675962722668e-06, "loss": 0.0004, "step": 395950 }, { "epoch": 46.72015101462954, "grad_norm": 0.00028057023882865906, "learning_rate": 1.0629542481433663e-06, "loss": 0.0008, "step": 396000 }, { "epoch": 46.72015101462954, "eval_cer": 0.08347676419965576, "eval_loss": 8.55770631460473e-05, "eval_runtime": 2.0396, "eval_samples_per_second": 49.029, "eval_steps_per_second": 1.961, "eval_wer": 0.26, "step": 396000 }, { "epoch": 46.72605002359604, "grad_norm": 0.0122760571539402, "learning_rate": 1.059147654737408e-06, "loss": 0.0006, "step": 396050 }, { "epoch": 46.73194903256253, "grad_norm": 0.14457565546035767, "learning_rate": 1.0553478165797992e-06, "loss": 0.0008, "step": 396100 }, { "epoch": 46.73784804152902, "grad_norm": 0.08078669011592865, "learning_rate": 1.0515547341950426e-06, "loss": 0.001, "step": 396150 }, { "epoch": 46.743747050495514, "grad_norm": 0.016388561576604843, "learning_rate": 1.0477684081066752e-06, "loss": 0.0008, "step": 396200 }, { "epoch": 46.74964605946201, "grad_norm": 0.0900735855102539, "learning_rate": 1.0439888388373342e-06, "loss": 0.001, "step": 396250 }, { "epoch": 46.7555450684285, "grad_norm": 0.0007424736395478249, "learning_rate": 1.0402160269087024e-06, "loss": 0.0006, "step": 396300 }, { "epoch": 46.761444077395, "grad_norm": 0.0021981545723974705, "learning_rate": 1.0364499728415356e-06, "loss": 0.0004, "step": 396350 }, { "epoch": 46.76734308636149, "grad_norm": 0.0005304654478095472, "learning_rate": 1.0326906771556565e-06, "loss": 0.0004, "step": 396400 }, { "epoch": 46.77324209532799, "grad_norm": 0.00022860248282086104, "learning_rate": 1.0289381403699671e-06, "loss": 0.0006, "step": 396450 }, { "epoch": 46.77914110429448, "grad_norm": 0.02480207197368145, "learning_rate": 1.0251923630024141e-06, "loss": 0.0006, "step": 396500 }, { "epoch": 46.78504011326097, "grad_norm": 0.004287264309823513, "learning_rate": 1.0214533455700226e-06, "loss": 0.0006, "step": 396550 }, { "epoch": 46.79093912222746, "grad_norm": 0.205963596701622, "learning_rate": 1.017721088588891e-06, "loss": 0.0006, "step": 396600 }, { "epoch": 46.79683813119396, "grad_norm": 0.012891861610114574, "learning_rate": 1.0139955925741795e-06, "loss": 0.0007, "step": 396650 }, { "epoch": 46.80273714016045, "grad_norm": 0.0017631052760407329, "learning_rate": 1.01027685804011e-06, "loss": 0.0003, "step": 396700 }, { "epoch": 46.80863614912695, "grad_norm": 0.07544956356287003, "learning_rate": 1.0065648854999775e-06, "loss": 0.0013, "step": 396750 }, { "epoch": 46.81453515809344, "grad_norm": 0.00015506643103435636, "learning_rate": 1.0028596754661336e-06, "loss": 0.0008, "step": 396800 }, { "epoch": 46.820434167059936, "grad_norm": 0.004577050916850567, "learning_rate": 9.99161228450013e-07, "loss": 0.0004, "step": 396850 }, { "epoch": 46.82633317602643, "grad_norm": 0.001355480053462088, "learning_rate": 9.954695449621076e-07, "loss": 0.0007, "step": 396900 }, { "epoch": 46.83223218499292, "grad_norm": 0.0052574025467038155, "learning_rate": 9.91784625511971e-07, "loss": 0.0007, "step": 396950 }, { "epoch": 46.83813119395941, "grad_norm": 0.004470845218747854, "learning_rate": 9.881064706082299e-07, "loss": 0.0008, "step": 397000 }, { "epoch": 46.83813119395941, "eval_cer": 0.08347676419965576, "eval_loss": 8.851336315274239e-05, "eval_runtime": 2.1271, "eval_samples_per_second": 47.013, "eval_steps_per_second": 1.881, "eval_wer": 0.26, "step": 397000 }, { "epoch": 46.84403020292591, "grad_norm": 0.04701912775635719, "learning_rate": 9.844350807585778e-07, "loss": 0.0004, "step": 397050 }, { "epoch": 46.8499292118924, "grad_norm": 0.0001664694573264569, "learning_rate": 9.80770456469765e-07, "loss": 0.0008, "step": 397100 }, { "epoch": 46.855828220858896, "grad_norm": 0.000675744900945574, "learning_rate": 9.77112598247626e-07, "loss": 0.0004, "step": 397150 }, { "epoch": 46.86172722982539, "grad_norm": 0.08290919661521912, "learning_rate": 9.734615065970454e-07, "loss": 0.0008, "step": 397200 }, { "epoch": 46.867626238791885, "grad_norm": 0.001877761329524219, "learning_rate": 9.698171820219814e-07, "loss": 0.0005, "step": 397250 }, { "epoch": 46.87352524775838, "grad_norm": 0.0017517971573397517, "learning_rate": 9.66179625025454e-07, "loss": 0.0006, "step": 397300 }, { "epoch": 46.87942425672487, "grad_norm": 0.00782626774162054, "learning_rate": 9.625488361095502e-07, "loss": 0.0005, "step": 397350 }, { "epoch": 46.88532326569136, "grad_norm": 0.16137580573558807, "learning_rate": 9.58924815775425e-07, "loss": 0.0007, "step": 397400 }, { "epoch": 46.891222274657856, "grad_norm": 0.0018151520052924752, "learning_rate": 9.553075645232945e-07, "loss": 0.0006, "step": 397450 }, { "epoch": 46.89712128362435, "grad_norm": 0.00419260049238801, "learning_rate": 9.516970828524485e-07, "loss": 0.0004, "step": 397500 }, { "epoch": 46.903020292590845, "grad_norm": 0.06188889592885971, "learning_rate": 9.480933712612328e-07, "loss": 0.0005, "step": 397550 }, { "epoch": 46.90891930155734, "grad_norm": 0.0024251434952020645, "learning_rate": 9.444964302470716e-07, "loss": 0.0004, "step": 397600 }, { "epoch": 46.914818310523835, "grad_norm": 0.024205999448895454, "learning_rate": 9.409062603064345e-07, "loss": 0.0009, "step": 397650 }, { "epoch": 46.92071731949033, "grad_norm": 0.0035415722522884607, "learning_rate": 9.373228619348696e-07, "loss": 0.0004, "step": 397700 }, { "epoch": 46.92661632845682, "grad_norm": 0.1100892499089241, "learning_rate": 9.337462356270033e-07, "loss": 0.0002, "step": 397750 }, { "epoch": 46.93251533742331, "grad_norm": 0.008176487870514393, "learning_rate": 9.30176381876502e-07, "loss": 0.0004, "step": 397800 }, { "epoch": 46.938414346389806, "grad_norm": 0.00041167359449900687, "learning_rate": 9.266133011761102e-07, "loss": 0.0006, "step": 397850 }, { "epoch": 46.9443133553563, "grad_norm": 0.0733209103345871, "learning_rate": 9.230569940176348e-07, "loss": 0.0008, "step": 397900 }, { "epoch": 46.950212364322795, "grad_norm": 0.0023354601580649614, "learning_rate": 9.195074608919551e-07, "loss": 0.0011, "step": 397950 }, { "epoch": 46.95611137328929, "grad_norm": 0.0009104014025069773, "learning_rate": 9.159647022889961e-07, "loss": 0.0008, "step": 398000 }, { "epoch": 46.95611137328929, "eval_cer": 0.08347676419965576, "eval_loss": 8.493169298162684e-05, "eval_runtime": 2.0514, "eval_samples_per_second": 48.747, "eval_steps_per_second": 1.95, "eval_wer": 0.26, "step": 398000 }, { "epoch": 46.962010382255784, "grad_norm": 0.008425116539001465, "learning_rate": 9.12428718697772e-07, "loss": 0.0005, "step": 398050 }, { "epoch": 46.96790939122228, "grad_norm": 0.002881021471694112, "learning_rate": 9.08899510606348e-07, "loss": 0.0007, "step": 398100 }, { "epoch": 46.973808400188766, "grad_norm": 0.09984217584133148, "learning_rate": 9.053770785018568e-07, "loss": 0.0003, "step": 398150 }, { "epoch": 46.97970740915526, "grad_norm": 0.17311875522136688, "learning_rate": 9.018614228704925e-07, "loss": 0.0008, "step": 398200 }, { "epoch": 46.985606418121755, "grad_norm": 0.14129196107387543, "learning_rate": 8.983525441975116e-07, "loss": 0.0005, "step": 398250 }, { "epoch": 46.99150542708825, "grad_norm": 0.0005260613979771733, "learning_rate": 8.948504429672544e-07, "loss": 0.0006, "step": 398300 }, { "epoch": 46.997404436054744, "grad_norm": 0.0001089020588551648, "learning_rate": 8.913551196631009e-07, "loss": 0.0005, "step": 398350 }, { "epoch": 47.00330344502124, "grad_norm": 0.016055826097726822, "learning_rate": 8.878665747675153e-07, "loss": 0.0006, "step": 398400 }, { "epoch": 47.00920245398773, "grad_norm": 0.001078003435395658, "learning_rate": 8.843848087620066e-07, "loss": 0.0004, "step": 398450 }, { "epoch": 47.01510146295422, "grad_norm": 0.006222466938197613, "learning_rate": 8.809098221271683e-07, "loss": 0.0006, "step": 398500 }, { "epoch": 47.021000471920715, "grad_norm": 0.00021408234897535294, "learning_rate": 8.774416153426446e-07, "loss": 0.0004, "step": 398550 }, { "epoch": 47.02689948088721, "grad_norm": 0.00019881637126673013, "learning_rate": 8.739801888871469e-07, "loss": 0.0005, "step": 398600 }, { "epoch": 47.032798489853704, "grad_norm": 0.0010085158282890916, "learning_rate": 8.70525543238454e-07, "loss": 0.0006, "step": 398650 }, { "epoch": 47.0386974988202, "grad_norm": 0.0011127314064651728, "learning_rate": 8.670776788734125e-07, "loss": 0.0008, "step": 398700 }, { "epoch": 47.04459650778669, "grad_norm": 0.007537886966019869, "learning_rate": 8.636365962679138e-07, "loss": 0.0003, "step": 398750 }, { "epoch": 47.05049551675319, "grad_norm": 0.04997146874666214, "learning_rate": 8.602022958969336e-07, "loss": 0.0004, "step": 398800 }, { "epoch": 47.05639452571968, "grad_norm": 0.028280159458518028, "learning_rate": 8.567747782345037e-07, "loss": 0.0003, "step": 398850 }, { "epoch": 47.06229353468617, "grad_norm": 0.00010447719978401437, "learning_rate": 8.533540437537235e-07, "loss": 0.0008, "step": 398900 }, { "epoch": 47.068192543652664, "grad_norm": 0.0001140000531449914, "learning_rate": 8.499400929267487e-07, "loss": 0.0008, "step": 398950 }, { "epoch": 47.07409155261916, "grad_norm": 0.002449434483423829, "learning_rate": 8.465329262248078e-07, "loss": 0.0006, "step": 399000 }, { "epoch": 47.07409155261916, "eval_cer": 0.08347676419965576, "eval_loss": 8.271922706626356e-05, "eval_runtime": 2.0641, "eval_samples_per_second": 48.446, "eval_steps_per_second": 1.938, "eval_wer": 0.26, "step": 399000 }, { "epoch": 47.07999056158565, "grad_norm": 0.04780275374650955, "learning_rate": 8.431325441181748e-07, "loss": 0.0007, "step": 399050 }, { "epoch": 47.08588957055215, "grad_norm": 0.0018070247024297714, "learning_rate": 8.397389470762185e-07, "loss": 0.0011, "step": 399100 }, { "epoch": 47.09178857951864, "grad_norm": 0.1098192036151886, "learning_rate": 8.363521355673421e-07, "loss": 0.0007, "step": 399150 }, { "epoch": 47.09768758848514, "grad_norm": 0.01592187024652958, "learning_rate": 8.329721100590271e-07, "loss": 0.0007, "step": 399200 }, { "epoch": 47.10358659745163, "grad_norm": 0.0029478061478585005, "learning_rate": 8.295988710178115e-07, "loss": 0.0005, "step": 399250 }, { "epoch": 47.10948560641812, "grad_norm": 0.0018132416298612952, "learning_rate": 8.262324189093063e-07, "loss": 0.0007, "step": 399300 }, { "epoch": 47.11538461538461, "grad_norm": 0.00202446011826396, "learning_rate": 8.228727541981673e-07, "loss": 0.0004, "step": 399350 }, { "epoch": 47.12128362435111, "grad_norm": 0.01001717895269394, "learning_rate": 8.195198773481405e-07, "loss": 0.0002, "step": 399400 }, { "epoch": 47.1271826333176, "grad_norm": 0.07225947082042694, "learning_rate": 8.161737888220001e-07, "loss": 0.0006, "step": 399450 }, { "epoch": 47.1330816422841, "grad_norm": 0.021582763642072678, "learning_rate": 8.128344890816209e-07, "loss": 0.0008, "step": 399500 }, { "epoch": 47.13898065125059, "grad_norm": 0.16638065874576569, "learning_rate": 8.095019785879121e-07, "loss": 0.0004, "step": 399550 }, { "epoch": 47.144879660217086, "grad_norm": 0.036714524030685425, "learning_rate": 8.061762578008614e-07, "loss": 0.0007, "step": 399600 }, { "epoch": 47.15077866918358, "grad_norm": 0.00794762559235096, "learning_rate": 8.02857327179507e-07, "loss": 0.0004, "step": 399650 }, { "epoch": 47.15667767815007, "grad_norm": 0.12166692316532135, "learning_rate": 7.995451871819659e-07, "loss": 0.0007, "step": 399700 }, { "epoch": 47.16257668711656, "grad_norm": 0.0007882051868364215, "learning_rate": 7.962398382654002e-07, "loss": 0.0006, "step": 399750 }, { "epoch": 47.16847569608306, "grad_norm": 0.002018239349126816, "learning_rate": 7.92941280886056e-07, "loss": 0.0004, "step": 399800 }, { "epoch": 47.17437470504955, "grad_norm": 0.01089867576956749, "learning_rate": 7.896495154992134e-07, "loss": 0.0009, "step": 399850 }, { "epoch": 47.180273714016046, "grad_norm": 0.002059461083263159, "learning_rate": 7.863645425592425e-07, "loss": 0.0003, "step": 399900 }, { "epoch": 47.18617272298254, "grad_norm": 0.05442357063293457, "learning_rate": 7.830863625195584e-07, "loss": 0.0004, "step": 399950 }, { "epoch": 47.192071731949035, "grad_norm": 0.16573812067508698, "learning_rate": 7.79814975832649e-07, "loss": 0.0006, "step": 400000 }, { "epoch": 47.192071731949035, "eval_cer": 0.08347676419965576, "eval_loss": 8.440347301075235e-05, "eval_runtime": 2.0345, "eval_samples_per_second": 49.152, "eval_steps_per_second": 1.966, "eval_wer": 0.26, "step": 400000 }, { "epoch": 47.19797074091553, "grad_norm": 0.01065076980739832, "learning_rate": 7.765503829500586e-07, "loss": 0.0006, "step": 400050 }, { "epoch": 47.20386974988202, "grad_norm": 0.005790959112346172, "learning_rate": 7.732925843223937e-07, "loss": 0.0009, "step": 400100 }, { "epoch": 47.20976875884851, "grad_norm": 0.043339114636182785, "learning_rate": 7.700415803993221e-07, "loss": 0.0008, "step": 400150 }, { "epoch": 47.215667767815006, "grad_norm": 0.2635165750980377, "learning_rate": 7.667973716295851e-07, "loss": 0.0004, "step": 400200 }, { "epoch": 47.2215667767815, "grad_norm": 0.07121681421995163, "learning_rate": 7.635599584609632e-07, "loss": 0.0006, "step": 400250 }, { "epoch": 47.227465785747995, "grad_norm": 0.0006516047287732363, "learning_rate": 7.603293413403267e-07, "loss": 0.0007, "step": 400300 }, { "epoch": 47.23336479471449, "grad_norm": 0.0004581193206831813, "learning_rate": 7.571055207135858e-07, "loss": 0.0005, "step": 400350 }, { "epoch": 47.239263803680984, "grad_norm": 0.18299949169158936, "learning_rate": 7.538884970257121e-07, "loss": 0.0006, "step": 400400 }, { "epoch": 47.24516281264748, "grad_norm": 0.000871701748110354, "learning_rate": 7.506782707207671e-07, "loss": 0.0007, "step": 400450 }, { "epoch": 47.251061821613966, "grad_norm": 0.02796931564807892, "learning_rate": 7.474748422418465e-07, "loss": 0.0008, "step": 400500 }, { "epoch": 47.25696083058046, "grad_norm": 0.08161459863185883, "learning_rate": 7.442782120311132e-07, "loss": 0.0004, "step": 400550 }, { "epoch": 47.262859839546955, "grad_norm": 0.0007770252996124327, "learning_rate": 7.410883805297919e-07, "loss": 0.0005, "step": 400600 }, { "epoch": 47.26875884851345, "grad_norm": 0.008818699978291988, "learning_rate": 7.379053481781806e-07, "loss": 0.0005, "step": 400650 }, { "epoch": 47.274657857479944, "grad_norm": 0.07932118326425552, "learning_rate": 7.347291154156222e-07, "loss": 0.0006, "step": 400700 }, { "epoch": 47.28055686644644, "grad_norm": 0.0054678283631801605, "learning_rate": 7.315596826805271e-07, "loss": 0.0008, "step": 400750 }, { "epoch": 47.28645587541293, "grad_norm": 0.012972763739526272, "learning_rate": 7.283970504103733e-07, "loss": 0.0006, "step": 400800 }, { "epoch": 47.29235488437942, "grad_norm": 0.0326010100543499, "learning_rate": 7.252412190416946e-07, "loss": 0.0007, "step": 400850 }, { "epoch": 47.298253893345915, "grad_norm": 0.0005183659377507865, "learning_rate": 7.220921890100874e-07, "loss": 0.0009, "step": 400900 }, { "epoch": 47.30415290231241, "grad_norm": 0.32762959599494934, "learning_rate": 7.189499607502037e-07, "loss": 0.0007, "step": 400950 }, { "epoch": 47.310051911278904, "grad_norm": 0.0009381647687405348, "learning_rate": 7.158145346957578e-07, "loss": 0.0006, "step": 401000 }, { "epoch": 47.310051911278904, "eval_cer": 0.08347676419965576, "eval_loss": 8.177373820217326e-05, "eval_runtime": 2.0645, "eval_samples_per_second": 48.439, "eval_steps_per_second": 1.938, "eval_wer": 0.26, "step": 401000 }, { "epoch": 47.3159509202454, "grad_norm": 0.0005764758097939193, "learning_rate": 7.126859112795426e-07, "loss": 0.0007, "step": 401050 }, { "epoch": 47.32184992921189, "grad_norm": 5.51937882846687e-05, "learning_rate": 7.095640909333956e-07, "loss": 0.0004, "step": 401100 }, { "epoch": 47.32774893817839, "grad_norm": 0.00013453529390972108, "learning_rate": 7.064490740882057e-07, "loss": 0.0007, "step": 401150 }, { "epoch": 47.33364794714488, "grad_norm": 0.0021027831826359034, "learning_rate": 7.033408611739456e-07, "loss": 0.0008, "step": 401200 }, { "epoch": 47.33954695611137, "grad_norm": 0.00045611688983626664, "learning_rate": 7.002394526196387e-07, "loss": 0.0008, "step": 401250 }, { "epoch": 47.345445965077865, "grad_norm": 0.001532847760245204, "learning_rate": 6.97144848853365e-07, "loss": 0.0007, "step": 401300 }, { "epoch": 47.35134497404436, "grad_norm": 0.1319761723279953, "learning_rate": 6.940570503022658e-07, "loss": 0.0002, "step": 401350 }, { "epoch": 47.357243983010854, "grad_norm": 0.0002547252515796572, "learning_rate": 6.909760573925561e-07, "loss": 0.0007, "step": 401400 }, { "epoch": 47.36314299197735, "grad_norm": 0.16586649417877197, "learning_rate": 6.8790187054949e-07, "loss": 0.0008, "step": 401450 }, { "epoch": 47.36904200094384, "grad_norm": 0.12491147965192795, "learning_rate": 6.848344901974057e-07, "loss": 0.0007, "step": 401500 }, { "epoch": 47.37494100991034, "grad_norm": 0.0006847885088063776, "learning_rate": 6.817739167596759e-07, "loss": 0.0003, "step": 401550 }, { "epoch": 47.38084001887683, "grad_norm": 0.007871212437748909, "learning_rate": 6.787201506587626e-07, "loss": 0.0004, "step": 401600 }, { "epoch": 47.38673902784332, "grad_norm": 0.007913952693343163, "learning_rate": 6.756731923161674e-07, "loss": 0.0004, "step": 401650 }, { "epoch": 47.392638036809814, "grad_norm": 0.12770964205265045, "learning_rate": 6.72633042152454e-07, "loss": 0.0005, "step": 401700 }, { "epoch": 47.39853704577631, "grad_norm": 0.11426486819982529, "learning_rate": 6.695997005872534e-07, "loss": 0.0009, "step": 401750 }, { "epoch": 47.4044360547428, "grad_norm": 0.006483613513410091, "learning_rate": 6.66573168039264e-07, "loss": 0.0009, "step": 401800 }, { "epoch": 47.4103350637093, "grad_norm": 2.5308960175607353e-05, "learning_rate": 6.635534449262237e-07, "loss": 0.0005, "step": 401850 }, { "epoch": 47.41623407267579, "grad_norm": 0.013583158142864704, "learning_rate": 6.605405316649383e-07, "loss": 0.0007, "step": 401900 }, { "epoch": 47.422133081642286, "grad_norm": 0.03462893143296242, "learning_rate": 6.575344286712914e-07, "loss": 0.0004, "step": 401950 }, { "epoch": 47.42803209060878, "grad_norm": 0.0868329256772995, "learning_rate": 6.545351363601959e-07, "loss": 0.0004, "step": 402000 }, { "epoch": 47.42803209060878, "eval_cer": 0.08347676419965576, "eval_loss": 8.167790656443685e-05, "eval_runtime": 2.049, "eval_samples_per_second": 48.804, "eval_steps_per_second": 1.952, "eval_wer": 0.26, "step": 402000 }, { "epoch": 47.43393109957527, "grad_norm": 9.264099935535342e-05, "learning_rate": 6.515426551456482e-07, "loss": 0.0006, "step": 402050 }, { "epoch": 47.43983010854176, "grad_norm": 0.000806966854725033, "learning_rate": 6.485569854407014e-07, "loss": 0.0009, "step": 402100 }, { "epoch": 47.44572911750826, "grad_norm": 0.0003343829303048551, "learning_rate": 6.455781276574535e-07, "loss": 0.0006, "step": 402150 }, { "epoch": 47.45162812647475, "grad_norm": 0.18649131059646606, "learning_rate": 6.426060822070812e-07, "loss": 0.0005, "step": 402200 }, { "epoch": 47.45752713544125, "grad_norm": 0.1344735473394394, "learning_rate": 6.396408494998063e-07, "loss": 0.0005, "step": 402250 }, { "epoch": 47.46342614440774, "grad_norm": 0.0005723058711737394, "learning_rate": 6.366824299449237e-07, "loss": 0.0005, "step": 402300 }, { "epoch": 47.469325153374236, "grad_norm": 0.5578960180282593, "learning_rate": 6.337308239507678e-07, "loss": 0.0004, "step": 402350 }, { "epoch": 47.47522416234073, "grad_norm": 0.005179436411708593, "learning_rate": 6.307860319247571e-07, "loss": 0.001, "step": 402400 }, { "epoch": 47.48112317130722, "grad_norm": 0.05035494267940521, "learning_rate": 6.278480542733611e-07, "loss": 0.0004, "step": 402450 }, { "epoch": 47.48702218027371, "grad_norm": 0.0004162903060205281, "learning_rate": 6.249168914020942e-07, "loss": 0.0006, "step": 402500 }, { "epoch": 47.49292118924021, "grad_norm": 0.00013200730609241873, "learning_rate": 6.219925437155439e-07, "loss": 0.0005, "step": 402550 }, { "epoch": 47.4988201982067, "grad_norm": 0.042070284485816956, "learning_rate": 6.190750116173593e-07, "loss": 0.0004, "step": 402600 }, { "epoch": 47.504719207173196, "grad_norm": 0.006147420033812523, "learning_rate": 6.161642955102409e-07, "loss": 0.0005, "step": 402650 }, { "epoch": 47.51061821613969, "grad_norm": 0.0007724081515334547, "learning_rate": 6.132603957959504e-07, "loss": 0.0008, "step": 402700 }, { "epoch": 47.516517225106185, "grad_norm": 0.0013638774398714304, "learning_rate": 6.103633128753117e-07, "loss": 0.0008, "step": 402750 }, { "epoch": 47.52241623407268, "grad_norm": 0.00011656794958980754, "learning_rate": 6.074730471482049e-07, "loss": 0.0007, "step": 402800 }, { "epoch": 47.52831524303917, "grad_norm": 0.0009822258725762367, "learning_rate": 6.04589599013572e-07, "loss": 0.0006, "step": 402850 }, { "epoch": 47.53421425200566, "grad_norm": 0.003950378857553005, "learning_rate": 6.017129688694057e-07, "loss": 0.0004, "step": 402900 }, { "epoch": 47.540113260972156, "grad_norm": 0.0004427473177202046, "learning_rate": 5.988431571127662e-07, "loss": 0.0005, "step": 402950 }, { "epoch": 47.54601226993865, "grad_norm": 0.012820222415030003, "learning_rate": 5.959801641397755e-07, "loss": 0.0006, "step": 403000 }, { "epoch": 47.54601226993865, "eval_cer": 0.08347676419965576, "eval_loss": 7.833616837160662e-05, "eval_runtime": 2.1294, "eval_samples_per_second": 46.961, "eval_steps_per_second": 1.878, "eval_wer": 0.26, "step": 403000 }, { "epoch": 47.551911278905145, "grad_norm": 0.06610594689846039, "learning_rate": 5.931239903456065e-07, "loss": 0.0006, "step": 403050 }, { "epoch": 47.55781028787164, "grad_norm": 0.008430350571870804, "learning_rate": 5.902746361244937e-07, "loss": 0.0004, "step": 403100 }, { "epoch": 47.563709296838134, "grad_norm": 0.04362620785832405, "learning_rate": 5.874321018697282e-07, "loss": 0.0008, "step": 403150 }, { "epoch": 47.56960830580462, "grad_norm": 0.002118949079886079, "learning_rate": 5.845963879736627e-07, "loss": 0.0006, "step": 403200 }, { "epoch": 47.575507314771116, "grad_norm": 0.0004232650971971452, "learning_rate": 5.817674948277063e-07, "loss": 0.0006, "step": 403250 }, { "epoch": 47.58140632373761, "grad_norm": 0.009180941618978977, "learning_rate": 5.789454228223357e-07, "loss": 0.0003, "step": 403300 }, { "epoch": 47.587305332704105, "grad_norm": 0.05574605241417885, "learning_rate": 5.761301723470669e-07, "loss": 0.0009, "step": 403350 }, { "epoch": 47.5932043416706, "grad_norm": 0.005610080901533365, "learning_rate": 5.733217437904892e-07, "loss": 0.0006, "step": 403400 }, { "epoch": 47.599103350637094, "grad_norm": 0.005680935923010111, "learning_rate": 5.705201375402481e-07, "loss": 0.0009, "step": 403450 }, { "epoch": 47.60500235960359, "grad_norm": 0.0001155834979726933, "learning_rate": 5.677253539830452e-07, "loss": 0.0006, "step": 403500 }, { "epoch": 47.61090136857008, "grad_norm": 0.022634664550423622, "learning_rate": 5.649373935046442e-07, "loss": 0.0006, "step": 403550 }, { "epoch": 47.61680037753657, "grad_norm": 0.00025838473811745644, "learning_rate": 5.621562564898597e-07, "loss": 0.0004, "step": 403600 }, { "epoch": 47.622699386503065, "grad_norm": 0.0016754425596445799, "learning_rate": 5.593819433225678e-07, "loss": 0.001, "step": 403650 }, { "epoch": 47.62859839546956, "grad_norm": 0.0017528330208733678, "learning_rate": 5.566144543857066e-07, "loss": 0.0009, "step": 403700 }, { "epoch": 47.634497404436054, "grad_norm": 0.0011554661905393004, "learning_rate": 5.538537900612705e-07, "loss": 0.0006, "step": 403750 }, { "epoch": 47.64039641340255, "grad_norm": 0.0019626098219305277, "learning_rate": 5.510999507303161e-07, "loss": 0.0007, "step": 403800 }, { "epoch": 47.64629542236904, "grad_norm": 0.17227599024772644, "learning_rate": 5.48352936772939e-07, "loss": 0.0009, "step": 403850 }, { "epoch": 47.65219443133554, "grad_norm": 0.0006800978444516659, "learning_rate": 5.456127485683138e-07, "loss": 0.0005, "step": 403900 }, { "epoch": 47.65809344030203, "grad_norm": 0.49200639128685, "learning_rate": 5.428793864946658e-07, "loss": 0.0008, "step": 403950 }, { "epoch": 47.66399244926852, "grad_norm": 7.956695480970666e-05, "learning_rate": 5.401528509292763e-07, "loss": 0.0005, "step": 404000 }, { "epoch": 47.66399244926852, "eval_cer": 0.08347676419965576, "eval_loss": 7.651116902707145e-05, "eval_runtime": 2.1073, "eval_samples_per_second": 47.454, "eval_steps_per_second": 1.898, "eval_wer": 0.26, "step": 404000 }, { "epoch": 47.669891458235014, "grad_norm": 0.015060722827911377, "learning_rate": 5.374331422484891e-07, "loss": 0.0006, "step": 404050 }, { "epoch": 47.67579046720151, "grad_norm": 0.0020557474344968796, "learning_rate": 5.34720260827698e-07, "loss": 0.0004, "step": 404100 }, { "epoch": 47.681689476168, "grad_norm": 0.00021002558059990406, "learning_rate": 5.320142070413592e-07, "loss": 0.0004, "step": 404150 }, { "epoch": 47.6875884851345, "grad_norm": 0.0206303671002388, "learning_rate": 5.29314981262985e-07, "loss": 0.0004, "step": 404200 }, { "epoch": 47.69348749410099, "grad_norm": 0.04469038546085358, "learning_rate": 5.26622583865155e-07, "loss": 0.0007, "step": 404250 }, { "epoch": 47.69938650306749, "grad_norm": 0.002549189142882824, "learning_rate": 5.239370152194834e-07, "loss": 0.0003, "step": 404300 }, { "epoch": 47.70528551203398, "grad_norm": 0.005796152167022228, "learning_rate": 5.212582756966677e-07, "loss": 0.0004, "step": 404350 }, { "epoch": 47.71118452100047, "grad_norm": 0.004797693341970444, "learning_rate": 5.185863656664513e-07, "loss": 0.0007, "step": 404400 }, { "epoch": 47.71708352996696, "grad_norm": 0.0014546213205903769, "learning_rate": 5.159212854976281e-07, "loss": 0.0005, "step": 404450 }, { "epoch": 47.72298253893346, "grad_norm": 0.22174124419689178, "learning_rate": 5.132630355580592e-07, "loss": 0.0006, "step": 404500 }, { "epoch": 47.72888154789995, "grad_norm": 0.0007466779788956046, "learning_rate": 5.106116162146623e-07, "loss": 0.0007, "step": 404550 }, { "epoch": 47.73478055686645, "grad_norm": 0.0002966526953969151, "learning_rate": 5.079670278334004e-07, "loss": 0.0005, "step": 404600 }, { "epoch": 47.74067956583294, "grad_norm": 0.0023430988658219576, "learning_rate": 5.053292707793145e-07, "loss": 0.0006, "step": 404650 }, { "epoch": 47.746578574799436, "grad_norm": 0.006053830496966839, "learning_rate": 5.026983454164858e-07, "loss": 0.0004, "step": 404700 }, { "epoch": 47.75247758376593, "grad_norm": 0.11605625599622726, "learning_rate": 5.00074252108057e-07, "loss": 0.0005, "step": 404750 }, { "epoch": 47.75837659273242, "grad_norm": 0.013047035783529282, "learning_rate": 4.974569912162331e-07, "loss": 0.0006, "step": 404800 }, { "epoch": 47.76427560169891, "grad_norm": 0.00903988815844059, "learning_rate": 4.948465631022581e-07, "loss": 0.0006, "step": 404850 }, { "epoch": 47.77017461066541, "grad_norm": 0.0035154595971107483, "learning_rate": 4.922429681264662e-07, "loss": 0.0002, "step": 404900 }, { "epoch": 47.7760736196319, "grad_norm": 0.0002549257769715041, "learning_rate": 4.896462066482144e-07, "loss": 0.0003, "step": 404950 }, { "epoch": 47.781972628598396, "grad_norm": 0.005716355517506599, "learning_rate": 4.870562790259325e-07, "loss": 0.0007, "step": 405000 }, { "epoch": 47.781972628598396, "eval_cer": 0.08347676419965576, "eval_loss": 7.806865323800594e-05, "eval_runtime": 2.0742, "eval_samples_per_second": 48.211, "eval_steps_per_second": 1.928, "eval_wer": 0.26, "step": 405000 }, { "epoch": 47.78787163756489, "grad_norm": 0.013888003304600716, "learning_rate": 4.844731856171125e-07, "loss": 0.0005, "step": 405050 }, { "epoch": 47.793770646531385, "grad_norm": 0.01892109215259552, "learning_rate": 4.818969267782858e-07, "loss": 0.0007, "step": 405100 }, { "epoch": 47.79966965549788, "grad_norm": 0.026816487312316895, "learning_rate": 4.793275028650623e-07, "loss": 0.0005, "step": 405150 }, { "epoch": 47.80556866446437, "grad_norm": 0.004132518079131842, "learning_rate": 4.7676491423208626e-07, "loss": 0.0009, "step": 405200 }, { "epoch": 47.81146767343086, "grad_norm": 0.0013976250775158405, "learning_rate": 4.7420916123306904e-07, "loss": 0.0005, "step": 405250 }, { "epoch": 47.817366682397356, "grad_norm": 0.006846655625849962, "learning_rate": 4.71660244220784e-07, "loss": 0.0005, "step": 405300 }, { "epoch": 47.82326569136385, "grad_norm": 0.04035031050443649, "learning_rate": 4.691181635470554e-07, "loss": 0.0005, "step": 405350 }, { "epoch": 47.829164700330345, "grad_norm": 0.0018447987968102098, "learning_rate": 4.6658291956275804e-07, "loss": 0.0004, "step": 405400 }, { "epoch": 47.83506370929684, "grad_norm": 0.00023173747467808425, "learning_rate": 4.640545126178342e-07, "loss": 0.0005, "step": 405450 }, { "epoch": 47.840962718263334, "grad_norm": 0.08946145325899124, "learning_rate": 4.6153294306127136e-07, "loss": 0.0003, "step": 405500 }, { "epoch": 47.84686172722982, "grad_norm": 0.00021396196098066866, "learning_rate": 4.59018211241119e-07, "loss": 0.0006, "step": 405550 }, { "epoch": 47.852760736196316, "grad_norm": 0.02956235408782959, "learning_rate": 4.5651031750448826e-07, "loss": 0.0006, "step": 405600 }, { "epoch": 47.85865974516281, "grad_norm": 0.015189684927463531, "learning_rate": 4.540092621975356e-07, "loss": 0.0005, "step": 405650 }, { "epoch": 47.864558754129305, "grad_norm": 0.0017290408723056316, "learning_rate": 4.5151504566548486e-07, "loss": 0.0008, "step": 405700 }, { "epoch": 47.8704577630958, "grad_norm": 0.0021081757731735706, "learning_rate": 4.490276682525996e-07, "loss": 0.0006, "step": 405750 }, { "epoch": 47.876356772062294, "grad_norm": 0.00165890424977988, "learning_rate": 4.4654713030222173e-07, "loss": 0.0004, "step": 405800 }, { "epoch": 47.88225578102879, "grad_norm": 0.19076505303382874, "learning_rate": 4.44073432156733e-07, "loss": 0.0006, "step": 405850 }, { "epoch": 47.88815478999528, "grad_norm": 0.0010529367718845606, "learning_rate": 4.416065741575659e-07, "loss": 0.0007, "step": 405900 }, { "epoch": 47.89405379896177, "grad_norm": 0.000922882289160043, "learning_rate": 4.391465566452313e-07, "loss": 0.0004, "step": 405950 }, { "epoch": 47.899952807928265, "grad_norm": 0.0028297449462115765, "learning_rate": 4.366933799592743e-07, "loss": 0.0005, "step": 406000 }, { "epoch": 47.899952807928265, "eval_cer": 0.08347676419965576, "eval_loss": 7.761820597806945e-05, "eval_runtime": 2.0322, "eval_samples_per_second": 49.209, "eval_steps_per_second": 1.968, "eval_wer": 0.26, "step": 406000 }, { "epoch": 47.90585181689476, "grad_norm": 0.0005841662641614676, "learning_rate": 4.342470444383018e-07, "loss": 0.0003, "step": 406050 }, { "epoch": 47.911750825861255, "grad_norm": 0.009201875887811184, "learning_rate": 4.318075504199881e-07, "loss": 0.0004, "step": 406100 }, { "epoch": 47.91764983482775, "grad_norm": 0.00024930282961577177, "learning_rate": 4.293748982410473e-07, "loss": 0.0007, "step": 406150 }, { "epoch": 47.923548843794244, "grad_norm": 0.018681693822145462, "learning_rate": 4.269490882372551e-07, "loss": 0.0003, "step": 406200 }, { "epoch": 47.92944785276074, "grad_norm": 0.017578382045030594, "learning_rate": 4.245301207434438e-07, "loss": 0.0005, "step": 406250 }, { "epoch": 47.93534686172723, "grad_norm": 0.015581062063574791, "learning_rate": 4.221179960935073e-07, "loss": 0.0005, "step": 406300 }, { "epoch": 47.94124587069372, "grad_norm": 0.09334775805473328, "learning_rate": 4.197127146203794e-07, "loss": 0.0006, "step": 406350 }, { "epoch": 47.947144879660215, "grad_norm": 0.0009991306578740478, "learning_rate": 4.1731427665606113e-07, "loss": 0.0005, "step": 406400 }, { "epoch": 47.95304388862671, "grad_norm": 0.012545519508421421, "learning_rate": 4.149226825316044e-07, "loss": 0.0005, "step": 406450 }, { "epoch": 47.958942897593204, "grad_norm": 0.14486080408096313, "learning_rate": 4.125379325771228e-07, "loss": 0.0006, "step": 406500 }, { "epoch": 47.9648419065597, "grad_norm": 0.006860724184662104, "learning_rate": 4.101600271217754e-07, "loss": 0.0007, "step": 406550 }, { "epoch": 47.97074091552619, "grad_norm": 0.0031311465427279472, "learning_rate": 4.077889664937884e-07, "loss": 0.0009, "step": 406600 }, { "epoch": 47.97663992449269, "grad_norm": 0.007272269576787949, "learning_rate": 4.0542475102042786e-07, "loss": 0.0008, "step": 406650 }, { "epoch": 47.98253893345918, "grad_norm": 0.0010082223452627659, "learning_rate": 4.0306738102803276e-07, "loss": 0.0005, "step": 406700 }, { "epoch": 47.98843794242567, "grad_norm": 0.8011488914489746, "learning_rate": 4.007168568419817e-07, "loss": 0.0007, "step": 406750 }, { "epoch": 47.994336951392164, "grad_norm": 0.005595592316240072, "learning_rate": 3.9837317878672063e-07, "loss": 0.0006, "step": 406800 }, { "epoch": 48.00023596035866, "grad_norm": 0.001323662349022925, "learning_rate": 3.9603634718573535e-07, "loss": 0.0005, "step": 406850 }, { "epoch": 48.00613496932515, "grad_norm": 0.06415877491235733, "learning_rate": 3.937063623615844e-07, "loss": 0.0005, "step": 406900 }, { "epoch": 48.01203397829165, "grad_norm": 0.001429243478924036, "learning_rate": 3.913832246358662e-07, "loss": 0.0007, "step": 406950 }, { "epoch": 48.01793298725814, "grad_norm": 0.06532358378171921, "learning_rate": 3.8906693432924634e-07, "loss": 0.001, "step": 407000 }, { "epoch": 48.01793298725814, "eval_cer": 0.08347676419965576, "eval_loss": 7.729417848167941e-05, "eval_runtime": 2.0743, "eval_samples_per_second": 48.208, "eval_steps_per_second": 1.928, "eval_wer": 0.26, "step": 407000 }, { "epoch": 48.02383199622464, "grad_norm": 0.002314080484211445, "learning_rate": 3.8675749176143583e-07, "loss": 0.0009, "step": 407050 }, { "epoch": 48.02973100519113, "grad_norm": 0.10558099299669266, "learning_rate": 3.8445489725120743e-07, "loss": 0.0006, "step": 407100 }, { "epoch": 48.03563001415762, "grad_norm": 0.1449650079011917, "learning_rate": 3.821591511163847e-07, "loss": 0.0012, "step": 407150 }, { "epoch": 48.04152902312411, "grad_norm": 0.011975950561463833, "learning_rate": 3.798702536738474e-07, "loss": 0.0003, "step": 407200 }, { "epoch": 48.04742803209061, "grad_norm": 0.002222105162218213, "learning_rate": 3.7758820523952634e-07, "loss": 0.0005, "step": 407250 }, { "epoch": 48.0533270410571, "grad_norm": 0.006028191652148962, "learning_rate": 3.753130061284138e-07, "loss": 0.0005, "step": 407300 }, { "epoch": 48.0592260500236, "grad_norm": 0.002693602116778493, "learning_rate": 3.730446566545476e-07, "loss": 0.0003, "step": 407350 }, { "epoch": 48.06512505899009, "grad_norm": 0.0022688747849315405, "learning_rate": 3.707831571310327e-07, "loss": 0.0009, "step": 407400 }, { "epoch": 48.071024067956586, "grad_norm": 0.00024794740602374077, "learning_rate": 3.68528507870014e-07, "loss": 0.0006, "step": 407450 }, { "epoch": 48.07692307692308, "grad_norm": 0.004685192834585905, "learning_rate": 3.6628070918270363e-07, "loss": 0.0004, "step": 407500 }, { "epoch": 48.08282208588957, "grad_norm": 0.05585699528455734, "learning_rate": 3.640397613793589e-07, "loss": 0.0004, "step": 407550 }, { "epoch": 48.08872109485606, "grad_norm": 0.01353492308408022, "learning_rate": 3.618056647692991e-07, "loss": 0.0008, "step": 407600 }, { "epoch": 48.09462010382256, "grad_norm": 0.004613310564309359, "learning_rate": 3.595784196608887e-07, "loss": 0.0009, "step": 407650 }, { "epoch": 48.10051911278905, "grad_norm": 0.010520169511437416, "learning_rate": 3.573580263615539e-07, "loss": 0.0008, "step": 407700 }, { "epoch": 48.106418121755546, "grad_norm": 0.10125765949487686, "learning_rate": 3.5514448517777745e-07, "loss": 0.0003, "step": 407750 }, { "epoch": 48.11231713072204, "grad_norm": 0.0003676725609693676, "learning_rate": 3.529377964150815e-07, "loss": 0.0008, "step": 407800 }, { "epoch": 48.118216139688535, "grad_norm": 0.0003816532844211906, "learning_rate": 3.5073796037806695e-07, "loss": 0.0004, "step": 407850 }, { "epoch": 48.12411514865503, "grad_norm": 0.0023519699461758137, "learning_rate": 3.485449773703575e-07, "loss": 0.0003, "step": 407900 }, { "epoch": 48.13001415762152, "grad_norm": 0.003563364502042532, "learning_rate": 3.4635884769466663e-07, "loss": 0.0004, "step": 407950 }, { "epoch": 48.13591316658801, "grad_norm": 0.004140680190175772, "learning_rate": 3.441795716527307e-07, "loss": 0.0005, "step": 408000 }, { "epoch": 48.13591316658801, "eval_cer": 0.08347676419965576, "eval_loss": 7.803116750437766e-05, "eval_runtime": 2.1069, "eval_samples_per_second": 47.462, "eval_steps_per_second": 1.898, "eval_wer": 0.26, "step": 408000 }, { "epoch": 48.141812175554506, "grad_norm": 0.11837950348854065, "learning_rate": 3.420071495453536e-07, "loss": 0.0009, "step": 408050 }, { "epoch": 48.147711184521, "grad_norm": 0.00443257624283433, "learning_rate": 3.398415816723954e-07, "loss": 0.0007, "step": 408100 }, { "epoch": 48.153610193487495, "grad_norm": 0.0020167077891528606, "learning_rate": 3.376828683327615e-07, "loss": 0.0012, "step": 408150 }, { "epoch": 48.15950920245399, "grad_norm": 0.0007013337453827262, "learning_rate": 3.355310098244302e-07, "loss": 0.0008, "step": 408200 }, { "epoch": 48.165408211420484, "grad_norm": 0.003715683938935399, "learning_rate": 3.333860064444028e-07, "loss": 0.0003, "step": 408250 }, { "epoch": 48.17130722038698, "grad_norm": 0.00018921955779660493, "learning_rate": 3.312478584887646e-07, "loss": 0.0009, "step": 408300 }, { "epoch": 48.177206229353466, "grad_norm": 0.0030964540783315897, "learning_rate": 3.291165662526352e-07, "loss": 0.0005, "step": 408350 }, { "epoch": 48.18310523831996, "grad_norm": 0.8989170789718628, "learning_rate": 3.269921300301959e-07, "loss": 0.0006, "step": 408400 }, { "epoch": 48.189004247286455, "grad_norm": 0.004960858263075352, "learning_rate": 3.248745501146788e-07, "loss": 0.0008, "step": 408450 }, { "epoch": 48.19490325625295, "grad_norm": 0.0002743980730883777, "learning_rate": 3.2276382679837236e-07, "loss": 0.0004, "step": 408500 }, { "epoch": 48.200802265219444, "grad_norm": 0.0289805606007576, "learning_rate": 3.206599603726157e-07, "loss": 0.0003, "step": 408550 }, { "epoch": 48.20670127418594, "grad_norm": 0.00011558565165614709, "learning_rate": 3.185629511278099e-07, "loss": 0.0008, "step": 408600 }, { "epoch": 48.21260028315243, "grad_norm": 0.0006930461968295276, "learning_rate": 3.1647279935339556e-07, "loss": 0.0014, "step": 408650 }, { "epoch": 48.21849929211892, "grad_norm": 0.001499299774877727, "learning_rate": 3.143895053378698e-07, "loss": 0.0005, "step": 408700 }, { "epoch": 48.224398301085415, "grad_norm": 0.0046599870547652245, "learning_rate": 3.12313069368797e-07, "loss": 0.0003, "step": 408750 }, { "epoch": 48.23029731005191, "grad_norm": 0.0038810833357274532, "learning_rate": 3.1024349173278124e-07, "loss": 0.0004, "step": 408800 }, { "epoch": 48.236196319018404, "grad_norm": 0.12173482030630112, "learning_rate": 3.081807727154884e-07, "loss": 0.0008, "step": 408850 }, { "epoch": 48.2420953279849, "grad_norm": 0.019358523190021515, "learning_rate": 3.061249126016241e-07, "loss": 0.0005, "step": 408900 }, { "epoch": 48.24799433695139, "grad_norm": 0.0304104033857584, "learning_rate": 3.0407591167495587e-07, "loss": 0.0004, "step": 408950 }, { "epoch": 48.25389334591789, "grad_norm": 0.011904969811439514, "learning_rate": 3.020337702183129e-07, "loss": 0.0005, "step": 409000 }, { "epoch": 48.25389334591789, "eval_cer": 0.08347676419965576, "eval_loss": 7.696289685554802e-05, "eval_runtime": 2.0773, "eval_samples_per_second": 48.139, "eval_steps_per_second": 1.926, "eval_wer": 0.26, "step": 409000 }, { "epoch": 48.25979235488438, "grad_norm": 0.0012150059919804335, "learning_rate": 2.999984885135698e-07, "loss": 0.0006, "step": 409050 }, { "epoch": 48.26569136385087, "grad_norm": 0.008966565132141113, "learning_rate": 2.9797006684165184e-07, "loss": 0.0004, "step": 409100 }, { "epoch": 48.271590372817364, "grad_norm": 0.012045137584209442, "learning_rate": 2.9594850548253506e-07, "loss": 0.0005, "step": 409150 }, { "epoch": 48.27748938178386, "grad_norm": 0.0002871212491299957, "learning_rate": 2.9393380471525735e-07, "loss": 0.0007, "step": 409200 }, { "epoch": 48.28338839075035, "grad_norm": 0.2816624939441681, "learning_rate": 2.919259648179018e-07, "loss": 0.0005, "step": 409250 }, { "epoch": 48.28928739971685, "grad_norm": 0.034078437834978104, "learning_rate": 2.89924986067619e-07, "loss": 0.0004, "step": 409300 }, { "epoch": 48.29518640868334, "grad_norm": 0.009548027999699116, "learning_rate": 2.8793086874058794e-07, "loss": 0.0013, "step": 409350 }, { "epoch": 48.30108541764984, "grad_norm": 0.009912475943565369, "learning_rate": 2.8594361311206077e-07, "loss": 0.0003, "step": 409400 }, { "epoch": 48.30698442661633, "grad_norm": 0.06001600995659828, "learning_rate": 2.8396321945633465e-07, "loss": 0.0007, "step": 409450 }, { "epoch": 48.31288343558282, "grad_norm": 0.0003677801869343966, "learning_rate": 2.8198968804676874e-07, "loss": 0.0003, "step": 409500 }, { "epoch": 48.31878244454931, "grad_norm": 0.0014226788189262152, "learning_rate": 2.800230191557507e-07, "loss": 0.0009, "step": 409550 }, { "epoch": 48.32468145351581, "grad_norm": 2.3964892534422688e-05, "learning_rate": 2.7806321305475226e-07, "loss": 0.0003, "step": 409600 }, { "epoch": 48.3305804624823, "grad_norm": 0.0023455044720321894, "learning_rate": 2.7611027001427926e-07, "loss": 0.0004, "step": 409650 }, { "epoch": 48.3364794714488, "grad_norm": 0.28645259141921997, "learning_rate": 2.7416419030389384e-07, "loss": 0.0007, "step": 409700 }, { "epoch": 48.34237848041529, "grad_norm": 0.20430713891983032, "learning_rate": 2.7222497419220896e-07, "loss": 0.0007, "step": 409750 }, { "epoch": 48.348277489381786, "grad_norm": 0.0500316247344017, "learning_rate": 2.702926219468882e-07, "loss": 0.0005, "step": 409800 }, { "epoch": 48.35417649834828, "grad_norm": 0.003233336377888918, "learning_rate": 2.683671338346627e-07, "loss": 0.0011, "step": 409850 }, { "epoch": 48.36007550731477, "grad_norm": 0.007070308085530996, "learning_rate": 2.6644851012129766e-07, "loss": 0.0008, "step": 409900 }, { "epoch": 48.36597451628126, "grad_norm": 0.13075904548168182, "learning_rate": 2.6453675107162567e-07, "loss": 0.0004, "step": 409950 }, { "epoch": 48.37187352524776, "grad_norm": 0.32315483689308167, "learning_rate": 2.626318569495134e-07, "loss": 0.0005, "step": 410000 }, { "epoch": 48.37187352524776, "eval_cer": 0.08347676419965576, "eval_loss": 7.560063386335969e-05, "eval_runtime": 2.1078, "eval_samples_per_second": 47.442, "eval_steps_per_second": 1.898, "eval_wer": 0.26, "step": 410000 }, { "epoch": 48.37777253421425, "grad_norm": 0.03615459054708481, "learning_rate": 2.607338280179061e-07, "loss": 0.0003, "step": 410050 }, { "epoch": 48.383671543180746, "grad_norm": 0.004535224288702011, "learning_rate": 2.58842664538772e-07, "loss": 0.0004, "step": 410100 }, { "epoch": 48.38957055214724, "grad_norm": 0.0004091524169780314, "learning_rate": 2.5695836677315235e-07, "loss": 0.0007, "step": 410150 }, { "epoch": 48.395469561113735, "grad_norm": 0.03188466280698776, "learning_rate": 2.550809349811334e-07, "loss": 0.0007, "step": 410200 }, { "epoch": 48.40136857008023, "grad_norm": 0.014120721258223057, "learning_rate": 2.532103694218579e-07, "loss": 0.0006, "step": 410250 }, { "epoch": 48.40726757904672, "grad_norm": 0.2618721127510071, "learning_rate": 2.5134667035350833e-07, "loss": 0.0005, "step": 410300 }, { "epoch": 48.41316658801321, "grad_norm": 0.007105052005499601, "learning_rate": 2.4948983803334545e-07, "loss": 0.0006, "step": 410350 }, { "epoch": 48.419065596979706, "grad_norm": 0.011871850118041039, "learning_rate": 2.476398727176532e-07, "loss": 0.0004, "step": 410400 }, { "epoch": 48.4249646059462, "grad_norm": 0.002108538057655096, "learning_rate": 2.4579677466178286e-07, "loss": 0.0003, "step": 410450 }, { "epoch": 48.430863614912695, "grad_norm": 0.18160288035869598, "learning_rate": 2.4396054412013647e-07, "loss": 0.0007, "step": 410500 }, { "epoch": 48.43676262387919, "grad_norm": 0.000891724368557334, "learning_rate": 2.4213118134616686e-07, "loss": 0.0008, "step": 410550 }, { "epoch": 48.442661632845684, "grad_norm": 0.0011557036777958274, "learning_rate": 2.40308686592372e-07, "loss": 0.0005, "step": 410600 }, { "epoch": 48.44856064181218, "grad_norm": 0.03431496024131775, "learning_rate": 2.3849306011031746e-07, "loss": 0.0004, "step": 410650 }, { "epoch": 48.45445965077867, "grad_norm": 0.004672985058277845, "learning_rate": 2.3668430215061377e-07, "loss": 0.0007, "step": 410700 }, { "epoch": 48.46035865974516, "grad_norm": 0.0021776992361992598, "learning_rate": 2.3488241296291125e-07, "loss": 0.0005, "step": 410750 }, { "epoch": 48.466257668711656, "grad_norm": 0.003380203153938055, "learning_rate": 2.3308739279593316e-07, "loss": 0.0007, "step": 410800 }, { "epoch": 48.47215667767815, "grad_norm": 0.14600567519664764, "learning_rate": 2.3129924189743134e-07, "loss": 0.0011, "step": 410850 }, { "epoch": 48.478055686644645, "grad_norm": 0.008740336634218693, "learning_rate": 2.2951796051423613e-07, "loss": 0.0008, "step": 410900 }, { "epoch": 48.48395469561114, "grad_norm": 0.003297007642686367, "learning_rate": 2.2774354889220638e-07, "loss": 0.0005, "step": 410950 }, { "epoch": 48.489853704577634, "grad_norm": 0.15382985770702362, "learning_rate": 2.2597600727626844e-07, "loss": 0.0012, "step": 411000 }, { "epoch": 48.489853704577634, "eval_cer": 0.08347676419965576, "eval_loss": 7.503133383579552e-05, "eval_runtime": 2.0414, "eval_samples_per_second": 48.985, "eval_steps_per_second": 1.959, "eval_wer": 0.26, "step": 411000 }, { "epoch": 48.49575271354412, "grad_norm": 0.002857362385839224, "learning_rate": 2.2421533591038823e-07, "loss": 0.0006, "step": 411050 }, { "epoch": 48.501651722510616, "grad_norm": 0.15841659903526306, "learning_rate": 2.224615350375936e-07, "loss": 0.0008, "step": 411100 }, { "epoch": 48.50755073147711, "grad_norm": 0.12293801456689835, "learning_rate": 2.2071460489995199e-07, "loss": 0.0011, "step": 411150 }, { "epoch": 48.513449740443605, "grad_norm": 0.10713602602481842, "learning_rate": 2.1897454573860387e-07, "loss": 0.0004, "step": 411200 }, { "epoch": 48.5193487494101, "grad_norm": 8.285351213999093e-05, "learning_rate": 2.1724135779371268e-07, "loss": 0.0005, "step": 411250 }, { "epoch": 48.525247758376594, "grad_norm": 0.005994133651256561, "learning_rate": 2.1551504130452039e-07, "loss": 0.0003, "step": 411300 }, { "epoch": 48.53114676734309, "grad_norm": 0.07506370544433594, "learning_rate": 2.137955965092975e-07, "loss": 0.0004, "step": 411350 }, { "epoch": 48.53704577630958, "grad_norm": 0.001973693026229739, "learning_rate": 2.1208302364538745e-07, "loss": 0.0004, "step": 411400 }, { "epoch": 48.54294478527607, "grad_norm": 0.011924123391509056, "learning_rate": 2.103773229491679e-07, "loss": 0.0004, "step": 411450 }, { "epoch": 48.548843794242565, "grad_norm": 0.0014796233735978603, "learning_rate": 2.0867849465607824e-07, "loss": 0.0007, "step": 411500 }, { "epoch": 48.55474280320906, "grad_norm": 0.004951133392751217, "learning_rate": 2.0698653900060316e-07, "loss": 0.0005, "step": 411550 }, { "epoch": 48.560641812175554, "grad_norm": 0.14277109503746033, "learning_rate": 2.0530145621627806e-07, "loss": 0.0003, "step": 411600 }, { "epoch": 48.56654082114205, "grad_norm": 0.00034399694413878024, "learning_rate": 2.036232465357002e-07, "loss": 0.0006, "step": 411650 }, { "epoch": 48.57243983010854, "grad_norm": 0.019251860678195953, "learning_rate": 2.0195191019051208e-07, "loss": 0.0009, "step": 411700 }, { "epoch": 48.57833883907504, "grad_norm": 0.03132783994078636, "learning_rate": 2.0028744741139583e-07, "loss": 0.0008, "step": 411750 }, { "epoch": 48.58423784804153, "grad_norm": 0.02730208821594715, "learning_rate": 1.9862985842810656e-07, "loss": 0.0004, "step": 411800 }, { "epoch": 48.59013685700802, "grad_norm": 0.0003078631707467139, "learning_rate": 1.969791434694279e-07, "loss": 0.0002, "step": 411850 }, { "epoch": 48.596035865974514, "grad_norm": 4.404577339300886e-05, "learning_rate": 1.9533530276321655e-07, "loss": 0.0004, "step": 411900 }, { "epoch": 48.60193487494101, "grad_norm": 0.3497564494609833, "learning_rate": 1.936983365363687e-07, "loss": 0.0007, "step": 411950 }, { "epoch": 48.6078338839075, "grad_norm": 0.17277435958385468, "learning_rate": 1.920682450148259e-07, "loss": 0.0009, "step": 412000 }, { "epoch": 48.6078338839075, "eval_cer": 0.08347676419965576, "eval_loss": 7.690564234508201e-05, "eval_runtime": 2.0448, "eval_samples_per_second": 48.905, "eval_steps_per_second": 1.956, "eval_wer": 0.26, "step": 412000 }, { "epoch": 48.613732892874, "grad_norm": 0.0014455723576247692, "learning_rate": 1.9044502842359703e-07, "loss": 0.0009, "step": 412050 }, { "epoch": 48.61963190184049, "grad_norm": 0.08656203001737595, "learning_rate": 1.888286869867195e-07, "loss": 0.0007, "step": 412100 }, { "epoch": 48.62553091080699, "grad_norm": 0.014779774472117424, "learning_rate": 1.8721922092730936e-07, "loss": 0.0004, "step": 412150 }, { "epoch": 48.63142991977348, "grad_norm": 0.1817491203546524, "learning_rate": 1.856166304675111e-07, "loss": 0.0005, "step": 412200 }, { "epoch": 48.63732892873997, "grad_norm": 0.12811370193958282, "learning_rate": 1.840209158285311e-07, "loss": 0.0008, "step": 412250 }, { "epoch": 48.64322793770646, "grad_norm": 0.01662193052470684, "learning_rate": 1.8243207723062094e-07, "loss": 0.0007, "step": 412300 }, { "epoch": 48.64912694667296, "grad_norm": 0.018967173993587494, "learning_rate": 1.8085011489308856e-07, "loss": 0.0004, "step": 412350 }, { "epoch": 48.65502595563945, "grad_norm": 0.0005063657881692052, "learning_rate": 1.792750290342926e-07, "loss": 0.0002, "step": 412400 }, { "epoch": 48.66092496460595, "grad_norm": 0.0003537478041835129, "learning_rate": 1.7770681987163695e-07, "loss": 0.0004, "step": 412450 }, { "epoch": 48.66682397357244, "grad_norm": 5.376294211600907e-05, "learning_rate": 1.7614548762158177e-07, "loss": 0.0007, "step": 412500 }, { "epoch": 48.672722982538936, "grad_norm": 0.005151310935616493, "learning_rate": 1.7459103249963248e-07, "loss": 0.0003, "step": 412550 }, { "epoch": 48.67862199150543, "grad_norm": 0.0007760898442938924, "learning_rate": 1.7304345472035631e-07, "loss": 0.0009, "step": 412600 }, { "epoch": 48.68452100047192, "grad_norm": 0.00027769655571319163, "learning_rate": 1.7150275449735464e-07, "loss": 0.0004, "step": 412650 }, { "epoch": 48.69042000943841, "grad_norm": 0.004775156266987324, "learning_rate": 1.6996893204329069e-07, "loss": 0.0002, "step": 412700 }, { "epoch": 48.69631901840491, "grad_norm": 0.05347403511404991, "learning_rate": 1.684419875698784e-07, "loss": 0.0006, "step": 412750 }, { "epoch": 48.7022180273714, "grad_norm": 0.001224490231834352, "learning_rate": 1.6692192128788254e-07, "loss": 0.0005, "step": 412800 }, { "epoch": 48.708117036337896, "grad_norm": 0.35443374514579773, "learning_rate": 1.654087334071075e-07, "loss": 0.0007, "step": 412850 }, { "epoch": 48.71401604530439, "grad_norm": 0.00891154631972313, "learning_rate": 1.639024241364251e-07, "loss": 0.0005, "step": 412900 }, { "epoch": 48.719915054270885, "grad_norm": 0.016497673466801643, "learning_rate": 1.6240299368375234e-07, "loss": 0.0004, "step": 412950 }, { "epoch": 48.72581406323738, "grad_norm": 0.20646966993808746, "learning_rate": 1.6091044225604034e-07, "loss": 0.0006, "step": 413000 }, { "epoch": 48.72581406323738, "eval_cer": 0.08347676419965576, "eval_loss": 7.702159200562164e-05, "eval_runtime": 2.0806, "eval_samples_per_second": 48.062, "eval_steps_per_second": 1.922, "eval_wer": 0.26, "step": 413000 }, { "epoch": 48.73171307220387, "grad_norm": 0.0005972142098471522, "learning_rate": 1.5942477005931322e-07, "loss": 0.0007, "step": 413050 }, { "epoch": 48.73761208117036, "grad_norm": 0.04833425208926201, "learning_rate": 1.579459772986347e-07, "loss": 0.0004, "step": 413100 }, { "epoch": 48.743511090136856, "grad_norm": 0.0016931253485381603, "learning_rate": 1.5647406417812483e-07, "loss": 0.0005, "step": 413150 }, { "epoch": 48.74941009910335, "grad_norm": 0.05633385479450226, "learning_rate": 1.5500903090094888e-07, "loss": 0.0008, "step": 413200 }, { "epoch": 48.755309108069845, "grad_norm": 0.000661261728964746, "learning_rate": 1.535508776693173e-07, "loss": 0.0003, "step": 413250 }, { "epoch": 48.76120811703634, "grad_norm": 0.005861399229615927, "learning_rate": 1.5209960468450245e-07, "loss": 0.0005, "step": 413300 }, { "epoch": 48.767107126002834, "grad_norm": 0.03965440392494202, "learning_rate": 1.5065521214682188e-07, "loss": 0.0005, "step": 413350 }, { "epoch": 48.77300613496932, "grad_norm": 0.004837429616600275, "learning_rate": 1.492177002556383e-07, "loss": 0.0006, "step": 413400 }, { "epoch": 48.778905143935816, "grad_norm": 0.009812522679567337, "learning_rate": 1.4778706920937634e-07, "loss": 0.0005, "step": 413450 }, { "epoch": 48.78480415290231, "grad_norm": 0.011994519270956516, "learning_rate": 1.4636331920550583e-07, "loss": 0.0005, "step": 413500 }, { "epoch": 48.790703161868805, "grad_norm": 0.006271115969866514, "learning_rate": 1.4494645044053622e-07, "loss": 0.0009, "step": 413550 }, { "epoch": 48.7966021708353, "grad_norm": 0.00727760000154376, "learning_rate": 1.4353646311004444e-07, "loss": 0.0006, "step": 413600 }, { "epoch": 48.802501179801794, "grad_norm": 0.0004021060885861516, "learning_rate": 1.4213335740864143e-07, "loss": 0.0004, "step": 413650 }, { "epoch": 48.80840018876829, "grad_norm": 2.7337282517692074e-05, "learning_rate": 1.4073713353000562e-07, "loss": 0.0007, "step": 413700 }, { "epoch": 48.81429919773478, "grad_norm": 0.0011226859642192721, "learning_rate": 1.3934779166684953e-07, "loss": 0.0006, "step": 413750 }, { "epoch": 48.82019820670127, "grad_norm": 0.0001842749770730734, "learning_rate": 1.3796533201094752e-07, "loss": 0.0004, "step": 413800 }, { "epoch": 48.826097215667765, "grad_norm": 0.00580819183960557, "learning_rate": 1.3658975475311363e-07, "loss": 0.0008, "step": 413850 }, { "epoch": 48.83199622463426, "grad_norm": 0.032496411353349686, "learning_rate": 1.3522106008321823e-07, "loss": 0.0003, "step": 413900 }, { "epoch": 48.837895233600754, "grad_norm": 0.06328654289245605, "learning_rate": 1.3385924819018236e-07, "loss": 0.0004, "step": 413950 }, { "epoch": 48.84379424256725, "grad_norm": 0.1636272817850113, "learning_rate": 1.3250431926197792e-07, "loss": 0.0006, "step": 414000 }, { "epoch": 48.84379424256725, "eval_cer": 0.08347676419965576, "eval_loss": 7.646250742254779e-05, "eval_runtime": 2.0539, "eval_samples_per_second": 48.687, "eval_steps_per_second": 1.947, "eval_wer": 0.26, "step": 414000 }, { "epoch": 48.84969325153374, "grad_norm": 0.00641202088445425, "learning_rate": 1.3115627348561643e-07, "loss": 0.0006, "step": 414050 }, { "epoch": 48.85559226050024, "grad_norm": 0.001476761419326067, "learning_rate": 1.298151110471768e-07, "loss": 0.0006, "step": 414100 }, { "epoch": 48.86149126946673, "grad_norm": 0.0005472783814184368, "learning_rate": 1.2848083213176653e-07, "loss": 0.0004, "step": 414150 }, { "epoch": 48.86739027843322, "grad_norm": 0.11366862803697586, "learning_rate": 1.2715343692356607e-07, "loss": 0.0005, "step": 414200 }, { "epoch": 48.873289287399714, "grad_norm": 0.018642233684659004, "learning_rate": 1.2583292560578442e-07, "loss": 0.0005, "step": 414250 }, { "epoch": 48.87918829636621, "grad_norm": 0.0017654963303357363, "learning_rate": 1.2451929836069797e-07, "loss": 0.0003, "step": 414300 }, { "epoch": 48.8850873053327, "grad_norm": 0.12381187081336975, "learning_rate": 1.2321255536962284e-07, "loss": 0.0005, "step": 414350 }, { "epoch": 48.8909863142992, "grad_norm": 0.04478571191430092, "learning_rate": 1.2191269681292584e-07, "loss": 0.0004, "step": 414400 }, { "epoch": 48.89688532326569, "grad_norm": 0.18267956376075745, "learning_rate": 1.2061972287002455e-07, "loss": 0.0004, "step": 414450 }, { "epoch": 48.90278433223219, "grad_norm": 0.0013961527729406953, "learning_rate": 1.1933363371938733e-07, "loss": 0.0003, "step": 414500 }, { "epoch": 48.90868334119868, "grad_norm": 0.10599919408559799, "learning_rate": 1.1805442953853329e-07, "loss": 0.0006, "step": 414550 }, { "epoch": 48.91458235016517, "grad_norm": 0.004633627831935883, "learning_rate": 1.1678211050402677e-07, "loss": 0.0009, "step": 414600 }, { "epoch": 48.920481359131664, "grad_norm": 0.008601036854088306, "learning_rate": 1.1551667679148837e-07, "loss": 0.0009, "step": 414650 }, { "epoch": 48.92638036809816, "grad_norm": 0.0008673184784129262, "learning_rate": 1.1425812857557839e-07, "loss": 0.0006, "step": 414700 }, { "epoch": 48.93227937706465, "grad_norm": 7.263608858920634e-05, "learning_rate": 1.1300646603001896e-07, "loss": 0.0005, "step": 414750 }, { "epoch": 48.93817838603115, "grad_norm": 0.23631854355335236, "learning_rate": 1.1176168932757192e-07, "loss": 0.0008, "step": 414800 }, { "epoch": 48.94407739499764, "grad_norm": 0.30425724387168884, "learning_rate": 1.105237986400498e-07, "loss": 0.0006, "step": 414850 }, { "epoch": 48.949976403964136, "grad_norm": 0.002729305997490883, "learning_rate": 1.0929279413832149e-07, "loss": 0.0006, "step": 414900 }, { "epoch": 48.95587541293063, "grad_norm": 0.14255063235759735, "learning_rate": 1.0806867599230108e-07, "loss": 0.0003, "step": 414950 }, { "epoch": 48.96177442189712, "grad_norm": 0.02592490240931511, "learning_rate": 1.068514443709534e-07, "loss": 0.0004, "step": 415000 }, { "epoch": 48.96177442189712, "eval_cer": 0.08347676419965576, "eval_loss": 7.593550253659487e-05, "eval_runtime": 2.055, "eval_samples_per_second": 48.661, "eval_steps_per_second": 1.946, "eval_wer": 0.26, "step": 415000 }, { "epoch": 48.96767343086361, "grad_norm": 0.00042228822712786496, "learning_rate": 1.056410994422885e-07, "loss": 0.0004, "step": 415050 }, { "epoch": 48.97357243983011, "grad_norm": 0.009984726086258888, "learning_rate": 1.0443764137337275e-07, "loss": 0.0005, "step": 415100 }, { "epoch": 48.9794714487966, "grad_norm": 5.753475852543488e-05, "learning_rate": 1.0324107033031216e-07, "loss": 0.0005, "step": 415150 }, { "epoch": 48.985370457763096, "grad_norm": 0.0002023816341534257, "learning_rate": 1.0205138647826906e-07, "loss": 0.0002, "step": 415200 }, { "epoch": 48.99126946672959, "grad_norm": 0.3319569230079651, "learning_rate": 1.0086858998145654e-07, "loss": 0.0004, "step": 415250 }, { "epoch": 48.997168475696085, "grad_norm": 0.20437169075012207, "learning_rate": 9.969268100313844e-08, "loss": 0.0005, "step": 415300 }, { "epoch": 49.00306748466258, "grad_norm": 0.005713355261832476, "learning_rate": 9.852365970561272e-08, "loss": 0.0002, "step": 415350 }, { "epoch": 49.00896649362907, "grad_norm": 0.007855798117816448, "learning_rate": 9.73615262502503e-08, "loss": 0.0005, "step": 415400 }, { "epoch": 49.01486550259556, "grad_norm": 0.0014335099840536714, "learning_rate": 9.620628079745065e-08, "loss": 0.0011, "step": 415450 }, { "epoch": 49.02076451156206, "grad_norm": 0.042315710335969925, "learning_rate": 9.505792350667509e-08, "loss": 0.0003, "step": 415500 }, { "epoch": 49.02666352052855, "grad_norm": 0.003934670705348253, "learning_rate": 9.391645453643016e-08, "loss": 0.0004, "step": 415550 }, { "epoch": 49.032562529495046, "grad_norm": 0.022115672007203102, "learning_rate": 9.278187404426763e-08, "loss": 0.0003, "step": 415600 }, { "epoch": 49.03846153846154, "grad_norm": 0.1853216588497162, "learning_rate": 9.16541821868011e-08, "loss": 0.0007, "step": 415650 }, { "epoch": 49.044360547428035, "grad_norm": 0.01467698160558939, "learning_rate": 9.053337911967275e-08, "loss": 0.0003, "step": 415700 }, { "epoch": 49.05025955639453, "grad_norm": 0.0008228406659327447, "learning_rate": 8.941946499759213e-08, "loss": 0.0005, "step": 415750 }, { "epoch": 49.05615856536102, "grad_norm": 0.010203052312135696, "learning_rate": 8.831243997431404e-08, "loss": 0.0008, "step": 415800 }, { "epoch": 49.06205757432751, "grad_norm": 0.0004401857149787247, "learning_rate": 8.721230420263293e-08, "loss": 0.0008, "step": 415850 }, { "epoch": 49.067956583294006, "grad_norm": 0.0003421982692088932, "learning_rate": 8.61190578344051e-08, "loss": 0.0004, "step": 415900 }, { "epoch": 49.0738555922605, "grad_norm": 0.007245743181556463, "learning_rate": 8.503270102052651e-08, "loss": 0.0004, "step": 415950 }, { "epoch": 49.079754601226995, "grad_norm": 0.06399574130773544, "learning_rate": 8.395323391094945e-08, "loss": 0.0003, "step": 416000 }, { "epoch": 49.079754601226995, "eval_cer": 0.08347676419965576, "eval_loss": 7.57471498218365e-05, "eval_runtime": 2.0764, "eval_samples_per_second": 48.16, "eval_steps_per_second": 1.926, "eval_wer": 0.26, "step": 416000 }, { "epoch": 49.08565361019349, "grad_norm": 0.030033167451620102, "learning_rate": 8.288065665466582e-08, "loss": 0.0008, "step": 416050 }, { "epoch": 49.091552619159984, "grad_norm": 0.07643922418355942, "learning_rate": 8.181496939972943e-08, "loss": 0.0011, "step": 416100 }, { "epoch": 49.09745162812647, "grad_norm": 0.27305173873901367, "learning_rate": 8.075617229322818e-08, "loss": 0.0005, "step": 416150 }, { "epoch": 49.103350637092966, "grad_norm": 0.0006275794585235417, "learning_rate": 7.970426548131183e-08, "loss": 0.0005, "step": 416200 }, { "epoch": 49.10924964605946, "grad_norm": 9.748250886332244e-05, "learning_rate": 7.865924910916977e-08, "loss": 0.0007, "step": 416250 }, { "epoch": 49.115148655025955, "grad_norm": 0.0004018845793325454, "learning_rate": 7.762112332105331e-08, "loss": 0.0009, "step": 416300 }, { "epoch": 49.12104766399245, "grad_norm": 0.060976509004831314, "learning_rate": 7.658988826024227e-08, "loss": 0.0005, "step": 416350 }, { "epoch": 49.126946672958944, "grad_norm": 0.004640060011297464, "learning_rate": 7.556554406908389e-08, "loss": 0.0009, "step": 416400 }, { "epoch": 49.13284568192544, "grad_norm": 0.0005037415539845824, "learning_rate": 7.454809088896508e-08, "loss": 0.0006, "step": 416450 }, { "epoch": 49.13874469089193, "grad_norm": 0.024594217538833618, "learning_rate": 7.35375288603235e-08, "loss": 0.0003, "step": 416500 }, { "epoch": 49.14464369985842, "grad_norm": 0.13777543604373932, "learning_rate": 7.253385812264757e-08, "loss": 0.0004, "step": 416550 }, { "epoch": 49.150542708824915, "grad_norm": 0.0017333400901407003, "learning_rate": 7.153707881446537e-08, "loss": 0.0005, "step": 416600 }, { "epoch": 49.15644171779141, "grad_norm": 1.2152125236752909e-05, "learning_rate": 7.054719107337237e-08, "loss": 0.0005, "step": 416650 }, { "epoch": 49.162340726757904, "grad_norm": 0.1406897008419037, "learning_rate": 6.95641950359982e-08, "loss": 0.0009, "step": 416700 }, { "epoch": 49.1682397357244, "grad_norm": 0.005326620768755674, "learning_rate": 6.85880908380232e-08, "loss": 0.0004, "step": 416750 }, { "epoch": 49.17413874469089, "grad_norm": 0.0039842803962528706, "learning_rate": 6.761887861417293e-08, "loss": 0.0009, "step": 416800 }, { "epoch": 49.18003775365739, "grad_norm": 0.034177958965301514, "learning_rate": 6.665655849823482e-08, "loss": 0.0008, "step": 416850 }, { "epoch": 49.18593676262388, "grad_norm": 0.004166777711361647, "learning_rate": 6.570113062303041e-08, "loss": 0.0005, "step": 416900 }, { "epoch": 49.19183577159037, "grad_norm": 0.05237885192036629, "learning_rate": 6.475259512044307e-08, "loss": 0.0006, "step": 416950 }, { "epoch": 49.197734780556864, "grad_norm": 0.2825731635093689, "learning_rate": 6.381095212139032e-08, "loss": 0.0005, "step": 417000 }, { "epoch": 49.197734780556864, "eval_cer": 0.08347676419965576, "eval_loss": 7.555844786111265e-05, "eval_runtime": 2.0477, "eval_samples_per_second": 48.835, "eval_steps_per_second": 1.953, "eval_wer": 0.26, "step": 417000 }, { "epoch": 49.20363378952336, "grad_norm": 0.0001901771902339533, "learning_rate": 6.287620175585152e-08, "loss": 0.0003, "step": 417050 }, { "epoch": 49.20953279848985, "grad_norm": 0.0042240554466843605, "learning_rate": 6.194834415284568e-08, "loss": 0.0005, "step": 417100 }, { "epoch": 49.21543180745635, "grad_norm": 0.005294712260365486, "learning_rate": 6.102737944044811e-08, "loss": 0.0006, "step": 417150 }, { "epoch": 49.22133081642284, "grad_norm": 0.00023482386313844472, "learning_rate": 6.011330774577384e-08, "loss": 0.0003, "step": 417200 }, { "epoch": 49.22722982538934, "grad_norm": 0.03158803656697273, "learning_rate": 5.920612919499413e-08, "loss": 0.0005, "step": 417250 }, { "epoch": 49.23312883435583, "grad_norm": 0.0074103036895394325, "learning_rate": 5.83058439133255e-08, "loss": 0.0004, "step": 417300 }, { "epoch": 49.23902784332232, "grad_norm": 0.02583913318812847, "learning_rate": 5.74124520250352e-08, "loss": 0.0007, "step": 417350 }, { "epoch": 49.24492685228881, "grad_norm": 0.05303646996617317, "learning_rate": 5.652595365343016e-08, "loss": 0.0004, "step": 417400 }, { "epoch": 49.25082586125531, "grad_norm": 0.05893798917531967, "learning_rate": 5.5646348920879165e-08, "loss": 0.0007, "step": 417450 }, { "epoch": 49.2567248702218, "grad_norm": 0.2373729795217514, "learning_rate": 5.4773637948790646e-08, "loss": 0.0004, "step": 417500 }, { "epoch": 49.2626238791883, "grad_norm": 0.0285467691719532, "learning_rate": 5.39078208576238e-08, "loss": 0.0005, "step": 417550 }, { "epoch": 49.26852288815479, "grad_norm": 0.0014066204894334078, "learning_rate": 5.304889776688859e-08, "loss": 0.0004, "step": 417600 }, { "epoch": 49.274421897121286, "grad_norm": 0.0020605805329978466, "learning_rate": 5.219686879514019e-08, "loss": 0.0006, "step": 417650 }, { "epoch": 49.28032090608778, "grad_norm": 0.037081919610500336, "learning_rate": 5.135173405997895e-08, "loss": 0.0008, "step": 417700 }, { "epoch": 49.28621991505427, "grad_norm": 0.003928342368453741, "learning_rate": 5.051349367806157e-08, "loss": 0.0004, "step": 417750 }, { "epoch": 49.29211892402076, "grad_norm": 0.0005015079514123499, "learning_rate": 4.9682147765089946e-08, "loss": 0.0008, "step": 417800 }, { "epoch": 49.29801793298726, "grad_norm": 0.0018291808664798737, "learning_rate": 4.8857696435816724e-08, "loss": 0.0004, "step": 417850 }, { "epoch": 49.30391694195375, "grad_norm": 0.6299231052398682, "learning_rate": 4.804013980402866e-08, "loss": 0.0008, "step": 417900 }, { "epoch": 49.309815950920246, "grad_norm": 0.0006088523659855127, "learning_rate": 4.722947798258548e-08, "loss": 0.0006, "step": 417950 }, { "epoch": 49.31571495988674, "grad_norm": 0.0005604173056781292, "learning_rate": 4.642571108337546e-08, "loss": 0.0005, "step": 418000 }, { "epoch": 49.31571495988674, "eval_cer": 0.08347676419965576, "eval_loss": 7.538218778790906e-05, "eval_runtime": 2.0727, "eval_samples_per_second": 48.247, "eval_steps_per_second": 1.93, "eval_wer": 0.26, "step": 418000 }, { "epoch": 49.321613968853235, "grad_norm": 0.01563143916428089, "learning_rate": 4.5628839217337626e-08, "loss": 0.0006, "step": 418050 }, { "epoch": 49.32751297781973, "grad_norm": 0.0017408902058377862, "learning_rate": 4.483886249447289e-08, "loss": 0.0008, "step": 418100 }, { "epoch": 49.33341198678622, "grad_norm": 0.0008327718242071569, "learning_rate": 4.405578102381069e-08, "loss": 0.0005, "step": 418150 }, { "epoch": 49.33931099575271, "grad_norm": 0.0191559549421072, "learning_rate": 4.3279594913447905e-08, "loss": 0.0003, "step": 418200 }, { "epoch": 49.345210004719206, "grad_norm": 0.16153286397457123, "learning_rate": 4.25103042705155e-08, "loss": 0.0006, "step": 418250 }, { "epoch": 49.3511090136857, "grad_norm": 0.1855524480342865, "learning_rate": 4.17479092011952e-08, "loss": 0.0009, "step": 418300 }, { "epoch": 49.357008022652195, "grad_norm": 0.054585181176662445, "learning_rate": 4.0992409810725054e-08, "loss": 0.0006, "step": 418350 }, { "epoch": 49.36290703161869, "grad_norm": 0.006370527669787407, "learning_rate": 4.0243806203388305e-08, "loss": 0.0003, "step": 418400 }, { "epoch": 49.368806040585184, "grad_norm": 0.15621598064899445, "learning_rate": 3.95020984825023e-08, "loss": 0.0006, "step": 418450 }, { "epoch": 49.37470504955168, "grad_norm": 0.001195842050947249, "learning_rate": 3.876728675045738e-08, "loss": 0.0003, "step": 418500 }, { "epoch": 49.380604058518166, "grad_norm": 0.0005837788339704275, "learning_rate": 3.8039371108666846e-08, "loss": 0.0005, "step": 418550 }, { "epoch": 49.38650306748466, "grad_norm": 0.004550235345959663, "learning_rate": 3.731835165761699e-08, "loss": 0.0007, "step": 418600 }, { "epoch": 49.392402076451155, "grad_norm": 0.08952528983354568, "learning_rate": 3.6604228496817105e-08, "loss": 0.0006, "step": 418650 }, { "epoch": 49.39830108541765, "grad_norm": 0.0037430247757583857, "learning_rate": 3.5897001724838344e-08, "loss": 0.0007, "step": 418700 }, { "epoch": 49.404200094384144, "grad_norm": 0.007650204934179783, "learning_rate": 3.519667143930816e-08, "loss": 0.0006, "step": 418750 }, { "epoch": 49.41009910335064, "grad_norm": 0.059633560478687286, "learning_rate": 3.4503237736882573e-08, "loss": 0.0008, "step": 418800 }, { "epoch": 49.41599811231713, "grad_norm": 9.815617522690445e-05, "learning_rate": 3.381670071327392e-08, "loss": 0.0011, "step": 418850 }, { "epoch": 49.42189712128362, "grad_norm": 3.5841407225234434e-05, "learning_rate": 3.3137060463256374e-08, "loss": 0.0003, "step": 418900 }, { "epoch": 49.427796130250115, "grad_norm": 0.0076682656072080135, "learning_rate": 3.246431708062714e-08, "loss": 0.0005, "step": 418950 }, { "epoch": 49.43369513921661, "grad_norm": 0.004445759579539299, "learning_rate": 3.179847065825081e-08, "loss": 0.0005, "step": 419000 }, { "epoch": 49.43369513921661, "eval_cer": 0.08347676419965576, "eval_loss": 7.539572106907144e-05, "eval_runtime": 2.0718, "eval_samples_per_second": 48.268, "eval_steps_per_second": 1.931, "eval_wer": 0.26, "step": 419000 }, { "epoch": 49.439594148183104, "grad_norm": 0.0011972383363172412, "learning_rate": 3.113952128803721e-08, "loss": 0.0005, "step": 419050 }, { "epoch": 49.4454931571496, "grad_norm": 0.0005843276157975197, "learning_rate": 3.048746906093025e-08, "loss": 0.0005, "step": 419100 }, { "epoch": 49.451392166116094, "grad_norm": 0.00023377517936751246, "learning_rate": 2.984231406693572e-08, "loss": 0.0005, "step": 419150 }, { "epoch": 49.45729117508259, "grad_norm": 0.000999730546027422, "learning_rate": 2.9204056395104596e-08, "loss": 0.0007, "step": 419200 }, { "epoch": 49.46319018404908, "grad_norm": 0.0005787918926216662, "learning_rate": 2.8572696133538635e-08, "loss": 0.0006, "step": 419250 }, { "epoch": 49.46908919301557, "grad_norm": 0.006501109804958105, "learning_rate": 2.794823336937924e-08, "loss": 0.0005, "step": 419300 }, { "epoch": 49.474988201982065, "grad_norm": 0.07590062916278839, "learning_rate": 2.7330668188818575e-08, "loss": 0.0007, "step": 419350 }, { "epoch": 49.48088721094856, "grad_norm": 0.0020696392748504877, "learning_rate": 2.6720000677099567e-08, "loss": 0.0007, "step": 419400 }, { "epoch": 49.486786219915054, "grad_norm": 0.00017109738837461919, "learning_rate": 2.6116230918515895e-08, "loss": 0.0006, "step": 419450 }, { "epoch": 49.49268522888155, "grad_norm": 0.010812696069478989, "learning_rate": 2.5519358996400898e-08, "loss": 0.0005, "step": 419500 }, { "epoch": 49.49858423784804, "grad_norm": 0.02557627111673355, "learning_rate": 2.4929384993144233e-08, "loss": 0.0005, "step": 419550 }, { "epoch": 49.50448324681454, "grad_norm": 0.04865173622965813, "learning_rate": 2.4346308990175203e-08, "loss": 0.0007, "step": 419600 }, { "epoch": 49.51038225578103, "grad_norm": 0.0024546361528337, "learning_rate": 2.377013106797943e-08, "loss": 0.0003, "step": 419650 }, { "epoch": 49.51628126474752, "grad_norm": 0.01200852356851101, "learning_rate": 2.3200851306082184e-08, "loss": 0.0006, "step": 419700 }, { "epoch": 49.522180273714014, "grad_norm": 0.03309919685125351, "learning_rate": 2.26384697830595e-08, "loss": 0.0005, "step": 419750 }, { "epoch": 49.52807928268051, "grad_norm": 0.0006669036229141057, "learning_rate": 2.208298657653818e-08, "loss": 0.0005, "step": 419800 }, { "epoch": 49.533978291647, "grad_norm": 0.26941242814064026, "learning_rate": 2.1534401763195765e-08, "loss": 0.0005, "step": 419850 }, { "epoch": 49.5398773006135, "grad_norm": 0.001346247154287994, "learning_rate": 2.0992715418743926e-08, "loss": 0.0006, "step": 419900 }, { "epoch": 49.54577630957999, "grad_norm": 0.000770066340919584, "learning_rate": 2.0457927617956176e-08, "loss": 0.0006, "step": 419950 }, { "epoch": 49.551675318546486, "grad_norm": 0.0573977492749691, "learning_rate": 1.9930038434645692e-08, "loss": 0.0005, "step": 420000 }, { "epoch": 49.551675318546486, "eval_cer": 0.08347676419965576, "eval_loss": 7.54550492274575e-05, "eval_runtime": 2.0435, "eval_samples_per_second": 48.935, "eval_steps_per_second": 1.957, "eval_wer": 0.26, "step": 420000 }, { "epoch": 49.55757432751298, "grad_norm": 0.08284132927656174, "learning_rate": 1.9409047941681968e-08, "loss": 0.0007, "step": 420050 }, { "epoch": 49.56347333647947, "grad_norm": 0.01058940403163433, "learning_rate": 1.8894956210974146e-08, "loss": 0.0008, "step": 420100 }, { "epoch": 49.56937234544596, "grad_norm": 0.0036124857142567635, "learning_rate": 1.838776331347658e-08, "loss": 0.0011, "step": 420150 }, { "epoch": 49.57527135441246, "grad_norm": 0.024712003767490387, "learning_rate": 1.7887469319205485e-08, "loss": 0.0007, "step": 420200 }, { "epoch": 49.58117036337895, "grad_norm": 0.13153326511383057, "learning_rate": 1.739407429720563e-08, "loss": 0.0007, "step": 420250 }, { "epoch": 49.58706937234545, "grad_norm": 0.0074769179336726665, "learning_rate": 1.6907578315589202e-08, "loss": 0.0005, "step": 420300 }, { "epoch": 49.59296838131194, "grad_norm": 0.00011508304305607453, "learning_rate": 1.642798144150248e-08, "loss": 0.0008, "step": 420350 }, { "epoch": 49.598867390278436, "grad_norm": 0.3957825303077698, "learning_rate": 1.5955283741142523e-08, "loss": 0.0004, "step": 420400 }, { "epoch": 49.60476639924493, "grad_norm": 0.001039958093315363, "learning_rate": 1.548948527975158e-08, "loss": 0.0005, "step": 420450 }, { "epoch": 49.61066540821142, "grad_norm": 0.00474273506551981, "learning_rate": 1.503058612163377e-08, "loss": 0.0004, "step": 420500 }, { "epoch": 49.61656441717791, "grad_norm": 0.0044846972450613976, "learning_rate": 1.4578586330127319e-08, "loss": 0.0003, "step": 420550 }, { "epoch": 49.62246342614441, "grad_norm": 0.0013838200829923153, "learning_rate": 1.4133485967615657e-08, "loss": 0.0002, "step": 420600 }, { "epoch": 49.6283624351109, "grad_norm": 0.0007263546576723456, "learning_rate": 1.3695285095538523e-08, "loss": 0.0003, "step": 420650 }, { "epoch": 49.634261444077396, "grad_norm": 0.0011951547348871827, "learning_rate": 1.3263983774380873e-08, "loss": 0.0003, "step": 420700 }, { "epoch": 49.64016045304389, "grad_norm": 0.02542712353169918, "learning_rate": 1.2839582063672862e-08, "loss": 0.0004, "step": 420750 }, { "epoch": 49.646059462010385, "grad_norm": 0.0006233283784240484, "learning_rate": 1.2422080021995408e-08, "loss": 0.0005, "step": 420800 }, { "epoch": 49.65195847097688, "grad_norm": 8.120842539938167e-05, "learning_rate": 1.2011477706974639e-08, "loss": 0.0003, "step": 420850 }, { "epoch": 49.65785747994337, "grad_norm": 0.05971002206206322, "learning_rate": 1.1607775175287438e-08, "loss": 0.001, "step": 420900 }, { "epoch": 49.66375648890986, "grad_norm": 0.07958637177944183, "learning_rate": 1.1210972482655902e-08, "loss": 0.001, "step": 420950 }, { "epoch": 49.669655497876356, "grad_norm": 0.0006789467297494411, "learning_rate": 1.082106968385288e-08, "loss": 0.0006, "step": 421000 }, { "epoch": 49.669655497876356, "eval_cer": 0.08347676419965576, "eval_loss": 7.542281673522666e-05, "eval_runtime": 2.1203, "eval_samples_per_second": 47.162, "eval_steps_per_second": 1.886, "eval_wer": 0.26, "step": 421000 }, { "epoch": 49.67555450684285, "grad_norm": 0.03390537202358246, "learning_rate": 1.0438066832690885e-08, "loss": 0.0004, "step": 421050 }, { "epoch": 49.681453515809345, "grad_norm": 0.0034110918641090393, "learning_rate": 1.0061963982038735e-08, "loss": 0.0008, "step": 421100 }, { "epoch": 49.68735252477584, "grad_norm": 0.06125304847955704, "learning_rate": 9.692761183810462e-09, "loss": 0.0005, "step": 421150 }, { "epoch": 49.693251533742334, "grad_norm": 0.030881114304065704, "learning_rate": 9.330458488959748e-09, "loss": 0.0006, "step": 421200 }, { "epoch": 49.69915054270882, "grad_norm": 0.0015352725749835372, "learning_rate": 8.975055947502143e-09, "loss": 0.0005, "step": 421250 }, { "epoch": 49.705049551675316, "grad_norm": 0.00407454976812005, "learning_rate": 8.626553608492849e-09, "loss": 0.0007, "step": 421300 }, { "epoch": 49.71094856064181, "grad_norm": 0.0459156334400177, "learning_rate": 8.28495152003228e-09, "loss": 0.0003, "step": 421350 }, { "epoch": 49.716847569608305, "grad_norm": 3.26529516314622e-05, "learning_rate": 7.950249729271608e-09, "loss": 0.0006, "step": 421400 }, { "epoch": 49.7227465785748, "grad_norm": 0.016105152666568756, "learning_rate": 7.622448282412765e-09, "loss": 0.0005, "step": 421450 }, { "epoch": 49.728645587541294, "grad_norm": 0.01196216605603695, "learning_rate": 7.301547224697336e-09, "loss": 0.0003, "step": 421500 }, { "epoch": 49.73454459650779, "grad_norm": 0.0033947941847145557, "learning_rate": 6.987546600417671e-09, "loss": 0.0007, "step": 421550 }, { "epoch": 49.74044360547428, "grad_norm": 0.0012588825775310397, "learning_rate": 6.680446452922428e-09, "loss": 0.0006, "step": 421600 }, { "epoch": 49.74634261444077, "grad_norm": 0.0033959203865379095, "learning_rate": 6.380246824594372e-09, "loss": 0.0007, "step": 421650 }, { "epoch": 49.752241623407265, "grad_norm": 0.001727660303004086, "learning_rate": 6.086947756872574e-09, "loss": 0.0007, "step": 421700 }, { "epoch": 49.75814063237376, "grad_norm": 0.060117822140455246, "learning_rate": 5.80054929024132e-09, "loss": 0.0007, "step": 421750 }, { "epoch": 49.764039641340254, "grad_norm": 0.08742111921310425, "learning_rate": 5.521051464230098e-09, "loss": 0.0003, "step": 421800 }, { "epoch": 49.76993865030675, "grad_norm": 0.0029761940240859985, "learning_rate": 5.2484543174136095e-09, "loss": 0.0008, "step": 421850 }, { "epoch": 49.77583765927324, "grad_norm": 0.00024322271929122508, "learning_rate": 4.982757887422862e-09, "loss": 0.0005, "step": 421900 }, { "epoch": 49.78173666823974, "grad_norm": 0.016622144728899002, "learning_rate": 4.723962210934074e-09, "loss": 0.0007, "step": 421950 }, { "epoch": 49.78763567720623, "grad_norm": 0.26689812541007996, "learning_rate": 4.4720673236631204e-09, "loss": 0.0004, "step": 422000 }, { "epoch": 49.78763567720623, "eval_cer": 0.08347676419965576, "eval_loss": 7.545654807472602e-05, "eval_runtime": 2.0834, "eval_samples_per_second": 48.0, "eval_steps_per_second": 1.92, "eval_wer": 0.26, "step": 422000 }, { "epoch": 49.79353468617272, "grad_norm": 0.0003045262710656971, "learning_rate": 4.227073260382186e-09, "loss": 0.0004, "step": 422050 }, { "epoch": 49.799433695139214, "grad_norm": 0.013733979314565659, "learning_rate": 3.988980054903113e-09, "loss": 0.0004, "step": 422100 }, { "epoch": 49.80533270410571, "grad_norm": 0.06126213073730469, "learning_rate": 3.7577877400940545e-09, "loss": 0.0011, "step": 422150 }, { "epoch": 49.8112317130722, "grad_norm": 0.000759149610530585, "learning_rate": 3.5334963478683704e-09, "loss": 0.0007, "step": 422200 }, { "epoch": 49.8171307220387, "grad_norm": 0.0035799795296043158, "learning_rate": 3.3161059091790793e-09, "loss": 0.0007, "step": 422250 }, { "epoch": 49.82302973100519, "grad_norm": 0.0008849373552948236, "learning_rate": 3.1056164540355094e-09, "loss": 0.0004, "step": 422300 }, { "epoch": 49.82892873997169, "grad_norm": 0.03855409100651741, "learning_rate": 2.902028011486646e-09, "loss": 0.0008, "step": 422350 }, { "epoch": 49.83482774893818, "grad_norm": 0.12472325563430786, "learning_rate": 2.7053406096433364e-09, "loss": 0.0004, "step": 422400 }, { "epoch": 49.84072675790467, "grad_norm": 0.0017989615444093943, "learning_rate": 2.5155542756449823e-09, "loss": 0.0006, "step": 422450 }, { "epoch": 49.84662576687116, "grad_norm": 0.11063988506793976, "learning_rate": 2.332669035687296e-09, "loss": 0.0009, "step": 422500 }, { "epoch": 49.85252477583766, "grad_norm": 0.004342131782323122, "learning_rate": 2.1566849150223e-09, "loss": 0.0001, "step": 422550 }, { "epoch": 49.85842378480415, "grad_norm": 0.08853607624769211, "learning_rate": 1.987601937930572e-09, "loss": 0.0006, "step": 422600 }, { "epoch": 49.86432279377065, "grad_norm": 0.008752568624913692, "learning_rate": 1.8254201277601025e-09, "loss": 0.0006, "step": 422650 }, { "epoch": 49.87022180273714, "grad_norm": 0.0006558979512192309, "learning_rate": 1.670139506892987e-09, "loss": 0.0005, "step": 422700 }, { "epoch": 49.876120811703636, "grad_norm": 0.02426406741142273, "learning_rate": 1.5217600967565305e-09, "loss": 0.0004, "step": 422750 }, { "epoch": 49.88201982067013, "grad_norm": 0.10635874420404434, "learning_rate": 1.3802819178398985e-09, "loss": 0.0007, "step": 422800 }, { "epoch": 49.88791882963662, "grad_norm": 0.025806760415434837, "learning_rate": 1.2457049896663632e-09, "loss": 0.0004, "step": 422850 }, { "epoch": 49.89381783860311, "grad_norm": 0.00840833317488432, "learning_rate": 1.1180293308099554e-09, "loss": 0.0005, "step": 422900 }, { "epoch": 49.89971684756961, "grad_norm": 0.015891149640083313, "learning_rate": 9.972549589010172e-10, "loss": 0.0003, "step": 422950 }, { "epoch": 49.9056158565361, "grad_norm": 0.0005708543467335403, "learning_rate": 8.833818906039959e-10, "loss": 0.0007, "step": 423000 }, { "epoch": 49.9056158565361, "eval_cer": 0.08347676419965576, "eval_loss": 7.5459000072442e-05, "eval_runtime": 2.0537, "eval_samples_per_second": 48.692, "eval_steps_per_second": 1.948, "eval_wer": 0.26, "step": 423000 }, { "epoch": 49.911514865502596, "grad_norm": 0.07168089598417282, "learning_rate": 7.764101416340985e-10, "loss": 0.0003, "step": 423050 }, { "epoch": 49.91741387446909, "grad_norm": 0.09722711890935898, "learning_rate": 6.763397267628424e-10, "loss": 0.0007, "step": 423100 }, { "epoch": 49.923312883435585, "grad_norm": 0.0083692641928792, "learning_rate": 5.831706598014019e-10, "loss": 0.0005, "step": 423150 }, { "epoch": 49.92921189240208, "grad_norm": 0.0010149440495297313, "learning_rate": 4.969029536061598e-10, "loss": 0.0005, "step": 423200 }, { "epoch": 49.93511090136857, "grad_norm": 0.05050593242049217, "learning_rate": 4.17536620089809e-10, "loss": 0.0009, "step": 423250 }, { "epoch": 49.94100991033506, "grad_norm": 0.07773095369338989, "learning_rate": 3.4507167020469967e-10, "loss": 0.0004, "step": 423300 }, { "epoch": 49.946908919301556, "grad_norm": 0.3872154653072357, "learning_rate": 2.7950811394839017e-10, "loss": 0.0012, "step": 423350 }, { "epoch": 49.95280792826805, "grad_norm": 0.04908178746700287, "learning_rate": 2.2084596038030036e-10, "loss": 0.0007, "step": 423400 }, { "epoch": 49.958706937234545, "grad_norm": 0.0987749695777893, "learning_rate": 1.69085217588405e-10, "loss": 0.0004, "step": 423450 }, { "epoch": 49.96460594620104, "grad_norm": 0.004623358137905598, "learning_rate": 1.2422589272254038e-10, "loss": 0.0005, "step": 423500 }, { "epoch": 49.970504955167534, "grad_norm": 0.00019995598995592445, "learning_rate": 8.626799197219981e-11, "loss": 0.0007, "step": 423550 }, { "epoch": 49.97640396413403, "grad_norm": 0.019338928163051605, "learning_rate": 5.521152057763601e-11, "loss": 0.0005, "step": 423600 }, { "epoch": 49.982302973100516, "grad_norm": 0.09547653794288635, "learning_rate": 3.105648282430984e-11, "loss": 0.0004, "step": 423650 }, { "epoch": 49.98820198206701, "grad_norm": 0.0816146731376648, "learning_rate": 1.380288204844149e-11, "loss": 0.0004, "step": 423700 }, { "epoch": 49.994100991033505, "grad_norm": 0.01624957099556923, "learning_rate": 3.4507206314593476e-12, "loss": 0.0005, "step": 423750 }, { "epoch": 50.0, "grad_norm": 0.1923017054796219, "learning_rate": 0.0, "loss": 0.0011, "step": 423800 } ], "logging_steps": 50, "max_steps": 423800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.781753702511616e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }