{"loss": 4.8244, "grad_norm": 38.99475860595703, "learning_rate": 3.0625e-06, "epoch": 0.06949270326615706, "step": 50}
{"loss": 3.1741, "grad_norm": 28.659194946289062, "learning_rate": 6.1875000000000005e-06, "epoch": 0.13898540653231412, "step": 100}
{"loss": 2.3217, "grad_norm": 9.76420783996582, "learning_rate": 9.312500000000001e-06, "epoch": 0.20847810979847117, "step": 150}
{"loss": 1.9903, "grad_norm": 8.092910766601562, "learning_rate": 1.24375e-05, "epoch": 0.27797081306462823, "step": 200}
{"loss": 1.8289, "grad_norm": 7.155191898345947, "learning_rate": 1.5562500000000002e-05, "epoch": 0.34746351633078526, "step": 250}
{"loss": 1.7227, "grad_norm": 7.600743293762207, "learning_rate": 1.8687500000000004e-05, "epoch": 0.41695621959694235, "step": 300}
{"loss": 1.6408, "grad_norm": 6.648166656494141, "learning_rate": 2.18125e-05, "epoch": 0.4864489228630994, "step": 350}
{"loss": 1.5723, "grad_norm": 7.106318950653076, "learning_rate": 2.4937500000000003e-05, "epoch": 0.5559416261292565, "step": 400}
{"loss": 1.5171, "grad_norm": 5.8243608474731445, "learning_rate": 2.80625e-05, "epoch": 0.6254343293954134, "step": 450}
{"loss": 1.4672, "grad_norm": 5.86684513092041, "learning_rate": 3.1187500000000006e-05, "epoch": 0.6949270326615705, "step": 500}
{"loss": 1.4244, "grad_norm": 5.580212593078613, "learning_rate": 3.43125e-05, "epoch": 0.7644197359277276, "step": 550}
{"loss": 1.3901, "grad_norm": 5.865068435668945, "learning_rate": 3.74375e-05, "epoch": 0.8339124391938847, "step": 600}
{"loss": 1.3578, "grad_norm": 5.812740325927734, "learning_rate": 4.0562500000000003e-05, "epoch": 0.9034051424600417, "step": 650}
{"loss": 1.3267, "grad_norm": 5.176089763641357, "learning_rate": 4.3687500000000005e-05, "epoch": 0.9728978457261988, "step": 700}
{"loss": 1.353, "grad_norm": 2.073694944381714, "learning_rate": 4.6812500000000006e-05, "epoch": 1.0416956219596942, "step": 750}
{"loss": 1.2614, "grad_norm": 1.838789463043213, "learning_rate": 4.99375e-05, "epoch": 1.1111883252258512, "step": 800}
{"loss": 1.2389, "grad_norm": 1.992063045501709, "learning_rate": 4.998796063157356e-05, "epoch": 1.1806810284920084, "step": 850}
{"loss": 1.2231, "grad_norm": 1.887824535369873, "learning_rate": 4.995086686357275e-05, "epoch": 1.2501737317581654, "step": 900}
{"loss": 1.2128, "grad_norm": 1.7871160507202148, "learning_rate": 4.9888750881119504e-05, "epoch": 1.3196664350243226, "step": 950}
{"loss": 1.1887, "grad_norm": 1.6327871084213257, "learning_rate": 4.98016749778184e-05, "epoch": 1.3891591382904795, "step": 1000}
{"eval_loss": 1.0391631126403809, "eval_wer": 3.7607, "eval_cer": 3.579, "eval_runtime": 625.5759, "eval_samples_per_second": 43.438, "eval_steps_per_second": 0.906, "epoch": 1.3891591382904795, "step": 1000}
{"loss": 1.1792, "grad_norm": 1.7504212856292725, "learning_rate": 4.9689726478568314e-05, "epoch": 1.4586518415566365, "step": 1050}
{"loss": 1.1657, "grad_norm": 1.5947614908218384, "learning_rate": 4.9553017651987854e-05, "epoch": 1.5281445448227937, "step": 1100}
{"loss": 1.1528, "grad_norm": 1.5509134531021118, "learning_rate": 4.93916855978257e-05, "epoch": 1.5976372480889507, "step": 1150}
{"loss": 1.1482, "grad_norm": 1.465322732925415, "learning_rate": 4.9205892109468804e-05, "epoch": 1.6671299513551077, "step": 1200}
{"loss": 1.1303, "grad_norm": 1.5529003143310547, "learning_rate": 4.899582351168636e-05, "epoch": 1.7366226546212649, "step": 1250}
{"loss": 1.1284, "grad_norm": 1.6809085607528687, "learning_rate": 4.8761690473772236e-05, "epoch": 1.8061153578874218, "step": 1300}
{"loss": 1.114, "grad_norm": 1.595049262046814, "learning_rate": 4.8503727798273247e-05, "epoch": 1.8756080611535788, "step": 1350}
{"loss": 1.1116, "grad_norm": 1.65565824508667, "learning_rate": 4.822219418551522e-05, "epoch": 1.945100764419736, "step": 1400}
{"loss": 1.0901, "grad_norm": 1.9717499017715454, "learning_rate": 4.7917371974162886e-05, "epoch": 2.0138985406532313, "step": 1450}
{"loss": 1.0709, "grad_norm": 1.9328181743621826, "learning_rate": 4.7589566858073855e-05, "epoch": 2.0833912439193885, "step": 1500}
{"loss": 1.067, "grad_norm": 1.8737449645996094, "learning_rate": 4.723910757973061e-05, "epoch": 2.1528839471855457, "step": 1550}
{"loss": 1.0625, "grad_norm": 1.8878850936889648, "learning_rate": 4.686634560055805e-05, "epoch": 2.2223766504517024, "step": 1600}
{"loss": 1.0606, "grad_norm": 1.877172589302063, "learning_rate": 4.647165474845695e-05, "epoch": 2.2918693537178596, "step": 1650}
{"loss": 1.0531, "grad_norm": 1.895273208618164, "learning_rate": 4.6055430842907167e-05, "epoch": 2.361362056984017, "step": 1700}
{"loss": 1.0556, "grad_norm": 1.8738126754760742, "learning_rate": 4.5618091298016255e-05, "epoch": 2.4308547602501736, "step": 1750}
{"loss": 1.046, "grad_norm": 1.8704030513763428, "learning_rate": 4.5160074703911806e-05, "epoch": 2.5003474635163307, "step": 1800}
{"loss": 1.0384, "grad_norm": 1.7708660364151, "learning_rate": 4.46818403868971e-05, "epoch": 2.569840166782488, "step": 1850}
{"loss": 1.0393, "grad_norm": 1.9206907749176025, "learning_rate": 4.41838679488114e-05, "epoch": 2.639332870048645, "step": 1900}
{"loss": 1.0364, "grad_norm": 1.8969734907150269, "learning_rate": 4.366665678605666e-05, "epoch": 2.708825573314802, "step": 1950}
{"loss": 1.0278, "grad_norm": 1.6385365724563599, "learning_rate": 4.313072558877308e-05, "epoch": 2.778318276580959, "step": 2000}
{"eval_loss": 0.9487738013267517, "eval_wer": 5.3681, "eval_cer": 4.9045, "eval_runtime": 578.0265, "eval_samples_per_second": 47.012, "eval_steps_per_second": 0.981, "epoch": 2.778318276580959, "step": 2000}
{"loss": 1.0258, "grad_norm": 1.7948607206344604, "learning_rate": 4.2576611820665765e-05, "epoch": 2.8478109798471163, "step": 2050}
{"loss": 1.0266, "grad_norm": 1.890069603919983, "learning_rate": 4.200487118000413e-05, "epoch": 2.917303683113273, "step": 2100}
{"loss": 1.0121, "grad_norm": 1.8889206647872925, "learning_rate": 4.141607704233462e-05, "epoch": 2.9867963863794302, "step": 2150}
{"loss": 1.0039, "grad_norm": 1.27235746383667, "learning_rate": 4.081081988546555e-05, "epoch": 3.0555941626129255, "step": 2200}
{"loss": 0.9971, "grad_norm": 1.2282471656799316, "learning_rate": 4.01897066973008e-05, "epoch": 3.1250868658790827, "step": 2250}
{"loss": 0.9935, "grad_norm": 1.3210399150848389, "learning_rate": 3.9553360367116274e-05, "epoch": 3.19457956914524, "step": 2300}
{"loss": 0.9931, "grad_norm": 1.116134762763977, "learning_rate": 3.890241906088941e-05, "epoch": 3.2640722724113966, "step": 2350}
{"loss": 0.9948, "grad_norm": 1.3665287494659424, "learning_rate": 3.823753558130836e-05, "epoch": 3.333564975677554, "step": 2400}
{"loss": 0.9902, "grad_norm": 1.1243659257888794, "learning_rate": 3.755937671310261e-05, "epoch": 3.403057678943711, "step": 2450}
{"loss": 0.99, "grad_norm": 1.3323463201522827, "learning_rate": 3.686862255435154e-05, "epoch": 3.472550382209868, "step": 2500}
{"loss": 0.9885, "grad_norm": 1.1714366674423218, "learning_rate": 3.6165965834441636e-05, "epoch": 3.542043085476025, "step": 2550}
{"loss": 0.982, "grad_norm": 1.0881036520004272, "learning_rate": 3.5452111219356145e-05, "epoch": 3.611535788742182, "step": 2600}
{"loss": 0.9761, "grad_norm": 1.0824658870697021, "learning_rate": 3.472777460499419e-05, "epoch": 3.6810284920083394, "step": 2650}
{"loss": 0.9785, "grad_norm": 1.2313238382339478, "learning_rate": 3.399368239922766e-05, "epoch": 3.750521195274496, "step": 2700}
{"loss": 0.978, "grad_norm": 1.2594448328018188, "learning_rate": 3.325057079341618e-05, "epoch": 3.8200138985406533, "step": 2750}
{"loss": 0.9688, "grad_norm": 1.0986511707305908, "learning_rate": 3.249918502411065e-05, "epoch": 3.8895066018068105, "step": 2800}
{"loss": 0.9743, "grad_norm": 1.0293147563934326, "learning_rate": 3.174027862568559e-05, "epoch": 3.9589993050729673, "step": 2850}
{"loss": 0.9917, "grad_norm": 1.596439003944397, "learning_rate": 3.097461267465005e-05, "epoch": 4.0277970813064625, "step": 2900}
{"loss": 0.9556, "grad_norm": 1.5343629121780396, "learning_rate": 3.020295502639483e-05, "epoch": 4.09728978457262, "step": 2950}
{"loss": 0.9583, "grad_norm": 1.4187082052230835, "learning_rate": 2.9426079545141316e-05, "epoch": 4.166782487838777, "step": 3000}
{"eval_loss": 0.9133290648460388, "eval_wer": 5.5577, "eval_cer": 4.9862, "eval_runtime": 577.1773, "eval_samples_per_second": 47.081, "eval_steps_per_second": 0.982, "epoch": 4.166782487838777, "step": 3000}
{"loss": 0.951, "grad_norm": 1.5351015329360962, "learning_rate": 2.8644765327864416e-05, "epoch": 4.236275191104934, "step": 3050}
{"loss": 0.9519, "grad_norm": 1.585117220878601, "learning_rate": 2.7859795922967737e-05, "epoch": 4.305767894371091, "step": 3100}
{"loss": 0.9496, "grad_norm": 1.3467897176742554, "learning_rate": 2.707195854449458e-05, "epoch": 4.375260597637248, "step": 3150}
{"loss": 0.9505, "grad_norm": 1.6137511730194092, "learning_rate": 2.6282043282662888e-05, "epoch": 4.444753300903405, "step": 3200}
{"loss": 0.9488, "grad_norm": 1.3600081205368042, "learning_rate": 2.5490842311515707e-05, "epoch": 4.5142460041695625, "step": 3250}
{"loss": 0.9476, "grad_norm": 1.4230895042419434, "learning_rate": 2.4699149094481917e-05, "epoch": 4.583738707435719, "step": 3300}
{"loss": 0.9422, "grad_norm": 1.559622883796692, "learning_rate": 2.3907757588643857e-05, "epoch": 4.653231410701876, "step": 3350}
{"loss": 0.9456, "grad_norm": 1.3852342367172241, "learning_rate": 2.311746144850994e-05, "epoch": 4.722724113968034, "step": 3400}
{"loss": 0.9469, "grad_norm": 1.3593952655792236, "learning_rate": 2.2329053230090646e-05, "epoch": 4.79221681723419, "step": 3450}
{"loss": 0.9435, "grad_norm": 1.4249401092529297, "learning_rate": 2.1543323596076178e-05, "epoch": 4.861709520500347, "step": 3500}
{"loss": 0.9412, "grad_norm": 1.487253189086914, "learning_rate": 2.0761060522912867e-05, "epoch": 4.931202223766505, "step": 3550}
{"loss": 0.9065, "grad_norm": 5.93625545501709, "learning_rate": 1.9983048510573473e-05, "epoch": 5.0, "step": 3600}
{"loss": 0.93, "grad_norm": 2.594355583190918, "learning_rate": 1.9210067795813933e-05, "epoch": 5.069492703266157, "step": 3650}
{"loss": 0.9284, "grad_norm": 2.8298065662384033, "learning_rate": 1.8442893569705477e-05, "epoch": 5.138985406532314, "step": 3700}
{"loss": 0.9288, "grad_norm": 2.6053807735443115, "learning_rate": 1.76822952002269e-05, "epoch": 5.208478109798471, "step": 3750}
{"loss": 0.9276, "grad_norm": 2.54282546043396, "learning_rate": 1.692903546069658e-05, "epoch": 5.277970813064628, "step": 3800}
{"loss": 0.9275, "grad_norm": 2.406851053237915, "learning_rate": 1.6183869764817894e-05, "epoch": 5.3474635163307855, "step": 3850}
{"loss": 0.9263, "grad_norm": 2.661360263824463, "learning_rate": 1.5447545409105543e-05, "epoch": 5.416956219596942, "step": 3900}
{"loss": 0.9268, "grad_norm": 2.834134340286255, "learning_rate": 1.4720800823451963e-05, "epoch": 5.486448922863099, "step": 3950}
{"loss": 0.926, "grad_norm": 2.603018283843994, "learning_rate": 1.4004364830586014e-05, "epoch": 5.555941626129257, "step": 4000}
{"eval_loss": 0.8957561254501343, "eval_wer": 4.8409, "eval_cer": 4.5952, "eval_runtime": 576.5195, "eval_samples_per_second": 47.135, "eval_steps_per_second": 0.983, "epoch": 5.555941626129257, "step": 4000}
{"train_runtime": 18235.8637, "train_samples_per_second": 90.889, "train_steps_per_second": 0.316, "total_flos": 2.83389217560576e+19, "train_loss": 1.191746675491333, "epoch": 5.555941626129257, "step": 4000}
{"eval_loss": 1.038948655128479, "eval_wer": 2.7731, "eval_cer": 3.4121, "eval_runtime": 3329.1528, "eval_samples_per_second": 8.162, "eval_steps_per_second": 0.17, "epoch": 5.555941626129257, "step": 4000}
|
|