diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6 +1,6 @@ { - "best_metric": 96.57708628005658, - "best_model_checkpoint": "./iteboshi_temp/checkpoint-19000", + "best_metric": 82.34794908062236, + "best_model_checkpoint": "./iteboshi_temp/checkpoint-20000", "epoch": 22.026431718061673, "eval_steps": 1000, "global_step": 20000, @@ -10,5812 +10,5812 @@ "log_history": [ { "epoch": 0.02753303964757709, - "grad_norm": 8.593878746032715, + "grad_norm": 4.4749908447265625, "learning_rate": 1.0000000000000002e-06, - "loss": 11.2184, + "loss": 11.1891, "step": 25 }, { "epoch": 0.05506607929515418, - "grad_norm": 5.720788955688477, + "grad_norm": 2.3296124935150146, "learning_rate": 2.0000000000000003e-06, - "loss": 9.9763, + "loss": 10.3963, "step": 50 }, { "epoch": 0.08259911894273128, - "grad_norm": 3.1753361225128174, + "grad_norm": 2.1847357749938965, "learning_rate": 3e-06, - "loss": 7.2986, + "loss": 8.5795, "step": 75 }, { "epoch": 0.11013215859030837, - "grad_norm": 1.9015727043151855, + "grad_norm": 1.6425435543060303, "learning_rate": 4.000000000000001e-06, - "loss": 5.6306, + "loss": 6.6137, "step": 100 }, { "epoch": 0.13766519823788545, - "grad_norm": 1.7933436632156372, + "grad_norm": 2.1252479553222656, "learning_rate": 5e-06, - "loss": 4.7211, + "loss": 5.5477, "step": 125 }, { "epoch": 0.16519823788546256, - "grad_norm": 1.9143211841583252, + "grad_norm": 1.7178585529327393, "learning_rate": 6e-06, - "loss": 4.1871, + "loss": 4.6046, "step": 150 }, { "epoch": 0.19273127753303965, - "grad_norm": 1.8120731115341187, + "grad_norm": 1.5731301307678223, "learning_rate": 7e-06, - "loss": 3.728, + "loss": 4.2698, "step": 175 }, { "epoch": 0.22026431718061673, - "grad_norm": 1.5976663827896118, + "grad_norm": 1.764426589012146, "learning_rate": 8.000000000000001e-06, - "loss": 3.3999, + "loss": 4.1151, "step": 200 }, { "epoch": 0.24779735682819384, - "grad_norm": 1.4841374158859253, + "grad_norm": 1.4512161016464233, "learning_rate": 9e-06, - "loss": 3.1248, + "loss": 3.9763, "step": 225 }, { "epoch": 0.2753303964757709, - "grad_norm": 1.571273922920227, + "grad_norm": 1.7636539936065674, "learning_rate": 1e-05, - "loss": 2.9023, + "loss": 3.8495, "step": 250 }, { "epoch": 0.30286343612334804, - "grad_norm": 1.5719149112701416, + "grad_norm": 1.829896092414856, "learning_rate": 1.1000000000000001e-05, - "loss": 2.6609, + "loss": 3.7063, "step": 275 }, { "epoch": 0.3303964757709251, - "grad_norm": 1.4737190008163452, + "grad_norm": 1.4855419397354126, "learning_rate": 1.2e-05, - "loss": 2.4913, + "loss": 3.6014, "step": 300 }, { "epoch": 0.3579295154185022, - "grad_norm": 1.5062530040740967, + "grad_norm": 1.594559669494629, "learning_rate": 1.3000000000000001e-05, - "loss": 2.3478, + "loss": 3.4983, "step": 325 }, { "epoch": 0.3854625550660793, - "grad_norm": 1.5584601163864136, + "grad_norm": 1.5209401845932007, "learning_rate": 1.4e-05, - "loss": 2.2405, + "loss": 3.4202, "step": 350 }, { "epoch": 0.4129955947136564, - "grad_norm": 1.5639300346374512, + "grad_norm": 1.687501072883606, "learning_rate": 1.5000000000000002e-05, - "loss": 2.0632, + "loss": 3.3077, "step": 375 }, { "epoch": 0.44052863436123346, - "grad_norm": 1.574596881866455, + "grad_norm": 1.868544101715088, "learning_rate": 1.6000000000000003e-05, - "loss": 1.9272, + "loss": 3.1396, "step": 400 }, { "epoch": 0.46806167400881055, - "grad_norm": 1.5253156423568726, + "grad_norm": 1.5542718172073364, "learning_rate": 1.7e-05, - "loss": 1.8261, + "loss": 3.0484, "step": 425 }, { "epoch": 0.4955947136563877, - "grad_norm": 1.5775047540664673, + "grad_norm": 1.710310935974121, "learning_rate": 1.8e-05, - "loss": 1.7746, + "loss": 2.9423, "step": 450 }, { "epoch": 0.5231277533039648, - "grad_norm": 1.388596534729004, + "grad_norm": 1.6189175844192505, "learning_rate": 1.9e-05, - "loss": 1.709, + "loss": 2.7964, "step": 475 }, { "epoch": 0.5506607929515418, - "grad_norm": 1.2958333492279053, + "grad_norm": 1.4187376499176025, "learning_rate": 2e-05, - "loss": 1.6352, + "loss": 2.5943, "step": 500 }, { "epoch": 0.5781938325991189, - "grad_norm": 1.3782161474227905, + "grad_norm": 1.8632656335830688, "learning_rate": 1.9974358974358975e-05, - "loss": 1.5945, + "loss": 2.4722, "step": 525 }, { "epoch": 0.6057268722466961, - "grad_norm": 1.3231383562088013, + "grad_norm": 1.6982722282409668, "learning_rate": 1.994871794871795e-05, - "loss": 1.5081, + "loss": 2.2914, "step": 550 }, { "epoch": 0.6332599118942731, - "grad_norm": 1.4640569686889648, + "grad_norm": 1.5999418497085571, "learning_rate": 1.9923076923076926e-05, - "loss": 1.4445, + "loss": 2.1696, "step": 575 }, { "epoch": 0.6607929515418502, - "grad_norm": 1.2492103576660156, + "grad_norm": 1.4150657653808594, "learning_rate": 1.98974358974359e-05, - "loss": 1.4051, + "loss": 2.0164, "step": 600 }, { "epoch": 0.6883259911894273, - "grad_norm": 1.7467623949050903, + "grad_norm": 1.7492330074310303, "learning_rate": 1.9871794871794873e-05, - "loss": 1.3562, + "loss": 1.9055, "step": 625 }, { "epoch": 0.7158590308370044, - "grad_norm": 1.420554757118225, + "grad_norm": 2.138176679611206, "learning_rate": 1.9846153846153847e-05, - "loss": 1.3596, + "loss": 1.8614, "step": 650 }, { "epoch": 0.7433920704845814, - "grad_norm": 1.239926815032959, + "grad_norm": 1.6388601064682007, "learning_rate": 1.9820512820512824e-05, - "loss": 1.3058, + "loss": 1.7373, "step": 675 }, { "epoch": 0.7709251101321586, - "grad_norm": 1.1721984148025513, + "grad_norm": 1.6186158657073975, "learning_rate": 1.9794871794871798e-05, - "loss": 1.2863, + "loss": 1.6638, "step": 700 }, { "epoch": 0.7984581497797357, - "grad_norm": 1.3416831493377686, + "grad_norm": 1.4821542501449585, "learning_rate": 1.976923076923077e-05, - "loss": 1.3276, + "loss": 1.6584, "step": 725 }, { "epoch": 0.8259911894273128, - "grad_norm": 1.4568818807601929, + "grad_norm": 1.8213533163070679, "learning_rate": 1.9743589743589745e-05, - "loss": 1.3027, + "loss": 1.6406, "step": 750 }, { "epoch": 0.8535242290748899, - "grad_norm": 1.4908838272094727, + "grad_norm": 1.768345832824707, "learning_rate": 1.9717948717948722e-05, - "loss": 1.2134, + "loss": 1.5021, "step": 775 }, { "epoch": 0.8810572687224669, - "grad_norm": 1.3530240058898926, + "grad_norm": 2.655186176300049, "learning_rate": 1.9692307692307696e-05, - "loss": 1.2212, + "loss": 1.498, "step": 800 }, { "epoch": 0.9085903083700441, - "grad_norm": 1.1275429725646973, + "grad_norm": 1.2734943628311157, "learning_rate": 1.9666666666666666e-05, - "loss": 1.1196, + "loss": 1.3656, "step": 825 }, { "epoch": 0.9361233480176211, - "grad_norm": 1.3054031133651733, + "grad_norm": 1.422397255897522, "learning_rate": 1.9641025641025643e-05, - "loss": 1.14, + "loss": 1.3727, "step": 850 }, { "epoch": 0.9636563876651982, - "grad_norm": 1.0485919713974, + "grad_norm": 1.976857304573059, "learning_rate": 1.9615384615384617e-05, - "loss": 1.1557, + "loss": 1.3799, "step": 875 }, { "epoch": 0.9911894273127754, - "grad_norm": 1.1853173971176147, + "grad_norm": 1.9809314012527466, "learning_rate": 1.958974358974359e-05, - "loss": 1.1259, + "loss": 1.3436, "step": 900 }, { "epoch": 1.0187224669603525, - "grad_norm": 1.0613934993743896, + "grad_norm": 1.2582379579544067, "learning_rate": 1.9564102564102564e-05, - "loss": 1.0608, + "loss": 1.2313, "step": 925 }, { "epoch": 1.0462555066079295, - "grad_norm": 1.2642765045166016, + "grad_norm": 1.376512885093689, "learning_rate": 1.953846153846154e-05, - "loss": 0.9548, + "loss": 1.1155, "step": 950 }, { "epoch": 1.0737885462555066, - "grad_norm": 1.2651335000991821, + "grad_norm": 1.6270042657852173, "learning_rate": 1.9512820512820515e-05, - "loss": 0.9568, + "loss": 1.1154, "step": 975 }, { "epoch": 1.1013215859030836, - "grad_norm": 1.0594816207885742, + "grad_norm": 1.1371513605117798, "learning_rate": 1.9487179487179488e-05, - "loss": 0.9609, + "loss": 1.0854, "step": 1000 }, { "epoch": 1.1013215859030836, - "eval_cer": 98.0001834168262, - "eval_loss": 1.1190180778503418, - "eval_runtime": 1035.5852, - "eval_samples_per_second": 10.217, - "eval_steps_per_second": 2.555, - "eval_wer": 99.20792079207921, + "eval_cer": 52.7088372519574, + "eval_loss": 1.2533994913101196, + "eval_runtime": 1797.2888, + "eval_samples_per_second": 5.887, + "eval_steps_per_second": 1.472, + "eval_wer": 97.56718528995756, "step": 1000 }, { "epoch": 1.1288546255506609, - "grad_norm": 1.158616065979004, + "grad_norm": 1.21933913230896, "learning_rate": 1.9461538461538462e-05, - "loss": 0.9555, + "loss": 1.0609, "step": 1025 }, { "epoch": 1.1563876651982379, - "grad_norm": 1.3309080600738525, + "grad_norm": 1.4192328453063965, "learning_rate": 1.943589743589744e-05, - "loss": 0.9634, + "loss": 1.0849, "step": 1050 }, { "epoch": 1.183920704845815, - "grad_norm": 1.1525321006774902, + "grad_norm": 1.730343222618103, "learning_rate": 1.9410256410256413e-05, - "loss": 0.9368, + "loss": 1.0461, "step": 1075 }, { "epoch": 1.2114537444933922, - "grad_norm": 1.1301850080490112, + "grad_norm": 1.7515013217926025, "learning_rate": 1.9384615384615386e-05, - "loss": 0.8904, + "loss": 0.9945, "step": 1100 }, { "epoch": 1.2389867841409692, - "grad_norm": 1.0413904190063477, + "grad_norm": 2.047463893890381, "learning_rate": 1.935897435897436e-05, - "loss": 0.9584, + "loss": 1.0707, "step": 1125 }, { "epoch": 1.2665198237885462, - "grad_norm": 1.2024365663528442, + "grad_norm": 1.3279300928115845, "learning_rate": 1.9333333333333333e-05, - "loss": 0.8963, + "loss": 0.985, "step": 1150 }, { "epoch": 1.2940528634361232, - "grad_norm": 1.0379301309585571, + "grad_norm": 1.2352490425109863, "learning_rate": 1.930769230769231e-05, - "loss": 0.8991, + "loss": 0.9729, "step": 1175 }, { "epoch": 1.3215859030837005, - "grad_norm": 1.1425848007202148, + "grad_norm": 1.4669734239578247, "learning_rate": 1.9282051282051284e-05, - "loss": 0.8995, + "loss": 0.9921, "step": 1200 }, { "epoch": 1.3491189427312775, - "grad_norm": 1.3492079973220825, + "grad_norm": 1.233565092086792, "learning_rate": 1.9256410256410258e-05, - "loss": 0.8936, + "loss": 0.9546, "step": 1225 }, { "epoch": 1.3766519823788546, - "grad_norm": 1.0994338989257812, + "grad_norm": 1.4740595817565918, "learning_rate": 1.923076923076923e-05, - "loss": 0.9107, + "loss": 0.9798, "step": 1250 }, { "epoch": 1.4041850220264318, - "grad_norm": 0.919613778591156, + "grad_norm": 1.0927783250808716, "learning_rate": 1.920512820512821e-05, - "loss": 0.8634, + "loss": 0.9465, "step": 1275 }, { "epoch": 1.4317180616740088, - "grad_norm": 1.2518469095230103, + "grad_norm": 1.5051064491271973, "learning_rate": 1.9179487179487182e-05, - "loss": 0.8661, + "loss": 0.9202, "step": 1300 }, { "epoch": 1.4592511013215859, - "grad_norm": 1.1138235330581665, + "grad_norm": 1.218206524848938, "learning_rate": 1.9153846153846156e-05, - "loss": 0.9121, + "loss": 0.9766, "step": 1325 }, { "epoch": 1.4867841409691631, - "grad_norm": 1.0779719352722168, + "grad_norm": 1.3299014568328857, "learning_rate": 1.912820512820513e-05, - "loss": 0.8868, + "loss": 0.9298, "step": 1350 }, { "epoch": 1.51431718061674, - "grad_norm": 1.042301058769226, + "grad_norm": 2.0308456420898438, "learning_rate": 1.9102564102564106e-05, - "loss": 0.8515, + "loss": 0.9287, "step": 1375 }, { "epoch": 1.5418502202643172, - "grad_norm": 1.1100974082946777, + "grad_norm": 1.0478333234786987, "learning_rate": 1.907692307692308e-05, - "loss": 0.874, + "loss": 0.9302, "step": 1400 }, { "epoch": 1.5693832599118944, - "grad_norm": 1.0085911750793457, + "grad_norm": 1.2070943117141724, "learning_rate": 1.905128205128205e-05, - "loss": 0.878, + "loss": 0.9162, "step": 1425 }, { "epoch": 1.5969162995594712, - "grad_norm": 1.12621009349823, + "grad_norm": 1.317423701286316, "learning_rate": 1.9025641025641027e-05, - "loss": 0.8764, + "loss": 0.9029, "step": 1450 }, { "epoch": 1.6244493392070485, - "grad_norm": 1.19857919216156, + "grad_norm": 1.3458503484725952, "learning_rate": 1.9e-05, - "loss": 0.8275, + "loss": 0.8649, "step": 1475 }, { "epoch": 1.6519823788546255, - "grad_norm": 1.0735284090042114, + "grad_norm": 1.8415597677230835, "learning_rate": 1.8974358974358975e-05, - "loss": 0.7936, + "loss": 0.8413, "step": 1500 }, { "epoch": 1.6795154185022025, - "grad_norm": 1.0843554735183716, + "grad_norm": 1.2922658920288086, "learning_rate": 1.894871794871795e-05, - "loss": 0.8225, + "loss": 0.8506, "step": 1525 }, { "epoch": 1.7070484581497798, - "grad_norm": 1.013730525970459, + "grad_norm": 1.3912965059280396, "learning_rate": 1.8923076923076925e-05, - "loss": 0.8167, + "loss": 0.8408, "step": 1550 }, { "epoch": 1.7345814977973568, - "grad_norm": 0.9826481938362122, + "grad_norm": 1.5371092557907104, "learning_rate": 1.88974358974359e-05, - "loss": 0.8245, + "loss": 0.8579, "step": 1575 }, { "epoch": 1.7621145374449338, - "grad_norm": 1.0689139366149902, + "grad_norm": 1.188888669013977, "learning_rate": 1.8871794871794873e-05, - "loss": 0.8134, + "loss": 0.8251, "step": 1600 }, { "epoch": 1.789647577092511, - "grad_norm": 1.1507508754730225, + "grad_norm": 1.2093167304992676, "learning_rate": 1.8846153846153846e-05, - "loss": 0.8573, + "loss": 0.8787, "step": 1625 }, { "epoch": 1.8171806167400881, - "grad_norm": 1.2734869718551636, + "grad_norm": 1.3911653757095337, "learning_rate": 1.8820512820512823e-05, - "loss": 0.849, + "loss": 0.8652, "step": 1650 }, { "epoch": 1.8447136563876652, - "grad_norm": 1.2002122402191162, + "grad_norm": 1.707056999206543, "learning_rate": 1.8794871794871797e-05, - "loss": 0.852, + "loss": 0.8693, "step": 1675 }, { "epoch": 1.8722466960352424, - "grad_norm": 1.0352803468704224, + "grad_norm": 1.1974895000457764, "learning_rate": 1.876923076923077e-05, - "loss": 0.7986, + "loss": 0.8092, "step": 1700 }, { "epoch": 1.8997797356828194, - "grad_norm": 1.1347341537475586, + "grad_norm": 1.9799768924713135, "learning_rate": 1.8743589743589744e-05, - "loss": 0.7943, + "loss": 0.8103, "step": 1725 }, { "epoch": 1.9273127753303965, - "grad_norm": 1.0505036115646362, + "grad_norm": 1.5621815919876099, "learning_rate": 1.8717948717948718e-05, - "loss": 0.8155, + "loss": 0.8212, "step": 1750 }, { "epoch": 1.9548458149779737, - "grad_norm": 1.1983745098114014, + "grad_norm": 1.3315322399139404, "learning_rate": 1.8692307692307695e-05, - "loss": 0.7966, + "loss": 0.813, "step": 1775 }, { "epoch": 1.9823788546255505, - "grad_norm": 0.9563141465187073, + "grad_norm": 0.9858968257904053, "learning_rate": 1.866666666666667e-05, - "loss": 0.7796, + "loss": 0.7959, "step": 1800 }, { "epoch": 2.0099118942731278, - "grad_norm": 0.9476405382156372, + "grad_norm": 1.0958722829818726, "learning_rate": 1.8641025641025642e-05, - "loss": 0.7589, + "loss": 0.7545, "step": 1825 }, { "epoch": 2.037444933920705, - "grad_norm": 0.933421790599823, + "grad_norm": 1.0353975296020508, "learning_rate": 1.8615384615384616e-05, - "loss": 0.65, + "loss": 0.5917, "step": 1850 }, { "epoch": 2.064977973568282, - "grad_norm": 0.9493615031242371, + "grad_norm": 2.9925360679626465, "learning_rate": 1.8589743589743593e-05, - "loss": 0.6437, + "loss": 0.5989, "step": 1875 }, { "epoch": 2.092511013215859, - "grad_norm": 0.8501890301704407, + "grad_norm": 1.3685253858566284, "learning_rate": 1.8564102564102567e-05, - "loss": 0.6301, + "loss": 0.605, "step": 1900 }, { "epoch": 2.1200440528634363, - "grad_norm": 1.0540467500686646, + "grad_norm": 1.0744121074676514, "learning_rate": 1.853846153846154e-05, - "loss": 0.6763, + "loss": 0.6191, "step": 1925 }, { "epoch": 2.147577092511013, - "grad_norm": 0.9693484306335449, + "grad_norm": 0.898098349571228, "learning_rate": 1.8512820512820514e-05, - "loss": 0.6113, + "loss": 0.5486, "step": 1950 }, { "epoch": 2.1751101321585904, - "grad_norm": 1.0185853242874146, + "grad_norm": 1.2373496294021606, "learning_rate": 1.848717948717949e-05, - "loss": 0.6293, + "loss": 0.5693, "step": 1975 }, { "epoch": 2.202643171806167, - "grad_norm": 0.9777405261993408, + "grad_norm": 1.3621195554733276, "learning_rate": 1.8461538461538465e-05, - "loss": 0.6483, + "loss": 0.5859, "step": 2000 }, { "epoch": 2.202643171806167, - "eval_cer": 82.70379328923687, - "eval_loss": 0.8775949478149414, - "eval_runtime": 1068.4921, - "eval_samples_per_second": 9.903, - "eval_steps_per_second": 2.476, - "eval_wer": 98.7081565299387, + "eval_cer": 48.10966033496498, + "eval_loss": 0.8996243476867676, + "eval_runtime": 1860.7163, + "eval_samples_per_second": 5.687, + "eval_steps_per_second": 1.422, + "eval_wer": 90.94766619519095, "step": 2000 }, { "epoch": 2.2301762114537445, - "grad_norm": 1.0211451053619385, + "grad_norm": 0.9919908046722412, "learning_rate": 1.8435897435897435e-05, - "loss": 0.6264, + "loss": 0.5958, "step": 2025 }, { "epoch": 2.2577092511013217, - "grad_norm": 1.119489312171936, + "grad_norm": 1.0579532384872437, "learning_rate": 1.8410256410256412e-05, - "loss": 0.6394, + "loss": 0.5828, "step": 2050 }, { "epoch": 2.2852422907488985, - "grad_norm": 0.8650968074798584, + "grad_norm": 0.8056641221046448, "learning_rate": 1.8384615384615386e-05, - "loss": 0.6084, + "loss": 0.5666, "step": 2075 }, { "epoch": 2.3127753303964758, - "grad_norm": 0.9389939308166504, + "grad_norm": 1.1343415975570679, "learning_rate": 1.835897435897436e-05, - "loss": 0.6263, + "loss": 0.577, "step": 2100 }, { "epoch": 2.340308370044053, - "grad_norm": 0.8610661029815674, + "grad_norm": 1.04411780834198, "learning_rate": 1.8333333333333333e-05, - "loss": 0.5827, + "loss": 0.5307, "step": 2125 }, { "epoch": 2.36784140969163, - "grad_norm": 1.0328221321105957, + "grad_norm": 1.0452271699905396, "learning_rate": 1.830769230769231e-05, - "loss": 0.6006, + "loss": 0.5545, "step": 2150 }, { "epoch": 2.395374449339207, - "grad_norm": 1.0742344856262207, + "grad_norm": 0.927592396736145, "learning_rate": 1.8282051282051284e-05, - "loss": 0.6153, + "loss": 0.5551, "step": 2175 }, { "epoch": 2.4229074889867843, - "grad_norm": 0.9074801206588745, + "grad_norm": 1.7057969570159912, "learning_rate": 1.8256410256410257e-05, - "loss": 0.5994, + "loss": 0.5326, "step": 2200 }, { "epoch": 2.450440528634361, - "grad_norm": 1.0016812086105347, + "grad_norm": 1.4575523138046265, "learning_rate": 1.823076923076923e-05, - "loss": 0.5789, + "loss": 0.5308, "step": 2225 }, { "epoch": 2.4779735682819384, - "grad_norm": 0.8489738702774048, + "grad_norm": 1.0914602279663086, "learning_rate": 1.8205128205128208e-05, - "loss": 0.6075, + "loss": 0.5547, "step": 2250 }, { "epoch": 2.505506607929515, - "grad_norm": 0.8862083554267883, + "grad_norm": 0.990104079246521, "learning_rate": 1.817948717948718e-05, - "loss": 0.6455, + "loss": 0.6072, "step": 2275 }, { "epoch": 2.5330396475770924, - "grad_norm": 0.9439239501953125, + "grad_norm": 0.8314220905303955, "learning_rate": 1.8153846153846155e-05, - "loss": 0.6152, + "loss": 0.5671, "step": 2300 }, { "epoch": 2.5605726872246697, - "grad_norm": 1.0628591775894165, + "grad_norm": 0.9760991334915161, "learning_rate": 1.812820512820513e-05, - "loss": 0.61, + "loss": 0.5421, "step": 2325 }, { "epoch": 2.5881057268722465, - "grad_norm": 0.9690099358558655, + "grad_norm": 1.0801244974136353, "learning_rate": 1.8102564102564102e-05, - "loss": 0.614, + "loss": 0.5558, "step": 2350 }, { "epoch": 2.6156387665198237, - "grad_norm": 0.8917598128318787, + "grad_norm": 0.8499842286109924, "learning_rate": 1.807692307692308e-05, - "loss": 0.6227, + "loss": 0.56, "step": 2375 }, { "epoch": 2.643171806167401, - "grad_norm": 0.9932863712310791, + "grad_norm": 1.1915971040725708, "learning_rate": 1.8051282051282053e-05, - "loss": 0.5905, + "loss": 0.5198, "step": 2400 }, { "epoch": 2.670704845814978, - "grad_norm": 0.8728902339935303, + "grad_norm": 1.093216061592102, "learning_rate": 1.8025641025641027e-05, - "loss": 0.5781, + "loss": 0.5156, "step": 2425 }, { "epoch": 2.698237885462555, - "grad_norm": 1.1754846572875977, + "grad_norm": 1.1357547044754028, "learning_rate": 1.8e-05, - "loss": 0.6315, + "loss": 0.5755, "step": 2450 }, { "epoch": 2.7257709251101323, - "grad_norm": 0.9296525716781616, + "grad_norm": 0.963991641998291, "learning_rate": 1.7974358974358977e-05, - "loss": 0.5533, + "loss": 0.5219, "step": 2475 }, { "epoch": 2.753303964757709, - "grad_norm": 0.9781590700149536, + "grad_norm": 2.196319341659546, "learning_rate": 1.794871794871795e-05, - "loss": 0.6297, + "loss": 0.5858, "step": 2500 }, { "epoch": 2.7808370044052864, - "grad_norm": 1.140966534614563, + "grad_norm": 1.075908899307251, "learning_rate": 1.7923076923076925e-05, - "loss": 0.6024, + "loss": 0.5284, "step": 2525 }, { "epoch": 2.8083700440528636, - "grad_norm": 0.9966636300086975, + "grad_norm": 1.052140712738037, "learning_rate": 1.78974358974359e-05, - "loss": 0.5567, + "loss": 0.4964, "step": 2550 }, { "epoch": 2.8359030837004404, - "grad_norm": 0.8988731503486633, + "grad_norm": 0.9454672336578369, "learning_rate": 1.7871794871794875e-05, - "loss": 0.5831, + "loss": 0.5225, "step": 2575 }, { "epoch": 2.8634361233480177, - "grad_norm": 0.8624971508979797, + "grad_norm": 0.8262547850608826, "learning_rate": 1.784615384615385e-05, - "loss": 0.6088, + "loss": 0.5573, "step": 2600 }, { "epoch": 2.890969162995595, - "grad_norm": 1.2052279710769653, + "grad_norm": 1.0611587762832642, "learning_rate": 1.7820512820512823e-05, - "loss": 0.5729, + "loss": 0.5183, "step": 2625 }, { "epoch": 2.9185022026431717, - "grad_norm": 1.0274139642715454, + "grad_norm": 0.7847844958305359, "learning_rate": 1.7794871794871796e-05, - "loss": 0.586, + "loss": 0.5333, "step": 2650 }, { "epoch": 2.946035242290749, - "grad_norm": 0.8874242305755615, + "grad_norm": 0.746285617351532, "learning_rate": 1.776923076923077e-05, - "loss": 0.5864, + "loss": 0.5264, "step": 2675 }, { "epoch": 2.9735682819383262, - "grad_norm": 1.0037022829055786, + "grad_norm": 1.381616234779358, "learning_rate": 1.7743589743589744e-05, - "loss": 0.5726, + "loss": 0.5074, "step": 2700 }, { "epoch": 3.001101321585903, - "grad_norm": 0.7996382117271423, + "grad_norm": 0.6723135113716125, "learning_rate": 1.7717948717948717e-05, - "loss": 0.6186, + "loss": 0.5631, "step": 2725 }, { "epoch": 3.0286343612334803, - "grad_norm": 0.8733274340629578, + "grad_norm": 0.9439449906349182, "learning_rate": 1.7692307692307694e-05, - "loss": 0.4873, + "loss": 0.3836, "step": 2750 }, { "epoch": 3.056167400881057, - "grad_norm": 0.9040515422821045, + "grad_norm": 0.9093062281608582, "learning_rate": 1.7666666666666668e-05, - "loss": 0.437, + "loss": 0.342, "step": 2775 }, { "epoch": 3.0837004405286343, - "grad_norm": 0.9405097365379333, + "grad_norm": 0.7883495092391968, "learning_rate": 1.7641025641025642e-05, - "loss": 0.4555, + "loss": 0.3678, "step": 2800 }, { "epoch": 3.1112334801762116, - "grad_norm": 0.6494017243385315, + "grad_norm": 0.5074595808982849, "learning_rate": 1.7615384615384615e-05, - "loss": 0.4352, + "loss": 0.3406, "step": 2825 }, { "epoch": 3.1387665198237884, - "grad_norm": 0.9472844004631042, + "grad_norm": 1.330426812171936, "learning_rate": 1.7589743589743592e-05, - "loss": 0.439, + "loss": 0.3432, "step": 2850 }, { "epoch": 3.1662995594713657, - "grad_norm": 0.9458653330802917, + "grad_norm": 1.008254051208496, "learning_rate": 1.7564102564102566e-05, - "loss": 0.4657, + "loss": 0.3722, "step": 2875 }, { "epoch": 3.193832599118943, - "grad_norm": 0.9796208739280701, + "grad_norm": 1.0520501136779785, "learning_rate": 1.753846153846154e-05, - "loss": 0.4627, + "loss": 0.3719, "step": 2900 }, { "epoch": 3.2213656387665197, - "grad_norm": 0.836024820804596, + "grad_norm": 0.7822287082672119, "learning_rate": 1.7512820512820513e-05, - "loss": 0.4649, + "loss": 0.3729, "step": 2925 }, { "epoch": 3.248898678414097, - "grad_norm": 1.0059202909469604, + "grad_norm": 1.1690279245376587, "learning_rate": 1.7487179487179487e-05, - "loss": 0.4563, + "loss": 0.3723, "step": 2950 }, { "epoch": 3.2764317180616738, - "grad_norm": 0.9281357526779175, + "grad_norm": 0.8030567765235901, "learning_rate": 1.7461538461538464e-05, - "loss": 0.4511, + "loss": 0.362, "step": 2975 }, { "epoch": 3.303964757709251, - "grad_norm": 0.7751272916793823, + "grad_norm": 0.7881470918655396, "learning_rate": 1.7435897435897438e-05, - "loss": 0.4333, + "loss": 0.3373, "step": 3000 }, { "epoch": 3.303964757709251, - "eval_cer": 67.8441644790389, - "eval_loss": 0.8138683438301086, - "eval_runtime": 875.4611, - "eval_samples_per_second": 12.086, - "eval_steps_per_second": 3.022, - "eval_wer": 97.86892975011787, + "eval_cer": 29.99495603727947, + "eval_loss": 0.7765971422195435, + "eval_runtime": 1749.0783, + "eval_samples_per_second": 6.049, + "eval_steps_per_second": 1.513, + "eval_wer": 87.76991984912777, "step": 3000 }, { "epoch": 3.3314977973568283, - "grad_norm": 0.9610785245895386, + "grad_norm": 1.067468285560608, "learning_rate": 1.741025641025641e-05, - "loss": 0.4604, + "loss": 0.3583, "step": 3025 }, { "epoch": 3.359030837004405, - "grad_norm": 0.9183628559112549, + "grad_norm": 0.8070423603057861, "learning_rate": 1.7384615384615385e-05, - "loss": 0.4899, + "loss": 0.3962, "step": 3050 }, { "epoch": 3.3865638766519823, - "grad_norm": 1.1215360164642334, + "grad_norm": 0.9048041105270386, "learning_rate": 1.7358974358974362e-05, - "loss": 0.441, + "loss": 0.3524, "step": 3075 }, { "epoch": 3.4140969162995596, - "grad_norm": 0.7758769989013672, + "grad_norm": 0.8543446063995361, "learning_rate": 1.7333333333333336e-05, - "loss": 0.5081, + "loss": 0.4073, "step": 3100 }, { "epoch": 3.4416299559471364, - "grad_norm": 0.979524552822113, + "grad_norm": 0.9092735648155212, "learning_rate": 1.730769230769231e-05, - "loss": 0.4749, + "loss": 0.3792, "step": 3125 }, { "epoch": 3.4691629955947136, - "grad_norm": 0.6989980340003967, + "grad_norm": 0.8556696772575378, "learning_rate": 1.7282051282051283e-05, - "loss": 0.4459, + "loss": 0.3548, "step": 3150 }, { "epoch": 3.496696035242291, - "grad_norm": 0.8278589844703674, + "grad_norm": 0.8062282800674438, "learning_rate": 1.725641025641026e-05, - "loss": 0.4673, + "loss": 0.3695, "step": 3175 }, { "epoch": 3.5242290748898677, - "grad_norm": 0.8663012981414795, + "grad_norm": 0.8389378190040588, "learning_rate": 1.7230769230769234e-05, - "loss": 0.4693, + "loss": 0.3795, "step": 3200 }, { "epoch": 3.551762114537445, - "grad_norm": 0.9005679488182068, + "grad_norm": 0.866780698299408, "learning_rate": 1.7205128205128207e-05, - "loss": 0.4638, + "loss": 0.3535, "step": 3225 }, { "epoch": 3.579295154185022, - "grad_norm": 0.8739259243011475, + "grad_norm": 1.140093207359314, "learning_rate": 1.717948717948718e-05, - "loss": 0.4491, + "loss": 0.3453, "step": 3250 }, { "epoch": 3.606828193832599, - "grad_norm": 0.872785747051239, + "grad_norm": 0.7346594929695129, "learning_rate": 1.7153846153846155e-05, - "loss": 0.4765, + "loss": 0.3768, "step": 3275 }, { "epoch": 3.6343612334801763, - "grad_norm": 0.9817097783088684, + "grad_norm": 0.9942306280136108, "learning_rate": 1.7128205128205128e-05, - "loss": 0.4841, + "loss": 0.3843, "step": 3300 }, { "epoch": 3.6618942731277535, - "grad_norm": 0.7745983004570007, + "grad_norm": 0.8022450804710388, "learning_rate": 1.7102564102564102e-05, - "loss": 0.4245, + "loss": 0.3319, "step": 3325 }, { "epoch": 3.6894273127753303, - "grad_norm": 0.9443695545196533, + "grad_norm": 0.7250021696090698, "learning_rate": 1.707692307692308e-05, - "loss": 0.4566, + "loss": 0.3664, "step": 3350 }, { "epoch": 3.7169603524229076, - "grad_norm": 0.8114006519317627, + "grad_norm": 0.8458446264266968, "learning_rate": 1.7051282051282053e-05, - "loss": 0.41, + "loss": 0.32, "step": 3375 }, { "epoch": 3.744493392070485, - "grad_norm": 0.8925326466560364, + "grad_norm": 0.8499473333358765, "learning_rate": 1.7025641025641026e-05, - "loss": 0.4586, + "loss": 0.365, "step": 3400 }, { "epoch": 3.7720264317180616, - "grad_norm": 0.870850682258606, + "grad_norm": 1.18294095993042, "learning_rate": 1.7e-05, - "loss": 0.4387, + "loss": 0.3453, "step": 3425 }, { "epoch": 3.799559471365639, - "grad_norm": 0.9175399541854858, + "grad_norm": 0.7673613429069519, "learning_rate": 1.6974358974358977e-05, - "loss": 0.4449, + "loss": 0.3406, "step": 3450 }, { "epoch": 3.827092511013216, - "grad_norm": 0.7667021155357361, + "grad_norm": 0.8733392953872681, "learning_rate": 1.694871794871795e-05, - "loss": 0.4329, + "loss": 0.3406, "step": 3475 }, { "epoch": 3.854625550660793, - "grad_norm": 0.9877698421478271, + "grad_norm": 0.9603780508041382, "learning_rate": 1.6923076923076924e-05, - "loss": 0.5031, + "loss": 0.4131, "step": 3500 }, { "epoch": 3.88215859030837, - "grad_norm": 1.041695237159729, + "grad_norm": 1.0154445171356201, "learning_rate": 1.6897435897435898e-05, - "loss": 0.4428, + "loss": 0.3401, "step": 3525 }, { "epoch": 3.909691629955947, - "grad_norm": 0.9440383911132812, + "grad_norm": 0.7912996411323547, "learning_rate": 1.687179487179487e-05, - "loss": 0.4535, + "loss": 0.3621, "step": 3550 }, { "epoch": 3.9372246696035242, - "grad_norm": 0.8437003493309021, + "grad_norm": 0.7029661536216736, "learning_rate": 1.684615384615385e-05, - "loss": 0.4758, + "loss": 0.3848, "step": 3575 }, { "epoch": 3.964757709251101, - "grad_norm": 0.9805810451507568, + "grad_norm": 0.7775823473930359, "learning_rate": 1.6820512820512822e-05, - "loss": 0.4638, + "loss": 0.3753, "step": 3600 }, { "epoch": 3.9922907488986783, - "grad_norm": 0.9598307013511658, + "grad_norm": 0.8819046020507812, "learning_rate": 1.6794871794871796e-05, - "loss": 0.4589, + "loss": 0.373, "step": 3625 }, { "epoch": 4.0198237885462555, - "grad_norm": 0.7278905510902405, + "grad_norm": 0.6218374967575073, "learning_rate": 1.676923076923077e-05, - "loss": 0.3531, + "loss": 0.2521, "step": 3650 }, { "epoch": 4.047356828193832, - "grad_norm": 0.781149685382843, + "grad_norm": 0.9981600642204285, "learning_rate": 1.6743589743589747e-05, - "loss": 0.35, + "loss": 0.238, "step": 3675 }, { "epoch": 4.07488986784141, - "grad_norm": 0.7003198266029358, + "grad_norm": 0.6263682246208191, "learning_rate": 1.671794871794872e-05, - "loss": 0.3073, + "loss": 0.2172, "step": 3700 }, { "epoch": 4.102422907488987, - "grad_norm": 0.6840717196464539, + "grad_norm": 0.5469574332237244, "learning_rate": 1.6692307692307694e-05, - "loss": 0.3472, + "loss": 0.2553, "step": 3725 }, { "epoch": 4.129955947136564, - "grad_norm": 0.7943404912948608, + "grad_norm": 0.9186758399009705, "learning_rate": 1.6666666666666667e-05, - "loss": 0.3549, + "loss": 0.2413, "step": 3750 }, { "epoch": 4.157488986784141, - "grad_norm": 0.7081865072250366, + "grad_norm": 0.5088458061218262, "learning_rate": 1.6641025641025645e-05, - "loss": 0.3301, + "loss": 0.2303, "step": 3775 }, { "epoch": 4.185022026431718, - "grad_norm": 0.654018759727478, + "grad_norm": 0.5243244767189026, "learning_rate": 1.6615384615384618e-05, - "loss": 0.2923, + "loss": 0.1918, "step": 3800 }, { "epoch": 4.212555066079295, - "grad_norm": 0.7970944046974182, + "grad_norm": 0.8335323333740234, "learning_rate": 1.6589743589743592e-05, - "loss": 0.3295, + "loss": 0.2309, "step": 3825 }, { "epoch": 4.240088105726873, - "grad_norm": 0.756298840045929, + "grad_norm": 0.6145501136779785, "learning_rate": 1.6564102564102565e-05, - "loss": 0.3292, + "loss": 0.226, "step": 3850 }, { "epoch": 4.2676211453744495, - "grad_norm": 0.7982486486434937, + "grad_norm": 0.9030354022979736, "learning_rate": 1.653846153846154e-05, - "loss": 0.3664, + "loss": 0.2569, "step": 3875 }, { "epoch": 4.295154185022026, - "grad_norm": 0.7807890772819519, + "grad_norm": 0.6755716800689697, "learning_rate": 1.6512820512820513e-05, - "loss": 0.3599, + "loss": 0.2424, "step": 3900 }, { "epoch": 4.322687224669604, - "grad_norm": 0.8370448350906372, + "grad_norm": 1.1529523134231567, "learning_rate": 1.6487179487179486e-05, - "loss": 0.3662, + "loss": 0.2539, "step": 3925 }, { "epoch": 4.350220264317181, - "grad_norm": 0.8618746995925903, + "grad_norm": 0.689730167388916, "learning_rate": 1.6461538461538463e-05, - "loss": 0.3458, + "loss": 0.2366, "step": 3950 }, { "epoch": 4.377753303964758, - "grad_norm": 0.7963102459907532, + "grad_norm": 0.6913994550704956, "learning_rate": 1.6435897435897437e-05, - "loss": 0.36, + "loss": 0.2667, "step": 3975 }, { "epoch": 4.405286343612334, - "grad_norm": 0.8734304904937744, + "grad_norm": 0.6518797278404236, "learning_rate": 1.641025641025641e-05, - "loss": 0.3453, + "loss": 0.2445, "step": 4000 }, { "epoch": 4.405286343612334, - "eval_cer": 68.70908945009343, - "eval_loss": 0.7949628233909607, - "eval_runtime": 877.4202, - "eval_samples_per_second": 12.059, - "eval_steps_per_second": 3.016, - "eval_wer": 98.04809052333805, + "eval_cer": 28.12639712035583, + "eval_loss": 0.7661674618721008, + "eval_runtime": 1727.6269, + "eval_samples_per_second": 6.125, + "eval_steps_per_second": 1.532, + "eval_wer": 86.67609618104667, "step": 4000 }, { "epoch": 4.432819383259912, - "grad_norm": 0.7151613831520081, + "grad_norm": 0.7283459305763245, "learning_rate": 1.6384615384615384e-05, - "loss": 0.3738, + "loss": 0.262, "step": 4025 }, { "epoch": 4.460352422907489, - "grad_norm": 0.944696843624115, + "grad_norm": 0.8054242134094238, "learning_rate": 1.635897435897436e-05, - "loss": 0.3684, + "loss": 0.2685, "step": 4050 }, { "epoch": 4.487885462555066, - "grad_norm": 0.9610768556594849, + "grad_norm": 0.7349271774291992, "learning_rate": 1.6333333333333335e-05, - "loss": 0.3814, + "loss": 0.2636, "step": 4075 }, { "epoch": 4.515418502202643, - "grad_norm": 0.8123170733451843, + "grad_norm": 0.7914747595787048, "learning_rate": 1.630769230769231e-05, - "loss": 0.3308, + "loss": 0.2241, "step": 4100 }, { "epoch": 4.54295154185022, - "grad_norm": 0.7825468182563782, + "grad_norm": 0.6789693832397461, "learning_rate": 1.6282051282051282e-05, - "loss": 0.357, + "loss": 0.2491, "step": 4125 }, { "epoch": 4.570484581497797, - "grad_norm": 0.948771595954895, + "grad_norm": 0.7700952887535095, "learning_rate": 1.625641025641026e-05, - "loss": 0.3728, + "loss": 0.2608, "step": 4150 }, { "epoch": 4.598017621145375, - "grad_norm": 0.9299787878990173, + "grad_norm": 0.6949133276939392, "learning_rate": 1.6230769230769233e-05, - "loss": 0.3591, + "loss": 0.2542, "step": 4175 }, { "epoch": 4.6255506607929515, - "grad_norm": 0.7048384547233582, + "grad_norm": 0.7408822774887085, "learning_rate": 1.6205128205128207e-05, - "loss": 0.3355, + "loss": 0.2427, "step": 4200 }, { "epoch": 4.653083700440528, - "grad_norm": 0.864458441734314, + "grad_norm": 0.7268140912055969, "learning_rate": 1.617948717948718e-05, - "loss": 0.3469, + "loss": 0.2473, "step": 4225 }, { "epoch": 4.680616740088106, - "grad_norm": 0.9717184901237488, + "grad_norm": 1.2149430513381958, "learning_rate": 1.6153846153846154e-05, - "loss": 0.3241, + "loss": 0.2374, "step": 4250 }, { "epoch": 4.708149779735683, - "grad_norm": 0.7713704705238342, + "grad_norm": 0.8126139044761658, "learning_rate": 1.612820512820513e-05, - "loss": 0.3416, + "loss": 0.2428, "step": 4275 }, { "epoch": 4.73568281938326, - "grad_norm": 0.9309021234512329, + "grad_norm": 0.7101436257362366, "learning_rate": 1.6102564102564105e-05, - "loss": 0.356, + "loss": 0.2475, "step": 4300 }, { "epoch": 4.763215859030837, - "grad_norm": 0.8275054097175598, + "grad_norm": 0.6345697641372681, "learning_rate": 1.607692307692308e-05, - "loss": 0.3525, + "loss": 0.2432, "step": 4325 }, { "epoch": 4.790748898678414, - "grad_norm": 0.8987815380096436, + "grad_norm": 0.781745195388794, "learning_rate": 1.6051282051282052e-05, - "loss": 0.3575, + "loss": 0.23, "step": 4350 }, { "epoch": 4.818281938325991, - "grad_norm": 1.142317533493042, + "grad_norm": 1.2168214321136475, "learning_rate": 1.602564102564103e-05, - "loss": 0.3611, + "loss": 0.2563, "step": 4375 }, { "epoch": 4.845814977973569, - "grad_norm": 0.6753841042518616, + "grad_norm": 0.47594118118286133, "learning_rate": 1.6000000000000003e-05, - "loss": 0.3334, + "loss": 0.2294, "step": 4400 }, { "epoch": 4.8733480176211454, - "grad_norm": 0.6985512375831604, + "grad_norm": 0.4806981384754181, "learning_rate": 1.5974358974358976e-05, - "loss": 0.3331, + "loss": 0.2321, "step": 4425 }, { "epoch": 4.900881057268722, - "grad_norm": 0.7513765692710876, + "grad_norm": 0.7716917991638184, "learning_rate": 1.594871794871795e-05, - "loss": 0.3454, + "loss": 0.2394, "step": 4450 }, { "epoch": 4.9284140969163, - "grad_norm": 0.7995850443840027, + "grad_norm": 0.579592764377594, "learning_rate": 1.5923076923076924e-05, - "loss": 0.3252, + "loss": 0.2245, "step": 4475 }, { "epoch": 4.955947136563877, - "grad_norm": 0.7515414357185364, + "grad_norm": 0.5050383806228638, "learning_rate": 1.5897435897435897e-05, - "loss": 0.3386, + "loss": 0.2357, "step": 4500 }, { "epoch": 4.983480176211454, - "grad_norm": 0.9250982999801636, + "grad_norm": 1.2022616863250732, "learning_rate": 1.587179487179487e-05, - "loss": 0.3472, + "loss": 0.246, "step": 4525 }, { "epoch": 5.011013215859031, - "grad_norm": 0.674414336681366, + "grad_norm": 0.6758959889411926, "learning_rate": 1.5846153846153848e-05, - "loss": 0.29, + "loss": 0.1977, "step": 4550 }, { "epoch": 5.038546255506608, - "grad_norm": 0.5202761888504028, + "grad_norm": 0.3403749465942383, "learning_rate": 1.582051282051282e-05, - "loss": 0.2607, + "loss": 0.1768, "step": 4575 }, { "epoch": 5.066079295154185, - "grad_norm": 0.746380627155304, + "grad_norm": 0.6206667423248291, "learning_rate": 1.5794871794871795e-05, - "loss": 0.2281, + "loss": 0.1403, "step": 4600 }, { "epoch": 5.093612334801762, - "grad_norm": 0.6206459999084473, + "grad_norm": 0.4241011440753937, "learning_rate": 1.576923076923077e-05, - "loss": 0.2658, + "loss": 0.1857, "step": 4625 }, { "epoch": 5.121145374449339, - "grad_norm": 0.6437418460845947, + "grad_norm": 0.5990819931030273, "learning_rate": 1.5743589743589746e-05, - "loss": 0.2674, + "loss": 0.1732, "step": 4650 }, { "epoch": 5.148678414096916, - "grad_norm": 0.6083819270133972, + "grad_norm": 0.36775925755500793, "learning_rate": 1.571794871794872e-05, - "loss": 0.2685, + "loss": 0.1679, "step": 4675 }, { "epoch": 5.176211453744493, - "grad_norm": 0.7241026759147644, + "grad_norm": 0.4457094669342041, "learning_rate": 1.5692307692307693e-05, - "loss": 0.237, + "loss": 0.1422, "step": 4700 }, { "epoch": 5.203744493392071, - "grad_norm": 0.6412639021873474, + "grad_norm": 0.3794902563095093, "learning_rate": 1.5666666666666667e-05, - "loss": 0.2647, + "loss": 0.1602, "step": 4725 }, { "epoch": 5.2312775330396475, - "grad_norm": 0.8345388174057007, + "grad_norm": 0.9086684584617615, "learning_rate": 1.5641025641025644e-05, - "loss": 0.2344, + "loss": 0.1401, "step": 4750 }, { "epoch": 5.258810572687224, - "grad_norm": 0.7114852070808411, + "grad_norm": 0.6391773223876953, "learning_rate": 1.5615384615384618e-05, - "loss": 0.2894, + "loss": 0.2021, "step": 4775 }, { "epoch": 5.286343612334802, - "grad_norm": 0.5992833971977234, + "grad_norm": 0.36679503321647644, "learning_rate": 1.558974358974359e-05, - "loss": 0.244, + "loss": 0.1486, "step": 4800 }, { "epoch": 5.313876651982379, - "grad_norm": 0.5545650124549866, + "grad_norm": 0.5840328335762024, "learning_rate": 1.5564102564102565e-05, - "loss": 0.2336, + "loss": 0.1356, "step": 4825 }, { "epoch": 5.341409691629956, - "grad_norm": 0.659195601940155, + "grad_norm": 0.4502796232700348, "learning_rate": 1.553846153846154e-05, - "loss": 0.2539, + "loss": 0.1549, "step": 4850 }, { "epoch": 5.368942731277533, - "grad_norm": 0.8276779651641846, + "grad_norm": 0.5962740778923035, "learning_rate": 1.5512820512820516e-05, - "loss": 0.2555, + "loss": 0.1529, "step": 4875 }, { "epoch": 5.39647577092511, - "grad_norm": 0.8427993059158325, + "grad_norm": 0.695899486541748, "learning_rate": 1.548717948717949e-05, - "loss": 0.2435, + "loss": 0.1456, "step": 4900 }, { "epoch": 5.424008810572687, - "grad_norm": 0.8535668253898621, + "grad_norm": 0.5500043630599976, "learning_rate": 1.5461538461538463e-05, - "loss": 0.2892, + "loss": 0.1991, "step": 4925 }, { "epoch": 5.451541850220265, - "grad_norm": 0.7300683856010437, + "grad_norm": 0.4101676642894745, "learning_rate": 1.5435897435897436e-05, - "loss": 0.2646, + "loss": 0.1708, "step": 4950 }, { "epoch": 5.479074889867841, - "grad_norm": 0.6704499125480652, + "grad_norm": 0.5245118141174316, "learning_rate": 1.5410256410256414e-05, - "loss": 0.2527, + "loss": 0.1495, "step": 4975 }, { "epoch": 5.506607929515418, - "grad_norm": 0.6592108011245728, + "grad_norm": 0.9378005266189575, "learning_rate": 1.5384615384615387e-05, - "loss": 0.2517, + "loss": 0.1548, "step": 5000 }, { "epoch": 5.506607929515418, - "eval_cer": 64.5243199247991, - "eval_loss": 0.8067805171012878, - "eval_runtime": 858.4851, - "eval_samples_per_second": 12.325, - "eval_steps_per_second": 3.082, - "eval_wer": 96.94483734087694, + "eval_cer": 27.874772161911203, + "eval_loss": 0.7709037661552429, + "eval_runtime": 1715.4949, + "eval_samples_per_second": 6.168, + "eval_steps_per_second": 1.542, + "eval_wer": 86.6006600660066, "step": 5000 }, { "epoch": 5.534140969162996, - "grad_norm": 0.8046155571937561, + "grad_norm": 0.5195850133895874, "learning_rate": 1.535897435897436e-05, - "loss": 0.2629, + "loss": 0.159, "step": 5025 }, { "epoch": 5.561674008810573, - "grad_norm": 0.7639729976654053, + "grad_norm": 0.5712270140647888, "learning_rate": 1.5333333333333334e-05, - "loss": 0.2815, + "loss": 0.184, "step": 5050 }, { "epoch": 5.5892070484581495, - "grad_norm": 1.1292579174041748, + "grad_norm": 0.5652280449867249, "learning_rate": 1.5307692307692308e-05, - "loss": 0.2611, + "loss": 0.1645, "step": 5075 }, { "epoch": 5.616740088105727, - "grad_norm": 0.8473858833312988, + "grad_norm": 0.6352373957633972, "learning_rate": 1.5282051282051282e-05, - "loss": 0.2603, + "loss": 0.1561, "step": 5100 }, { "epoch": 5.644273127753304, - "grad_norm": 0.891690194606781, + "grad_norm": 0.9260919690132141, "learning_rate": 1.5256410256410257e-05, - "loss": 0.2592, + "loss": 0.1653, "step": 5125 }, { "epoch": 5.671806167400881, - "grad_norm": 0.6996191740036011, + "grad_norm": 0.5775518417358398, "learning_rate": 1.523076923076923e-05, - "loss": 0.2717, + "loss": 0.1767, "step": 5150 }, { "epoch": 5.6993392070484585, - "grad_norm": 0.7721630930900574, + "grad_norm": 0.4431408941745758, "learning_rate": 1.5205128205128206e-05, - "loss": 0.2383, + "loss": 0.1365, "step": 5175 }, { "epoch": 5.726872246696035, - "grad_norm": 0.7740418910980225, + "grad_norm": 0.4839853346347809, "learning_rate": 1.517948717948718e-05, - "loss": 0.2795, + "loss": 0.1932, "step": 5200 }, { "epoch": 5.754405286343612, - "grad_norm": 0.7397506833076477, + "grad_norm": 0.4924067258834839, "learning_rate": 1.5153846153846155e-05, - "loss": 0.2575, + "loss": 0.1506, "step": 5225 }, { "epoch": 5.78193832599119, - "grad_norm": 0.8838066458702087, + "grad_norm": 0.7064109444618225, "learning_rate": 1.5128205128205129e-05, - "loss": 0.2873, + "loss": 0.1899, "step": 5250 }, { "epoch": 5.809471365638767, - "grad_norm": 1.331363320350647, + "grad_norm": 0.4375017285346985, "learning_rate": 1.5102564102564104e-05, - "loss": 0.2664, + "loss": 0.1629, "step": 5275 }, { "epoch": 5.8370044052863435, - "grad_norm": 0.8736733794212341, + "grad_norm": 0.7590004205703735, "learning_rate": 1.5076923076923078e-05, - "loss": 0.2574, + "loss": 0.1555, "step": 5300 }, { "epoch": 5.864537444933921, - "grad_norm": 0.8324550986289978, + "grad_norm": 0.704337477684021, "learning_rate": 1.5051282051282053e-05, - "loss": 0.2631, + "loss": 0.1603, "step": 5325 }, { "epoch": 5.892070484581498, - "grad_norm": 0.6863672137260437, + "grad_norm": 0.6816414594650269, "learning_rate": 1.5025641025641027e-05, - "loss": 0.2693, + "loss": 0.1753, "step": 5350 }, { "epoch": 5.919603524229075, - "grad_norm": 0.6372384428977966, + "grad_norm": 0.49803662300109863, "learning_rate": 1.5000000000000002e-05, - "loss": 0.2449, + "loss": 0.1517, "step": 5375 }, { "epoch": 5.9471365638766525, - "grad_norm": 0.8350533246994019, + "grad_norm": 1.4683668613433838, "learning_rate": 1.4974358974358976e-05, - "loss": 0.256, + "loss": 0.1656, "step": 5400 }, { "epoch": 5.974669603524229, - "grad_norm": 0.7431644797325134, + "grad_norm": 0.5251373648643494, "learning_rate": 1.494871794871795e-05, - "loss": 0.2448, + "loss": 0.1454, "step": 5425 }, { "epoch": 6.002202643171806, - "grad_norm": 0.49462586641311646, + "grad_norm": 0.39470919966697693, "learning_rate": 1.4923076923076925e-05, - "loss": 0.2238, + "loss": 0.1317, "step": 5450 }, { "epoch": 6.029735682819383, - "grad_norm": 0.7905895709991455, + "grad_norm": 0.4022665321826935, "learning_rate": 1.4897435897435898e-05, - "loss": 0.1734, + "loss": 0.1069, "step": 5475 }, { "epoch": 6.057268722466961, - "grad_norm": 0.6685343384742737, + "grad_norm": 0.462735116481781, "learning_rate": 1.4871794871794874e-05, - "loss": 0.1577, + "loss": 0.0833, "step": 5500 }, { "epoch": 6.084801762114537, - "grad_norm": 0.6416425108909607, + "grad_norm": 0.5390641689300537, "learning_rate": 1.4846153846153847e-05, - "loss": 0.1845, + "loss": 0.1123, "step": 5525 }, { "epoch": 6.112334801762114, - "grad_norm": 0.584987998008728, + "grad_norm": 0.34844598174095154, "learning_rate": 1.4820512820512823e-05, - "loss": 0.1899, + "loss": 0.1198, "step": 5550 }, { "epoch": 6.139867841409692, - "grad_norm": 0.49699297547340393, + "grad_norm": 0.3289618492126465, "learning_rate": 1.4794871794871796e-05, - "loss": 0.1774, + "loss": 0.1119, "step": 5575 }, { "epoch": 6.167400881057269, - "grad_norm": 0.7456440925598145, + "grad_norm": 0.5920740962028503, "learning_rate": 1.4769230769230772e-05, - "loss": 0.1977, + "loss": 0.1185, "step": 5600 }, { "epoch": 6.1949339207048455, - "grad_norm": 0.5305894613265991, + "grad_norm": 0.3685765862464905, "learning_rate": 1.4743589743589745e-05, - "loss": 0.1544, + "loss": 0.0869, "step": 5625 }, { "epoch": 6.222466960352423, - "grad_norm": 0.6994619369506836, + "grad_norm": 0.6981714367866516, "learning_rate": 1.471794871794872e-05, - "loss": 0.1964, + "loss": 0.1187, "step": 5650 }, { "epoch": 6.25, - "grad_norm": 0.6842361092567444, + "grad_norm": 0.5181892514228821, "learning_rate": 1.4692307692307694e-05, - "loss": 0.1873, + "loss": 0.1144, "step": 5675 }, { "epoch": 6.277533039647577, - "grad_norm": 0.5287600159645081, + "grad_norm": 0.5749639272689819, "learning_rate": 1.4666666666666666e-05, - "loss": 0.16, + "loss": 0.082, "step": 5700 }, { "epoch": 6.3050660792951545, - "grad_norm": 0.5247631669044495, + "grad_norm": 0.34826797246932983, "learning_rate": 1.4641025641025642e-05, - "loss": 0.1849, + "loss": 0.1196, "step": 5725 }, { "epoch": 6.332599118942731, - "grad_norm": 0.7973225712776184, + "grad_norm": 0.4354853332042694, "learning_rate": 1.4615384615384615e-05, - "loss": 0.17, + "loss": 0.0996, "step": 5750 }, { "epoch": 6.360132158590308, - "grad_norm": 0.5792837142944336, + "grad_norm": 0.4485156238079071, "learning_rate": 1.458974358974359e-05, - "loss": 0.1766, + "loss": 0.1012, "step": 5775 }, { "epoch": 6.387665198237886, - "grad_norm": 0.6392947435379028, + "grad_norm": 1.4070178270339966, "learning_rate": 1.4564102564102564e-05, - "loss": 0.1888, + "loss": 0.1116, "step": 5800 }, { "epoch": 6.415198237885463, - "grad_norm": 0.5919508337974548, + "grad_norm": 0.5709951519966125, "learning_rate": 1.453846153846154e-05, - "loss": 0.1729, + "loss": 0.0992, "step": 5825 }, { "epoch": 6.442731277533039, - "grad_norm": 0.5749716758728027, + "grad_norm": 0.7376241087913513, "learning_rate": 1.4512820512820513e-05, - "loss": 0.1929, + "loss": 0.1296, "step": 5850 }, { "epoch": 6.470264317180617, - "grad_norm": 0.7120850682258606, + "grad_norm": 0.5828295350074768, "learning_rate": 1.4487179487179489e-05, - "loss": 0.2032, + "loss": 0.1363, "step": 5875 }, { "epoch": 6.497797356828194, - "grad_norm": 0.7158252596855164, + "grad_norm": 0.44842031598091125, "learning_rate": 1.4461538461538462e-05, - "loss": 0.1861, + "loss": 0.1186, "step": 5900 }, { "epoch": 6.525330396475771, - "grad_norm": 0.6715514659881592, + "grad_norm": 0.5105710625648499, "learning_rate": 1.4435897435897438e-05, - "loss": 0.2071, + "loss": 0.1319, "step": 5925 }, { "epoch": 6.5528634361233475, - "grad_norm": 0.5938199758529663, + "grad_norm": 0.45816680788993835, "learning_rate": 1.4410256410256411e-05, - "loss": 0.1839, + "loss": 0.1169, "step": 5950 }, { "epoch": 6.580396475770925, - "grad_norm": 0.7464186549186707, + "grad_norm": 0.5432708859443665, "learning_rate": 1.4384615384615387e-05, - "loss": 0.1855, + "loss": 0.1163, "step": 5975 }, { "epoch": 6.607929515418502, - "grad_norm": 0.6544903516769409, + "grad_norm": 0.5402922034263611, "learning_rate": 1.435897435897436e-05, - "loss": 0.1854, + "loss": 0.1102, "step": 6000 }, { "epoch": 6.607929515418502, - "eval_cer": 73.37303543383811, - "eval_loss": 0.8310097455978394, - "eval_runtime": 933.6287, - "eval_samples_per_second": 11.333, - "eval_steps_per_second": 2.834, - "eval_wer": 97.99151343705799, + "eval_cer": 26.293375213508654, + "eval_loss": 0.7888639569282532, + "eval_runtime": 1700.1004, + "eval_samples_per_second": 6.224, + "eval_steps_per_second": 1.556, + "eval_wer": 86.31777463460631, "step": 6000 }, { "epoch": 6.635462555066079, - "grad_norm": 0.625103771686554, + "grad_norm": 0.8144319653511047, "learning_rate": 1.4333333333333334e-05, - "loss": 0.1653, + "loss": 0.0867, "step": 6025 }, { "epoch": 6.6629955947136565, - "grad_norm": 0.540448009967804, + "grad_norm": 0.3316783607006073, "learning_rate": 1.430769230769231e-05, - "loss": 0.2075, + "loss": 0.1343, "step": 6050 }, { "epoch": 6.690528634361233, - "grad_norm": 0.8601000308990479, + "grad_norm": 0.6257611513137817, "learning_rate": 1.4282051282051283e-05, - "loss": 0.2174, + "loss": 0.141, "step": 6075 }, { "epoch": 6.71806167400881, - "grad_norm": 0.8207329511642456, + "grad_norm": 0.6132906079292297, "learning_rate": 1.4256410256410258e-05, - "loss": 0.2157, + "loss": 0.1405, "step": 6100 }, { "epoch": 6.745594713656388, - "grad_norm": 0.6145961880683899, + "grad_norm": 0.4126946032047272, "learning_rate": 1.4230769230769232e-05, - "loss": 0.2074, + "loss": 0.125, "step": 6125 }, { "epoch": 6.773127753303965, - "grad_norm": 0.7318241596221924, + "grad_norm": 0.5601705312728882, "learning_rate": 1.4205128205128207e-05, - "loss": 0.1896, + "loss": 0.109, "step": 6150 }, { "epoch": 6.8006607929515415, - "grad_norm": 0.7682480216026306, + "grad_norm": 0.51097571849823, "learning_rate": 1.4179487179487181e-05, - "loss": 0.199, + "loss": 0.1227, "step": 6175 }, { "epoch": 6.828193832599119, - "grad_norm": 0.6676972508430481, + "grad_norm": 0.557555615901947, "learning_rate": 1.4153846153846156e-05, - "loss": 0.1742, + "loss": 0.1051, "step": 6200 }, { "epoch": 6.855726872246696, - "grad_norm": 0.6792747974395752, + "grad_norm": 0.602931797504425, "learning_rate": 1.412820512820513e-05, - "loss": 0.2096, + "loss": 0.1323, "step": 6225 }, { "epoch": 6.883259911894273, - "grad_norm": 0.5820185542106628, + "grad_norm": 0.4400341212749481, "learning_rate": 1.4102564102564105e-05, - "loss": 0.1843, + "loss": 0.1118, "step": 6250 }, { "epoch": 6.9107929515418505, - "grad_norm": 0.6929712295532227, + "grad_norm": 0.8456242084503174, "learning_rate": 1.4076923076923079e-05, - "loss": 0.1797, + "loss": 0.093, "step": 6275 }, { "epoch": 6.938325991189427, - "grad_norm": 0.7363738417625427, + "grad_norm": 0.34682753682136536, "learning_rate": 1.405128205128205e-05, - "loss": 0.1823, + "loss": 0.1108, "step": 6300 }, { "epoch": 6.965859030837004, - "grad_norm": 0.708763599395752, + "grad_norm": 0.5863762497901917, "learning_rate": 1.4025641025641026e-05, - "loss": 0.1856, + "loss": 0.1206, "step": 6325 }, { "epoch": 6.993392070484582, - "grad_norm": 0.6946198344230652, + "grad_norm": 0.4746488332748413, "learning_rate": 1.4e-05, - "loss": 0.1942, + "loss": 0.1128, "step": 6350 }, { "epoch": 7.020925110132159, - "grad_norm": 0.5003654956817627, + "grad_norm": 0.19997574388980865, "learning_rate": 1.3974358974358975e-05, - "loss": 0.131, + "loss": 0.0823, "step": 6375 }, { "epoch": 7.048458149779735, - "grad_norm": 0.6626909375190735, + "grad_norm": 0.44562461972236633, "learning_rate": 1.3948717948717949e-05, - "loss": 0.1278, + "loss": 0.0788, "step": 6400 }, { "epoch": 7.075991189427313, - "grad_norm": 0.36942312121391296, + "grad_norm": 1.034386396408081, "learning_rate": 1.3923076923076924e-05, - "loss": 0.1111, + "loss": 0.0651, "step": 6425 }, { "epoch": 7.10352422907489, - "grad_norm": 0.5009227395057678, + "grad_norm": 0.4024251103401184, "learning_rate": 1.3897435897435898e-05, - "loss": 0.1099, + "loss": 0.0603, "step": 6450 }, { "epoch": 7.131057268722467, - "grad_norm": 0.3141847848892212, + "grad_norm": 0.2884047329425812, "learning_rate": 1.3871794871794873e-05, - "loss": 0.1193, + "loss": 0.0759, "step": 6475 }, { "epoch": 7.158590308370044, - "grad_norm": 0.6159384250640869, + "grad_norm": 0.30277690291404724, "learning_rate": 1.3846153846153847e-05, - "loss": 0.1305, + "loss": 0.0789, "step": 6500 }, { "epoch": 7.186123348017621, - "grad_norm": 0.7549660801887512, + "grad_norm": 0.5528630614280701, "learning_rate": 1.3820512820512822e-05, - "loss": 0.1271, + "loss": 0.0687, "step": 6525 }, { "epoch": 7.213656387665198, - "grad_norm": 0.9365887641906738, + "grad_norm": 0.852532684803009, "learning_rate": 1.3794871794871796e-05, - "loss": 0.12, + "loss": 0.0733, "step": 6550 }, { "epoch": 7.241189427312776, - "grad_norm": 0.42107388377189636, + "grad_norm": 0.5282613039016724, "learning_rate": 1.3769230769230771e-05, - "loss": 0.1303, + "loss": 0.0793, "step": 6575 }, { "epoch": 7.2687224669603525, - "grad_norm": 0.6491579413414001, + "grad_norm": 0.4679343104362488, "learning_rate": 1.3743589743589745e-05, - "loss": 0.1284, + "loss": 0.0734, "step": 6600 }, { "epoch": 7.296255506607929, - "grad_norm": 0.5241743922233582, + "grad_norm": 0.30845746397972107, "learning_rate": 1.3717948717948718e-05, - "loss": 0.1352, + "loss": 0.0871, "step": 6625 }, { "epoch": 7.323788546255507, - "grad_norm": 0.5401379466056824, + "grad_norm": 0.3263518810272217, "learning_rate": 1.3692307692307694e-05, - "loss": 0.1305, + "loss": 0.0774, "step": 6650 }, { "epoch": 7.351321585903084, - "grad_norm": 0.8926748037338257, + "grad_norm": 0.6020768284797668, "learning_rate": 1.3666666666666667e-05, - "loss": 0.1414, + "loss": 0.0869, "step": 6675 }, { "epoch": 7.378854625550661, - "grad_norm": 0.4713541567325592, + "grad_norm": 0.23268386721611023, "learning_rate": 1.3641025641025643e-05, - "loss": 0.1152, + "loss": 0.0673, "step": 6700 }, { "epoch": 7.406387665198238, - "grad_norm": 0.4151066839694977, + "grad_norm": 0.34103459119796753, "learning_rate": 1.3615384615384616e-05, - "loss": 0.1123, + "loss": 0.0621, "step": 6725 }, { "epoch": 7.433920704845815, - "grad_norm": 0.5809412598609924, + "grad_norm": 0.44131579995155334, "learning_rate": 1.3589743589743592e-05, - "loss": 0.1342, + "loss": 0.0752, "step": 6750 }, { "epoch": 7.461453744493392, - "grad_norm": 0.5300701856613159, + "grad_norm": 0.41322773694992065, "learning_rate": 1.3564102564102565e-05, - "loss": 0.1394, + "loss": 0.0879, "step": 6775 }, { "epoch": 7.48898678414097, - "grad_norm": 0.5370484590530396, + "grad_norm": 0.2960795760154724, "learning_rate": 1.353846153846154e-05, - "loss": 0.1337, + "loss": 0.0784, "step": 6800 }, { "epoch": 7.516519823788546, - "grad_norm": 0.6923957467079163, + "grad_norm": 0.3674839735031128, "learning_rate": 1.3512820512820514e-05, - "loss": 0.1314, + "loss": 0.0745, "step": 6825 }, { "epoch": 7.544052863436123, - "grad_norm": 0.35029956698417664, + "grad_norm": 0.16305261850357056, "learning_rate": 1.348717948717949e-05, - "loss": 0.128, + "loss": 0.0794, "step": 6850 }, { "epoch": 7.5715859030837, - "grad_norm": 0.5914937853813171, + "grad_norm": 0.4451794922351837, "learning_rate": 1.3461538461538463e-05, - "loss": 0.121, + "loss": 0.0631, "step": 6875 }, { "epoch": 7.599118942731278, - "grad_norm": 0.5608924627304077, + "grad_norm": 0.507660448551178, "learning_rate": 1.3435897435897435e-05, - "loss": 0.1439, + "loss": 0.0895, "step": 6900 }, { "epoch": 7.6266519823788546, - "grad_norm": 0.38419216871261597, + "grad_norm": 0.2863459587097168, "learning_rate": 1.341025641025641e-05, - "loss": 0.1195, + "loss": 0.0729, "step": 6925 }, { "epoch": 7.654185022026431, - "grad_norm": 0.6647460460662842, + "grad_norm": 0.5745854377746582, "learning_rate": 1.3384615384615384e-05, - "loss": 0.1501, + "loss": 0.0995, "step": 6950 }, { "epoch": 7.681718061674009, - "grad_norm": 0.5829716324806213, + "grad_norm": 0.42818760871887207, "learning_rate": 1.335897435897436e-05, - "loss": 0.1402, + "loss": 0.0932, "step": 6975 }, { "epoch": 7.709251101321586, - "grad_norm": 0.7427933216094971, + "grad_norm": 0.6487565636634827, "learning_rate": 1.3333333333333333e-05, - "loss": 0.1173, + "loss": 0.0682, "step": 7000 }, { "epoch": 7.709251101321586, - "eval_cer": 64.11449795375603, - "eval_loss": 0.8565544486045837, - "eval_runtime": 853.5046, - "eval_samples_per_second": 12.397, - "eval_steps_per_second": 3.1, - "eval_wer": 97.64262140499764, + "eval_cer": 27.35776598305687, + "eval_loss": 0.7991169095039368, + "eval_runtime": 1721.4291, + "eval_samples_per_second": 6.147, + "eval_steps_per_second": 1.537, + "eval_wer": 84.45073078736445, "step": 7000 }, { "epoch": 7.736784140969163, - "grad_norm": 0.7469947338104248, + "grad_norm": 0.7766274809837341, "learning_rate": 1.3307692307692309e-05, - "loss": 0.146, + "loss": 0.0903, "step": 7025 }, { "epoch": 7.76431718061674, - "grad_norm": 0.962263822555542, + "grad_norm": 0.41921526193618774, "learning_rate": 1.3282051282051282e-05, - "loss": 0.1349, + "loss": 0.087, "step": 7050 }, { "epoch": 7.791850220264317, - "grad_norm": 0.7025478482246399, + "grad_norm": 0.5744315385818481, "learning_rate": 1.3256410256410258e-05, - "loss": 0.129, + "loss": 0.0823, "step": 7075 }, { "epoch": 7.819383259911894, - "grad_norm": 0.749464750289917, + "grad_norm": 0.7453629970550537, "learning_rate": 1.3230769230769231e-05, - "loss": 0.1276, + "loss": 0.0752, "step": 7100 }, { "epoch": 7.846916299559472, - "grad_norm": 0.6161931157112122, + "grad_norm": 0.4299808442592621, "learning_rate": 1.3205128205128207e-05, - "loss": 0.1196, + "loss": 0.0725, "step": 7125 }, { "epoch": 7.8744493392070485, - "grad_norm": 0.43383580446243286, + "grad_norm": 0.6843218207359314, "learning_rate": 1.317948717948718e-05, - "loss": 0.1511, + "loss": 0.0956, "step": 7150 }, { "epoch": 7.901982378854625, - "grad_norm": 0.5156465768814087, + "grad_norm": 0.7576065063476562, "learning_rate": 1.3153846153846156e-05, - "loss": 0.141, + "loss": 0.0878, "step": 7175 }, { "epoch": 7.929515418502203, - "grad_norm": 0.6636006236076355, + "grad_norm": 0.5355419516563416, "learning_rate": 1.312820512820513e-05, - "loss": 0.152, + "loss": 0.1018, "step": 7200 }, { "epoch": 7.95704845814978, - "grad_norm": 0.6761754751205444, + "grad_norm": 0.5288059711456299, "learning_rate": 1.3102564102564103e-05, - "loss": 0.1379, + "loss": 0.0858, "step": 7225 }, { "epoch": 7.984581497797357, - "grad_norm": 0.5854198336601257, + "grad_norm": 0.4639795124530792, "learning_rate": 1.3076923076923078e-05, - "loss": 0.1406, + "loss": 0.0867, "step": 7250 }, { "epoch": 8.012114537444933, - "grad_norm": 0.3060972988605499, + "grad_norm": 0.4118720591068268, "learning_rate": 1.3051282051282052e-05, - "loss": 0.1423, + "loss": 0.0966, "step": 7275 }, { "epoch": 8.039647577092511, - "grad_norm": 0.37324124574661255, + "grad_norm": 0.2552703320980072, "learning_rate": 1.3025641025641027e-05, - "loss": 0.0909, + "loss": 0.0644, "step": 7300 }, { "epoch": 8.067180616740089, - "grad_norm": 0.4420897960662842, + "grad_norm": 0.4209102690219879, "learning_rate": 1.3000000000000001e-05, - "loss": 0.091, + "loss": 0.0534, "step": 7325 }, { "epoch": 8.094713656387665, - "grad_norm": 0.5292598605155945, + "grad_norm": 0.4842354655265808, "learning_rate": 1.2974358974358976e-05, - "loss": 0.088, + "loss": 0.0567, "step": 7350 }, { "epoch": 8.122246696035242, - "grad_norm": 0.6191997528076172, + "grad_norm": 0.5336529612541199, "learning_rate": 1.294871794871795e-05, - "loss": 0.0857, + "loss": 0.0581, "step": 7375 }, { "epoch": 8.14977973568282, - "grad_norm": 0.4383416473865509, + "grad_norm": 0.3102397918701172, "learning_rate": 1.2923076923076925e-05, - "loss": 0.0832, + "loss": 0.0502, "step": 7400 }, { "epoch": 8.177312775330396, - "grad_norm": 0.2875024378299713, + "grad_norm": 0.29148268699645996, "learning_rate": 1.2897435897435899e-05, - "loss": 0.0872, + "loss": 0.0494, "step": 7425 }, { "epoch": 8.204845814977974, - "grad_norm": 0.2749011218547821, + "grad_norm": 0.11677376180887222, "learning_rate": 1.2871794871794874e-05, - "loss": 0.0784, + "loss": 0.0428, "step": 7450 }, { "epoch": 8.232378854625551, - "grad_norm": 0.6115961074829102, + "grad_norm": 0.5054081082344055, "learning_rate": 1.2846153846153848e-05, - "loss": 0.1014, + "loss": 0.0613, "step": 7475 }, { "epoch": 8.259911894273127, - "grad_norm": 0.30915337800979614, + "grad_norm": 0.16621895134449005, "learning_rate": 1.2820512820512823e-05, - "loss": 0.0788, + "loss": 0.0465, "step": 7500 }, { "epoch": 8.287444933920705, - "grad_norm": 0.8303879499435425, + "grad_norm": 0.6071426272392273, "learning_rate": 1.2794871794871795e-05, - "loss": 0.0815, + "loss": 0.0466, "step": 7525 }, { "epoch": 8.314977973568283, - "grad_norm": 0.48807233572006226, + "grad_norm": 0.4491996765136719, "learning_rate": 1.2769230769230769e-05, - "loss": 0.083, + "loss": 0.0504, "step": 7550 }, { "epoch": 8.342511013215859, - "grad_norm": 0.5944768786430359, + "grad_norm": 0.5606942176818848, "learning_rate": 1.2743589743589744e-05, - "loss": 0.0873, + "loss": 0.0493, "step": 7575 }, { "epoch": 8.370044052863436, - "grad_norm": 0.4144470691680908, + "grad_norm": 0.6338093280792236, "learning_rate": 1.2717948717948718e-05, - "loss": 0.0749, + "loss": 0.0437, "step": 7600 }, { "epoch": 8.397577092511014, - "grad_norm": 0.6508732438087463, + "grad_norm": 0.50752192735672, "learning_rate": 1.2692307692307693e-05, - "loss": 0.0946, + "loss": 0.0558, "step": 7625 }, { "epoch": 8.42511013215859, - "grad_norm": 0.3488626778125763, + "grad_norm": 0.16351301968097687, "learning_rate": 1.2666666666666667e-05, - "loss": 0.081, + "loss": 0.0479, "step": 7650 }, { "epoch": 8.452643171806168, - "grad_norm": 0.6012532114982605, + "grad_norm": 0.392193078994751, "learning_rate": 1.2641025641025642e-05, - "loss": 0.0875, + "loss": 0.0542, "step": 7675 }, { "epoch": 8.480176211453745, - "grad_norm": 0.43506327271461487, + "grad_norm": 0.23874300718307495, "learning_rate": 1.2615384615384616e-05, - "loss": 0.0868, + "loss": 0.0516, "step": 7700 }, { "epoch": 8.507709251101321, - "grad_norm": 0.35185477137565613, + "grad_norm": 0.31626826524734497, "learning_rate": 1.2589743589743591e-05, - "loss": 0.0842, + "loss": 0.0486, "step": 7725 }, { "epoch": 8.535242290748899, - "grad_norm": 0.6967858076095581, + "grad_norm": 0.5571256875991821, "learning_rate": 1.2564102564102565e-05, - "loss": 0.0957, + "loss": 0.0665, "step": 7750 }, { "epoch": 8.562775330396477, - "grad_norm": 0.5462283492088318, + "grad_norm": 0.3961975872516632, "learning_rate": 1.253846153846154e-05, - "loss": 0.0942, + "loss": 0.0587, "step": 7775 }, { "epoch": 8.590308370044053, - "grad_norm": 0.524748682975769, + "grad_norm": 0.33032137155532837, "learning_rate": 1.2512820512820514e-05, - "loss": 0.1017, + "loss": 0.0631, "step": 7800 }, { "epoch": 8.61784140969163, - "grad_norm": 0.3966872990131378, + "grad_norm": 0.1791045367717743, "learning_rate": 1.2487179487179487e-05, - "loss": 0.1023, + "loss": 0.0669, "step": 7825 }, { "epoch": 8.645374449339208, - "grad_norm": 0.9616275429725647, + "grad_norm": 0.3944324851036072, "learning_rate": 1.2461538461538463e-05, - "loss": 0.0797, + "loss": 0.0439, "step": 7850 }, { "epoch": 8.672907488986784, - "grad_norm": 0.3389146029949188, + "grad_norm": 0.13560180366039276, "learning_rate": 1.2435897435897436e-05, - "loss": 0.0753, + "loss": 0.0403, "step": 7875 }, { "epoch": 8.700440528634362, - "grad_norm": 0.7579898834228516, + "grad_norm": 0.4744511842727661, "learning_rate": 1.2410256410256412e-05, - "loss": 0.0971, + "loss": 0.0595, "step": 7900 }, { "epoch": 8.72797356828194, - "grad_norm": 0.43244099617004395, + "grad_norm": 0.5268675684928894, "learning_rate": 1.2384615384615385e-05, - "loss": 0.0939, + "loss": 0.0568, "step": 7925 }, { "epoch": 8.755506607929515, - "grad_norm": 0.4598844051361084, + "grad_norm": 0.33561986684799194, "learning_rate": 1.235897435897436e-05, - "loss": 0.0944, + "loss": 0.057, "step": 7950 }, { "epoch": 8.783039647577093, - "grad_norm": 0.5808974504470825, + "grad_norm": 0.23346789181232452, "learning_rate": 1.2333333333333334e-05, - "loss": 0.0958, + "loss": 0.05, "step": 7975 }, { "epoch": 8.810572687224669, - "grad_norm": 0.8299708366394043, + "grad_norm": 0.5446615815162659, "learning_rate": 1.230769230769231e-05, - "loss": 0.1049, + "loss": 0.0647, "step": 8000 }, { "epoch": 8.810572687224669, - "eval_cer": 70.65044191991564, - "eval_loss": 0.8805521130561829, - "eval_runtime": 902.4134, - "eval_samples_per_second": 11.725, - "eval_steps_per_second": 2.932, - "eval_wer": 97.72748703441772, + "eval_cer": 25.62619650820217, + "eval_loss": 0.8131716847419739, + "eval_runtime": 1709.9866, + "eval_samples_per_second": 6.188, + "eval_steps_per_second": 1.547, + "eval_wer": 84.64875058934464, "step": 8000 }, { "epoch": 8.838105726872246, - "grad_norm": 0.5200393795967102, + "grad_norm": 0.37270498275756836, "learning_rate": 1.2282051282051283e-05, - "loss": 0.0886, + "loss": 0.0486, "step": 8025 }, { "epoch": 8.865638766519824, - "grad_norm": 0.610178530216217, + "grad_norm": 0.4193556308746338, "learning_rate": 1.2256410256410259e-05, - "loss": 0.0957, + "loss": 0.0544, "step": 8050 }, { "epoch": 8.8931718061674, - "grad_norm": 0.4926294684410095, + "grad_norm": 0.4220805764198303, "learning_rate": 1.2230769230769232e-05, - "loss": 0.0893, + "loss": 0.0555, "step": 8075 }, { "epoch": 8.920704845814978, - "grad_norm": 0.3815154731273651, + "grad_norm": 0.12924405932426453, "learning_rate": 1.2205128205128208e-05, - "loss": 0.1048, + "loss": 0.0654, "step": 8100 }, { "epoch": 8.948237885462555, - "grad_norm": 0.44610753655433655, + "grad_norm": 0.2983054518699646, "learning_rate": 1.217948717948718e-05, - "loss": 0.0903, + "loss": 0.0481, "step": 8125 }, { "epoch": 8.975770925110131, - "grad_norm": 0.5451329946517944, + "grad_norm": 0.5196457505226135, "learning_rate": 1.2153846153846153e-05, - "loss": 0.0779, + "loss": 0.0455, "step": 8150 }, { "epoch": 9.003303964757709, - "grad_norm": 0.27041003108024597, + "grad_norm": 0.1929197460412979, "learning_rate": 1.2128205128205129e-05, - "loss": 0.0943, + "loss": 0.0587, "step": 8175 }, { "epoch": 9.030837004405287, - "grad_norm": 0.5706424117088318, + "grad_norm": 0.6114550828933716, "learning_rate": 1.2102564102564102e-05, - "loss": 0.0544, + "loss": 0.0375, "step": 8200 }, { "epoch": 9.058370044052863, - "grad_norm": 0.2460174858570099, + "grad_norm": 0.3071931004524231, "learning_rate": 1.2076923076923078e-05, - "loss": 0.0542, + "loss": 0.032, "step": 8225 }, { "epoch": 9.08590308370044, - "grad_norm": 0.44156837463378906, + "grad_norm": 0.3815561532974243, "learning_rate": 1.2051282051282051e-05, - "loss": 0.048, + "loss": 0.0297, "step": 8250 }, { "epoch": 9.113436123348018, - "grad_norm": 0.5182222127914429, + "grad_norm": 0.3786160349845886, "learning_rate": 1.2025641025641027e-05, - "loss": 0.0551, + "loss": 0.0357, "step": 8275 }, { "epoch": 9.140969162995594, - "grad_norm": 0.25801339745521545, + "grad_norm": 0.32819467782974243, "learning_rate": 1.2e-05, - "loss": 0.0574, + "loss": 0.0362, "step": 8300 }, { "epoch": 9.168502202643172, - "grad_norm": 0.32541367411613464, + "grad_norm": 0.2228395938873291, "learning_rate": 1.1974358974358976e-05, - "loss": 0.0465, + "loss": 0.0259, "step": 8325 }, { "epoch": 9.19603524229075, - "grad_norm": 0.3748158812522888, + "grad_norm": 0.3630577325820923, "learning_rate": 1.194871794871795e-05, - "loss": 0.0628, + "loss": 0.0388, "step": 8350 }, { "epoch": 9.223568281938325, - "grad_norm": 0.37736037373542786, + "grad_norm": 0.24628213047981262, "learning_rate": 1.1923076923076925e-05, - "loss": 0.0563, + "loss": 0.0344, "step": 8375 }, { "epoch": 9.251101321585903, - "grad_norm": 0.37839052081108093, + "grad_norm": 0.4021029472351074, "learning_rate": 1.1897435897435898e-05, - "loss": 0.0535, + "loss": 0.0322, "step": 8400 }, { "epoch": 9.27863436123348, - "grad_norm": 0.3533216714859009, + "grad_norm": 0.1852307766675949, "learning_rate": 1.1871794871794872e-05, - "loss": 0.053, + "loss": 0.0327, "step": 8425 }, { "epoch": 9.306167400881057, - "grad_norm": 0.3839458227157593, + "grad_norm": 0.8122808337211609, "learning_rate": 1.1846153846153847e-05, - "loss": 0.0621, + "loss": 0.0373, "step": 8450 }, { "epoch": 9.333700440528634, - "grad_norm": 0.4322117567062378, + "grad_norm": 0.4952065348625183, "learning_rate": 1.1820512820512821e-05, - "loss": 0.0523, + "loss": 0.0344, "step": 8475 }, { "epoch": 9.361233480176212, - "grad_norm": 0.29340413212776184, + "grad_norm": 0.2687460482120514, "learning_rate": 1.1794871794871796e-05, - "loss": 0.0618, + "loss": 0.0449, "step": 8500 }, { "epoch": 9.388766519823788, - "grad_norm": 0.1959504932165146, + "grad_norm": 0.46548476815223694, "learning_rate": 1.176923076923077e-05, - "loss": 0.0593, + "loss": 0.0347, "step": 8525 }, { "epoch": 9.416299559471366, - "grad_norm": 0.3742198050022125, + "grad_norm": 0.271555632352829, "learning_rate": 1.1743589743589745e-05, - "loss": 0.0615, + "loss": 0.0372, "step": 8550 }, { "epoch": 9.443832599118943, - "grad_norm": 0.2696450650691986, + "grad_norm": 0.2767009735107422, "learning_rate": 1.1717948717948719e-05, - "loss": 0.0552, + "loss": 0.0341, "step": 8575 }, { "epoch": 9.47136563876652, - "grad_norm": 0.5510464310646057, + "grad_norm": 0.5092193484306335, "learning_rate": 1.1692307692307694e-05, - "loss": 0.0572, + "loss": 0.0344, "step": 8600 }, { "epoch": 9.498898678414097, - "grad_norm": 0.6056195497512817, + "grad_norm": 0.4733143448829651, "learning_rate": 1.1666666666666668e-05, - "loss": 0.0694, + "loss": 0.0442, "step": 8625 }, { "epoch": 9.526431718061675, - "grad_norm": 0.2911369204521179, + "grad_norm": 0.3013637661933899, "learning_rate": 1.1641025641025643e-05, - "loss": 0.0683, + "loss": 0.0437, "step": 8650 }, { "epoch": 9.55396475770925, - "grad_norm": 0.3952886760234833, + "grad_norm": 0.23593106865882874, "learning_rate": 1.1615384615384617e-05, - "loss": 0.0641, + "loss": 0.0345, "step": 8675 }, { "epoch": 9.581497797356828, - "grad_norm": 0.4399433732032776, + "grad_norm": 0.42476484179496765, "learning_rate": 1.1589743589743592e-05, - "loss": 0.0507, + "loss": 0.033, "step": 8700 }, { "epoch": 9.609030837004406, - "grad_norm": 0.19196133315563202, + "grad_norm": 0.1042499765753746, "learning_rate": 1.1564102564102566e-05, - "loss": 0.0637, + "loss": 0.0452, "step": 8725 }, { "epoch": 9.636563876651982, - "grad_norm": 0.6704760193824768, + "grad_norm": 0.4548315405845642, "learning_rate": 1.1538461538461538e-05, - "loss": 0.0628, + "loss": 0.049, "step": 8750 }, { "epoch": 9.66409691629956, - "grad_norm": 0.4588681757450104, + "grad_norm": 0.749373733997345, "learning_rate": 1.1512820512820513e-05, - "loss": 0.0587, + "loss": 0.0397, "step": 8775 }, { "epoch": 9.691629955947137, - "grad_norm": 0.49800875782966614, + "grad_norm": 0.24911393225193024, "learning_rate": 1.1487179487179487e-05, - "loss": 0.0534, + "loss": 0.0327, "step": 8800 }, { "epoch": 9.719162995594713, - "grad_norm": 0.5300059914588928, + "grad_norm": 0.3840325176715851, "learning_rate": 1.1461538461538462e-05, - "loss": 0.0659, + "loss": 0.0378, "step": 8825 }, { "epoch": 9.746696035242291, - "grad_norm": 0.4262904226779938, + "grad_norm": 0.3074113726615906, "learning_rate": 1.1435897435897436e-05, - "loss": 0.0688, + "loss": 0.0424, "step": 8850 }, { "epoch": 9.774229074889869, - "grad_norm": 0.5263908505439758, + "grad_norm": 0.3504096269607544, "learning_rate": 1.1410256410256411e-05, - "loss": 0.068, + "loss": 0.0408, "step": 8875 }, { "epoch": 9.801762114537445, - "grad_norm": 0.6390700340270996, + "grad_norm": 0.7079716324806213, "learning_rate": 1.1384615384615385e-05, - "loss": 0.0595, + "loss": 0.0362, "step": 8900 }, { "epoch": 9.829295154185022, - "grad_norm": 0.3971773684024811, + "grad_norm": 0.46274736523628235, "learning_rate": 1.135897435897436e-05, - "loss": 0.0715, + "loss": 0.0444, "step": 8925 }, { "epoch": 9.8568281938326, - "grad_norm": 0.2393706738948822, + "grad_norm": 0.30366581678390503, "learning_rate": 1.1333333333333334e-05, - "loss": 0.0619, + "loss": 0.0411, "step": 8950 }, { "epoch": 9.884361233480176, - "grad_norm": 0.7413886785507202, + "grad_norm": 0.394626647233963, "learning_rate": 1.1307692307692309e-05, - "loss": 0.0543, + "loss": 0.0347, "step": 8975 }, { "epoch": 9.911894273127754, - "grad_norm": 0.4195553660392761, + "grad_norm": 0.2617024779319763, "learning_rate": 1.1282051282051283e-05, - "loss": 0.0566, + "loss": 0.0343, "step": 9000 }, { "epoch": 9.911894273127754, - "eval_cer": 66.49834351678837, - "eval_loss": 0.9025006294250488, - "eval_runtime": 869.4795, - "eval_samples_per_second": 12.169, - "eval_steps_per_second": 3.043, - "eval_wer": 97.7934936350778, + "eval_cer": 24.69478293764974, + "eval_loss": 0.8281993865966797, + "eval_runtime": 1707.5191, + "eval_samples_per_second": 6.197, + "eval_steps_per_second": 1.55, + "eval_wer": 84.82791136256482, "step": 9000 }, { "epoch": 9.939427312775331, - "grad_norm": 0.26785850524902344, + "grad_norm": 0.5920683145523071, "learning_rate": 1.1256410256410258e-05, - "loss": 0.0617, + "loss": 0.0415, "step": 9025 }, { "epoch": 9.966960352422907, - "grad_norm": 0.7492098808288574, + "grad_norm": 0.5178209543228149, "learning_rate": 1.1230769230769232e-05, - "loss": 0.0673, + "loss": 0.0385, "step": 9050 }, { "epoch": 9.994493392070485, - "grad_norm": 0.4261627495288849, + "grad_norm": 0.33022773265838623, "learning_rate": 1.1205128205128205e-05, - "loss": 0.0596, + "loss": 0.038, "step": 9075 }, { "epoch": 10.022026431718063, - "grad_norm": 0.271913081407547, + "grad_norm": 0.5157202482223511, "learning_rate": 1.117948717948718e-05, - "loss": 0.0387, + "loss": 0.0304, "step": 9100 }, { "epoch": 10.049559471365638, - "grad_norm": 0.3687051832675934, + "grad_norm": 0.28111565113067627, "learning_rate": 1.1153846153846154e-05, - "loss": 0.0355, + "loss": 0.0271, "step": 9125 }, { "epoch": 10.077092511013216, - "grad_norm": 0.5124948620796204, + "grad_norm": 0.3325743079185486, "learning_rate": 1.112820512820513e-05, - "loss": 0.047, + "loss": 0.0348, "step": 9150 }, { "epoch": 10.104625550660794, - "grad_norm": 0.5072323679924011, + "grad_norm": 0.39298519492149353, "learning_rate": 1.1102564102564103e-05, - "loss": 0.0311, + "loss": 0.0221, "step": 9175 }, { "epoch": 10.13215859030837, - "grad_norm": 0.7538577914237976, + "grad_norm": 0.7207234501838684, "learning_rate": 1.1076923076923079e-05, - "loss": 0.0392, + "loss": 0.0316, "step": 9200 }, { "epoch": 10.159691629955947, - "grad_norm": 0.14610818028450012, + "grad_norm": 0.12185712903738022, "learning_rate": 1.1051282051282052e-05, - "loss": 0.0337, + "loss": 0.0333, "step": 9225 }, { "epoch": 10.187224669603523, - "grad_norm": 0.2696049213409424, + "grad_norm": 0.18927183747291565, "learning_rate": 1.1025641025641028e-05, - "loss": 0.0424, + "loss": 0.0317, "step": 9250 }, { "epoch": 10.214757709251101, - "grad_norm": 0.1662781834602356, + "grad_norm": 0.5243281722068787, "learning_rate": 1.1000000000000001e-05, - "loss": 0.0329, + "loss": 0.0238, "step": 9275 }, { "epoch": 10.242290748898679, - "grad_norm": 0.23444922268390656, + "grad_norm": 0.25754281878471375, "learning_rate": 1.0974358974358977e-05, - "loss": 0.0329, + "loss": 0.0265, "step": 9300 }, { "epoch": 10.269823788546255, - "grad_norm": 0.5237591862678528, + "grad_norm": 0.3999498188495636, "learning_rate": 1.094871794871795e-05, - "loss": 0.0403, + "loss": 0.0271, "step": 9325 }, { "epoch": 10.297356828193832, - "grad_norm": 0.46771717071533203, + "grad_norm": 0.5361197590827942, "learning_rate": 1.0923076923076922e-05, - "loss": 0.0396, + "loss": 0.0299, "step": 9350 }, { "epoch": 10.32488986784141, - "grad_norm": 0.24174697697162628, + "grad_norm": 0.4799012839794159, "learning_rate": 1.0897435897435898e-05, - "loss": 0.0483, + "loss": 0.0337, "step": 9375 }, { "epoch": 10.352422907488986, - "grad_norm": 0.38117629289627075, + "grad_norm": 0.29171785712242126, "learning_rate": 1.0871794871794871e-05, - "loss": 0.0337, + "loss": 0.0194, "step": 9400 }, { "epoch": 10.379955947136564, - "grad_norm": 0.2335430532693863, + "grad_norm": 0.158452108502388, "learning_rate": 1.0846153846153847e-05, - "loss": 0.046, + "loss": 0.0293, "step": 9425 }, { "epoch": 10.407488986784141, - "grad_norm": 0.1994549036026001, + "grad_norm": 0.1673414409160614, "learning_rate": 1.082051282051282e-05, - "loss": 0.0389, + "loss": 0.0245, "step": 9450 }, { "epoch": 10.435022026431717, - "grad_norm": 0.42172032594680786, + "grad_norm": 0.4289039075374603, "learning_rate": 1.0794871794871796e-05, - "loss": 0.0332, + "loss": 0.0251, "step": 9475 }, { "epoch": 10.462555066079295, - "grad_norm": 0.32804402709007263, + "grad_norm": 0.43541353940963745, "learning_rate": 1.076923076923077e-05, - "loss": 0.0356, + "loss": 0.0283, "step": 9500 }, { "epoch": 10.490088105726873, - "grad_norm": 0.26200738549232483, + "grad_norm": 0.15916317701339722, "learning_rate": 1.0743589743589745e-05, - "loss": 0.0495, + "loss": 0.0336, "step": 9525 }, { "epoch": 10.517621145374449, - "grad_norm": 0.24157360196113586, + "grad_norm": 0.3281770944595337, "learning_rate": 1.0717948717948718e-05, - "loss": 0.0364, + "loss": 0.0217, "step": 9550 }, { "epoch": 10.545154185022026, - "grad_norm": 0.20026101171970367, + "grad_norm": 0.12852789461612701, "learning_rate": 1.0692307692307694e-05, - "loss": 0.0374, + "loss": 0.0261, "step": 9575 }, { "epoch": 10.572687224669604, - "grad_norm": 0.24734953045845032, + "grad_norm": 0.3133266270160675, "learning_rate": 1.0666666666666667e-05, - "loss": 0.0373, + "loss": 0.0258, "step": 9600 }, { "epoch": 10.60022026431718, - "grad_norm": 0.4558570384979248, + "grad_norm": 0.49409055709838867, "learning_rate": 1.0641025641025643e-05, - "loss": 0.0399, + "loss": 0.0241, "step": 9625 }, { "epoch": 10.627753303964758, - "grad_norm": 0.18524861335754395, + "grad_norm": 0.1980574131011963, "learning_rate": 1.0615384615384616e-05, - "loss": 0.0386, + "loss": 0.0264, "step": 9650 }, { "epoch": 10.655286343612335, - "grad_norm": 0.25492531061172485, + "grad_norm": 0.3089769184589386, "learning_rate": 1.058974358974359e-05, - "loss": 0.043, + "loss": 0.0301, "step": 9675 }, { "epoch": 10.682819383259911, - "grad_norm": 0.4922308921813965, + "grad_norm": 0.4599588215351105, "learning_rate": 1.0564102564102565e-05, - "loss": 0.0457, + "loss": 0.0297, "step": 9700 }, { "epoch": 10.710352422907489, - "grad_norm": 0.2943499982357025, + "grad_norm": 0.3897559344768524, "learning_rate": 1.0538461538461539e-05, - "loss": 0.0431, + "loss": 0.026, "step": 9725 }, { "epoch": 10.737885462555067, - "grad_norm": 0.1940225064754486, + "grad_norm": 0.26833340525627136, "learning_rate": 1.0512820512820514e-05, - "loss": 0.0403, + "loss": 0.0297, "step": 9750 }, { "epoch": 10.765418502202643, - "grad_norm": 0.1552239954471588, + "grad_norm": 0.1307441145181656, "learning_rate": 1.0487179487179488e-05, - "loss": 0.0367, + "loss": 0.0269, "step": 9775 }, { "epoch": 10.79295154185022, - "grad_norm": 0.49517762660980225, + "grad_norm": 0.5322534441947937, "learning_rate": 1.0461538461538463e-05, - "loss": 0.0425, + "loss": 0.0308, "step": 9800 }, { "epoch": 10.820484581497798, - "grad_norm": 0.611227810382843, + "grad_norm": 0.42848142981529236, "learning_rate": 1.0435897435897437e-05, - "loss": 0.0385, + "loss": 0.0264, "step": 9825 }, { "epoch": 10.848017621145374, - "grad_norm": 0.19623863697052002, + "grad_norm": 0.24415309727191925, "learning_rate": 1.0410256410256412e-05, - "loss": 0.0364, + "loss": 0.0258, "step": 9850 }, { "epoch": 10.875550660792952, - "grad_norm": 0.4200350344181061, + "grad_norm": 0.2621011734008789, "learning_rate": 1.0384615384615386e-05, - "loss": 0.0408, + "loss": 0.025, "step": 9875 }, { "epoch": 10.90308370044053, - "grad_norm": 0.5670203566551208, + "grad_norm": 0.3474062383174896, "learning_rate": 1.0358974358974361e-05, - "loss": 0.0333, + "loss": 0.0239, "step": 9900 }, { "epoch": 10.930616740088105, - "grad_norm": 0.4078093469142914, + "grad_norm": 0.16342134773731232, "learning_rate": 1.0333333333333335e-05, - "loss": 0.0319, + "loss": 0.0213, "step": 9925 }, { "epoch": 10.958149779735683, - "grad_norm": 0.24493761360645294, + "grad_norm": 0.12468931823968887, "learning_rate": 1.0307692307692307e-05, - "loss": 0.0414, + "loss": 0.0233, "step": 9950 }, { "epoch": 10.98568281938326, - "grad_norm": 0.39487889409065247, + "grad_norm": 0.42761552333831787, "learning_rate": 1.0282051282051282e-05, - "loss": 0.046, + "loss": 0.0253, "step": 9975 }, { "epoch": 11.013215859030836, - "grad_norm": 0.09149141609668732, + "grad_norm": 0.16973541676998138, "learning_rate": 1.0256410256410256e-05, - "loss": 0.037, + "loss": 0.0181, "step": 10000 }, { "epoch": 11.013215859030836, - "eval_cer": 63.11544942854195, - "eval_loss": 0.9283918738365173, - "eval_runtime": 860.3587, - "eval_samples_per_second": 12.298, - "eval_steps_per_second": 3.075, - "eval_wer": 97.53889674681754, + "eval_cer": 24.361766762578384, + "eval_loss": 0.8395898342132568, + "eval_runtime": 1712.1817, + "eval_samples_per_second": 6.18, + "eval_steps_per_second": 1.545, + "eval_wer": 83.80009429514381, "step": 10000 }, { "epoch": 11.040748898678414, - "grad_norm": 0.4592762887477875, + "grad_norm": 0.20153629779815674, "learning_rate": 1.0230769230769231e-05, - "loss": 0.0276, + "loss": 0.0168, "step": 10025 }, { "epoch": 11.068281938325992, - "grad_norm": 0.15113013982772827, + "grad_norm": 0.10129767656326294, "learning_rate": 1.0205128205128205e-05, - "loss": 0.0259, + "loss": 0.0166, "step": 10050 }, { "epoch": 11.095814977973568, - "grad_norm": 0.1503317505121231, + "grad_norm": 0.18236328661441803, "learning_rate": 1.017948717948718e-05, - "loss": 0.0276, + "loss": 0.0207, "step": 10075 }, { "epoch": 11.123348017621145, - "grad_norm": 0.14569109678268433, + "grad_norm": 0.1023649126291275, "learning_rate": 1.0153846153846154e-05, - "loss": 0.0238, + "loss": 0.0151, "step": 10100 }, { "epoch": 11.150881057268723, - "grad_norm": 0.37791919708251953, + "grad_norm": 0.2005663514137268, "learning_rate": 1.012820512820513e-05, - "loss": 0.022, + "loss": 0.0139, "step": 10125 }, { "epoch": 11.178414096916299, - "grad_norm": 0.2850872874259949, + "grad_norm": 0.5880993008613586, "learning_rate": 1.0102564102564103e-05, - "loss": 0.0245, + "loss": 0.0166, "step": 10150 }, { "epoch": 11.205947136563877, - "grad_norm": 0.3191249370574951, + "grad_norm": 0.18318556249141693, "learning_rate": 1.0076923076923078e-05, - "loss": 0.0247, + "loss": 0.0147, "step": 10175 }, { "epoch": 11.233480176211454, - "grad_norm": 0.23166881501674652, + "grad_norm": 0.10596877336502075, "learning_rate": 1.0051282051282052e-05, - "loss": 0.0247, + "loss": 0.0186, "step": 10200 }, { "epoch": 11.26101321585903, - "grad_norm": 0.24626369774341583, + "grad_norm": 0.21908675134181976, "learning_rate": 1.0025641025641027e-05, - "loss": 0.0246, + "loss": 0.0167, "step": 10225 }, { "epoch": 11.288546255506608, - "grad_norm": 0.2534284293651581, + "grad_norm": 0.3086978495121002, "learning_rate": 1e-05, - "loss": 0.0219, + "loss": 0.0155, "step": 10250 }, { "epoch": 11.316079295154186, - "grad_norm": 0.170914426445961, + "grad_norm": 0.09563290327787399, "learning_rate": 9.974358974358974e-06, - "loss": 0.0263, + "loss": 0.0201, "step": 10275 }, { "epoch": 11.343612334801762, - "grad_norm": 0.20018179714679718, + "grad_norm": 0.44515764713287354, "learning_rate": 9.94871794871795e-06, - "loss": 0.0258, + "loss": 0.0168, "step": 10300 }, { "epoch": 11.37114537444934, - "grad_norm": 0.25566136837005615, + "grad_norm": 0.3357783854007721, "learning_rate": 9.923076923076923e-06, - "loss": 0.0229, + "loss": 0.0144, "step": 10325 }, { "epoch": 11.398678414096917, - "grad_norm": 0.16708512604236603, + "grad_norm": 0.10834582895040512, "learning_rate": 9.897435897435899e-06, - "loss": 0.0288, + "loss": 0.0207, "step": 10350 }, { "epoch": 11.426211453744493, - "grad_norm": 0.1267959177494049, + "grad_norm": 0.36438989639282227, "learning_rate": 9.871794871794872e-06, - "loss": 0.0227, + "loss": 0.0159, "step": 10375 }, { "epoch": 11.45374449339207, - "grad_norm": 0.14873263239860535, + "grad_norm": 0.2105102390050888, "learning_rate": 9.846153846153848e-06, - "loss": 0.0293, + "loss": 0.0174, "step": 10400 }, { "epoch": 11.481277533039648, - "grad_norm": 0.4109511077404022, + "grad_norm": 0.29330697655677795, "learning_rate": 9.820512820512821e-06, - "loss": 0.0254, + "loss": 0.0139, "step": 10425 }, { "epoch": 11.508810572687224, - "grad_norm": 0.16823522746562958, + "grad_norm": 0.14886349439620972, "learning_rate": 9.794871794871795e-06, - "loss": 0.0307, + "loss": 0.0168, "step": 10450 }, { "epoch": 11.536343612334802, - "grad_norm": 0.24721799790859222, + "grad_norm": 0.2385999709367752, "learning_rate": 9.76923076923077e-06, - "loss": 0.0226, + "loss": 0.0153, "step": 10475 }, { "epoch": 11.56387665198238, - "grad_norm": 0.1115945503115654, + "grad_norm": 0.12921461462974548, "learning_rate": 9.743589743589744e-06, - "loss": 0.0243, + "loss": 0.0211, "step": 10500 }, { "epoch": 11.591409691629956, - "grad_norm": 0.1863705813884735, + "grad_norm": 0.18354228138923645, "learning_rate": 9.71794871794872e-06, - "loss": 0.0283, + "loss": 0.0182, "step": 10525 }, { "epoch": 11.618942731277533, - "grad_norm": 0.1940588504076004, + "grad_norm": 0.31387805938720703, "learning_rate": 9.692307692307693e-06, - "loss": 0.0243, + "loss": 0.0149, "step": 10550 }, { "epoch": 11.646475770925111, - "grad_norm": 0.4765683114528656, + "grad_norm": 0.2267555296421051, "learning_rate": 9.666666666666667e-06, - "loss": 0.0286, + "loss": 0.0158, "step": 10575 }, { "epoch": 11.674008810572687, - "grad_norm": 0.22577764093875885, + "grad_norm": 0.20056696236133575, "learning_rate": 9.641025641025642e-06, - "loss": 0.0251, + "loss": 0.0185, "step": 10600 }, { "epoch": 11.701541850220265, - "grad_norm": 0.351157009601593, + "grad_norm": 0.33203795552253723, "learning_rate": 9.615384615384616e-06, - "loss": 0.0268, + "loss": 0.0162, "step": 10625 }, { "epoch": 11.729074889867842, - "grad_norm": 0.10466675460338593, + "grad_norm": 0.20103588700294495, "learning_rate": 9.589743589743591e-06, - "loss": 0.0248, + "loss": 0.0168, "step": 10650 }, { "epoch": 11.756607929515418, - "grad_norm": 0.14528048038482666, + "grad_norm": 0.2045770138502121, "learning_rate": 9.564102564102565e-06, - "loss": 0.0243, + "loss": 0.0151, "step": 10675 }, { "epoch": 11.784140969162996, - "grad_norm": 0.48743611574172974, + "grad_norm": 0.35391268134117126, "learning_rate": 9.53846153846154e-06, - "loss": 0.0273, + "loss": 0.0159, "step": 10700 }, { "epoch": 11.811674008810574, - "grad_norm": 0.1229957789182663, + "grad_norm": 0.29899677634239197, "learning_rate": 9.512820512820514e-06, - "loss": 0.0227, + "loss": 0.0161, "step": 10725 }, { "epoch": 11.83920704845815, - "grad_norm": 0.13707463443279266, + "grad_norm": 0.15067006647586823, "learning_rate": 9.487179487179487e-06, - "loss": 0.028, + "loss": 0.0213, "step": 10750 }, { "epoch": 11.866740088105727, - "grad_norm": 0.1293453723192215, + "grad_norm": 0.09212782979011536, "learning_rate": 9.461538461538463e-06, - "loss": 0.0203, + "loss": 0.018, "step": 10775 }, { "epoch": 11.894273127753303, - "grad_norm": 0.30864667892456055, + "grad_norm": 0.2554011642932892, "learning_rate": 9.435897435897436e-06, - "loss": 0.0271, + "loss": 0.0171, "step": 10800 }, { "epoch": 11.92180616740088, - "grad_norm": 0.19544030725955963, + "grad_norm": 0.15091727674007416, "learning_rate": 9.410256410256412e-06, - "loss": 0.0235, + "loss": 0.0173, "step": 10825 }, { "epoch": 11.949339207048459, - "grad_norm": 0.24226927757263184, + "grad_norm": 0.5245828628540039, "learning_rate": 9.384615384615385e-06, - "loss": 0.0275, + "loss": 0.0143, "step": 10850 }, { "epoch": 11.976872246696034, - "grad_norm": 0.31063777208328247, + "grad_norm": 0.2978862524032593, "learning_rate": 9.358974358974359e-06, - "loss": 0.0336, + "loss": 0.018, "step": 10875 }, { "epoch": 12.004405286343612, - "grad_norm": 0.08906027674674988, + "grad_norm": 0.07283396273851395, "learning_rate": 9.333333333333334e-06, - "loss": 0.03, + "loss": 0.0182, "step": 10900 }, { "epoch": 12.03193832599119, - "grad_norm": 0.17722788453102112, + "grad_norm": 0.13879446685314178, "learning_rate": 9.307692307692308e-06, - "loss": 0.016, + "loss": 0.0127, "step": 10925 }, { "epoch": 12.059471365638766, - "grad_norm": 0.1557055562734604, + "grad_norm": 0.16657480597496033, "learning_rate": 9.282051282051283e-06, - "loss": 0.0164, + "loss": 0.014, "step": 10950 }, { "epoch": 12.087004405286343, - "grad_norm": 0.1017475500702858, + "grad_norm": 0.16087482869625092, "learning_rate": 9.256410256410257e-06, - "loss": 0.015, + "loss": 0.0112, "step": 10975 }, { "epoch": 12.114537444933921, - "grad_norm": 0.1605156809091568, + "grad_norm": 0.18895640969276428, "learning_rate": 9.230769230769232e-06, - "loss": 0.0139, + "loss": 0.0117, "step": 11000 }, { "epoch": 12.114537444933921, - "eval_cer": 60.90584985040065, - "eval_loss": 0.9458209872245789, - "eval_runtime": 832.2415, - "eval_samples_per_second": 12.714, - "eval_steps_per_second": 3.179, - "eval_wer": 97.02970297029702, + "eval_cer": 24.002957596322496, + "eval_loss": 0.8591621518135071, + "eval_runtime": 1710.0216, + "eval_samples_per_second": 6.188, + "eval_steps_per_second": 1.547, + "eval_wer": 84.15841584158416, "step": 11000 }, { "epoch": 12.142070484581497, - "grad_norm": 0.24084699153900146, + "grad_norm": 0.08321177214384079, "learning_rate": 9.205128205128206e-06, - "loss": 0.0161, + "loss": 0.0117, "step": 11025 }, { "epoch": 12.169603524229075, - "grad_norm": 0.10124333202838898, + "grad_norm": 0.20632462203502655, "learning_rate": 9.17948717948718e-06, - "loss": 0.0145, + "loss": 0.0131, "step": 11050 }, { "epoch": 12.197136563876652, - "grad_norm": 0.6229532957077026, + "grad_norm": 0.4616119861602783, "learning_rate": 9.153846153846155e-06, - "loss": 0.0203, + "loss": 0.0184, "step": 11075 }, { "epoch": 12.224669603524228, - "grad_norm": 0.13394711911678314, + "grad_norm": 0.89581298828125, "learning_rate": 9.128205128205129e-06, - "loss": 0.0184, + "loss": 0.0155, "step": 11100 }, { "epoch": 12.252202643171806, - "grad_norm": 0.200868159532547, + "grad_norm": 0.06262637674808502, "learning_rate": 9.102564102564104e-06, - "loss": 0.0176, + "loss": 0.0114, "step": 11125 }, { "epoch": 12.279735682819384, - "grad_norm": 0.2978365123271942, + "grad_norm": 0.30928879976272583, "learning_rate": 9.076923076923078e-06, - "loss": 0.017, + "loss": 0.0152, "step": 11150 }, { "epoch": 12.30726872246696, - "grad_norm": 0.07799817621707916, + "grad_norm": 0.36566054821014404, "learning_rate": 9.051282051282051e-06, - "loss": 0.017, + "loss": 0.011, "step": 11175 }, { "epoch": 12.334801762114537, - "grad_norm": 0.20449844002723694, + "grad_norm": 0.21459051966667175, "learning_rate": 9.025641025641027e-06, - "loss": 0.0174, + "loss": 0.0127, "step": 11200 }, { "epoch": 12.362334801762115, - "grad_norm": 0.21774497628211975, + "grad_norm": 0.35859718918800354, "learning_rate": 9e-06, - "loss": 0.0147, + "loss": 0.0125, "step": 11225 }, { "epoch": 12.389867841409691, - "grad_norm": 0.2724749445915222, + "grad_norm": 0.5205895304679871, "learning_rate": 8.974358974358976e-06, - "loss": 0.0161, + "loss": 0.0135, "step": 11250 }, { "epoch": 12.417400881057269, - "grad_norm": 0.09058816730976105, + "grad_norm": 0.09275946021080017, "learning_rate": 8.94871794871795e-06, - "loss": 0.0161, + "loss": 0.0124, "step": 11275 }, { "epoch": 12.444933920704846, - "grad_norm": 0.15494456887245178, + "grad_norm": 0.4651426076889038, "learning_rate": 8.923076923076925e-06, - "loss": 0.0183, + "loss": 0.0118, "step": 11300 }, { "epoch": 12.472466960352422, - "grad_norm": 0.16612644493579865, + "grad_norm": 0.1863466203212738, "learning_rate": 8.897435897435898e-06, - "loss": 0.0164, + "loss": 0.0116, "step": 11325 }, { "epoch": 12.5, - "grad_norm": 0.20393668115139008, + "grad_norm": 0.467035174369812, "learning_rate": 8.871794871794872e-06, - "loss": 0.0193, + "loss": 0.0111, "step": 11350 }, { "epoch": 12.527533039647578, - "grad_norm": 0.1749681979417801, + "grad_norm": 0.13402460515499115, "learning_rate": 8.846153846153847e-06, - "loss": 0.0196, + "loss": 0.0147, "step": 11375 }, { "epoch": 12.555066079295154, - "grad_norm": 0.1079695075750351, + "grad_norm": 0.14492958784103394, "learning_rate": 8.820512820512821e-06, - "loss": 0.0203, + "loss": 0.0128, "step": 11400 }, { "epoch": 12.582599118942731, - "grad_norm": 0.2510460317134857, + "grad_norm": 0.17025631666183472, "learning_rate": 8.794871794871796e-06, - "loss": 0.0174, + "loss": 0.0135, "step": 11425 }, { "epoch": 12.610132158590309, - "grad_norm": 0.11694565415382385, + "grad_norm": 0.43454456329345703, "learning_rate": 8.76923076923077e-06, - "loss": 0.0163, + "loss": 0.0141, "step": 11450 }, { "epoch": 12.637665198237885, - "grad_norm": 0.37202000617980957, + "grad_norm": 0.4362468421459198, "learning_rate": 8.743589743589743e-06, - "loss": 0.0181, + "loss": 0.0167, "step": 11475 }, { "epoch": 12.665198237885463, - "grad_norm": 0.16289299726486206, + "grad_norm": 0.20894859731197357, "learning_rate": 8.717948717948719e-06, - "loss": 0.0184, + "loss": 0.019, "step": 11500 }, { "epoch": 12.69273127753304, - "grad_norm": 0.22636614739894867, + "grad_norm": 0.37119993567466736, "learning_rate": 8.692307692307692e-06, - "loss": 0.0183, + "loss": 0.0136, "step": 11525 }, { "epoch": 12.720264317180616, - "grad_norm": 0.1628304421901703, + "grad_norm": 0.06687525659799576, "learning_rate": 8.666666666666668e-06, - "loss": 0.0195, + "loss": 0.0133, "step": 11550 }, { "epoch": 12.747797356828194, - "grad_norm": 0.1048491895198822, + "grad_norm": 0.1411520540714264, "learning_rate": 8.641025641025641e-06, - "loss": 0.0154, + "loss": 0.0103, "step": 11575 }, { "epoch": 12.775330396475772, - "grad_norm": 0.4491131007671356, + "grad_norm": 0.5004802346229553, "learning_rate": 8.615384615384617e-06, - "loss": 0.0179, + "loss": 0.0139, "step": 11600 }, { "epoch": 12.802863436123348, - "grad_norm": 0.28522998094558716, + "grad_norm": 0.12091943621635437, "learning_rate": 8.58974358974359e-06, - "loss": 0.0189, + "loss": 0.0151, "step": 11625 }, { "epoch": 12.830396475770925, - "grad_norm": 0.24292542040348053, + "grad_norm": 0.39368936419487, "learning_rate": 8.564102564102564e-06, - "loss": 0.0165, + "loss": 0.0155, "step": 11650 }, { "epoch": 12.857929515418503, - "grad_norm": 0.44718441367149353, + "grad_norm": 0.1569003313779831, "learning_rate": 8.53846153846154e-06, - "loss": 0.0182, + "loss": 0.0144, "step": 11675 }, { "epoch": 12.885462555066079, - "grad_norm": 0.1012931615114212, + "grad_norm": 0.2494770586490631, "learning_rate": 8.512820512820513e-06, - "loss": 0.0183, + "loss": 0.0154, "step": 11700 }, { "epoch": 12.912995594713657, - "grad_norm": 0.14866550266742706, + "grad_norm": 0.2350165992975235, "learning_rate": 8.487179487179488e-06, - "loss": 0.0191, + "loss": 0.0154, "step": 11725 }, { "epoch": 12.940528634361234, - "grad_norm": 0.2306414097547531, + "grad_norm": 0.25457248091697693, "learning_rate": 8.461538461538462e-06, - "loss": 0.0202, + "loss": 0.0146, "step": 11750 }, { "epoch": 12.96806167400881, - "grad_norm": 0.2795422077178955, + "grad_norm": 0.5396599769592285, "learning_rate": 8.435897435897436e-06, - "loss": 0.0209, + "loss": 0.0164, "step": 11775 }, { "epoch": 12.995594713656388, - "grad_norm": 0.22507907450199127, + "grad_norm": 0.08093304932117462, "learning_rate": 8.410256410256411e-06, - "loss": 0.021, + "loss": 0.0152, "step": 11800 }, { "epoch": 13.023127753303966, - "grad_norm": 0.10657211393117905, + "grad_norm": 0.2992173433303833, "learning_rate": 8.384615384615385e-06, - "loss": 0.0133, + "loss": 0.0135, "step": 11825 }, { "epoch": 13.050660792951541, - "grad_norm": 0.3987150490283966, + "grad_norm": 0.19341129064559937, "learning_rate": 8.35897435897436e-06, - "loss": 0.0123, + "loss": 0.0126, "step": 11850 }, { "epoch": 13.07819383259912, - "grad_norm": 0.16128714382648468, + "grad_norm": 0.3333016633987427, "learning_rate": 8.333333333333334e-06, - "loss": 0.0122, + "loss": 0.0172, "step": 11875 }, { "epoch": 13.105726872246697, - "grad_norm": 0.13785377144813538, + "grad_norm": 0.22165773808956146, "learning_rate": 8.307692307692309e-06, - "loss": 0.0101, + "loss": 0.0107, "step": 11900 }, { "epoch": 13.133259911894273, - "grad_norm": 0.180278480052948, + "grad_norm": 0.14357928931713104, "learning_rate": 8.282051282051283e-06, - "loss": 0.0126, + "loss": 0.0125, "step": 11925 }, { "epoch": 13.16079295154185, - "grad_norm": 0.06596548855304718, + "grad_norm": 0.46441957354545593, "learning_rate": 8.256410256410256e-06, - "loss": 0.0105, + "loss": 0.0098, "step": 11950 }, { "epoch": 13.188325991189428, - "grad_norm": 0.17440514266490936, + "grad_norm": 0.0940885990858078, "learning_rate": 8.230769230769232e-06, - "loss": 0.0109, + "loss": 0.0126, "step": 11975 }, { "epoch": 13.215859030837004, - "grad_norm": 0.16661639511585236, + "grad_norm": 0.09288233518600464, "learning_rate": 8.205128205128205e-06, - "loss": 0.013, + "loss": 0.0111, "step": 12000 }, { "epoch": 13.215859030837004, - "eval_cer": 57.880618573246366, - "eval_loss": 0.9624491930007935, - "eval_runtime": 831.8616, - "eval_samples_per_second": 12.72, - "eval_steps_per_second": 3.181, - "eval_wer": 96.82225365393681, + "eval_cer": 24.353742276432083, + "eval_loss": 0.8609709739685059, + "eval_runtime": 1703.4403, + "eval_samples_per_second": 6.212, + "eval_steps_per_second": 1.553, + "eval_wer": 83.83781235266385, "step": 12000 }, { "epoch": 13.243392070484582, - "grad_norm": 0.23276664316654205, + "grad_norm": 0.2091829776763916, "learning_rate": 8.17948717948718e-06, - "loss": 0.0127, + "loss": 0.0098, "step": 12025 }, { "epoch": 13.270925110132158, - "grad_norm": 0.11635535210371017, + "grad_norm": 0.02503281459212303, "learning_rate": 8.153846153846154e-06, - "loss": 0.0095, + "loss": 0.0066, "step": 12050 }, { "epoch": 13.298458149779735, - "grad_norm": 0.16364231705665588, + "grad_norm": 0.2379840463399887, "learning_rate": 8.12820512820513e-06, - "loss": 0.0114, + "loss": 0.0106, "step": 12075 }, { "epoch": 13.325991189427313, - "grad_norm": 0.13409483432769775, + "grad_norm": 0.06553735584020615, "learning_rate": 8.102564102564103e-06, - "loss": 0.0122, + "loss": 0.0114, "step": 12100 }, { "epoch": 13.353524229074889, - "grad_norm": 0.10241974890232086, + "grad_norm": 0.05876093730330467, "learning_rate": 8.076923076923077e-06, - "loss": 0.0121, + "loss": 0.0089, "step": 12125 }, { "epoch": 13.381057268722467, - "grad_norm": 0.07747479528188705, + "grad_norm": 0.0722096711397171, "learning_rate": 8.051282051282052e-06, - "loss": 0.0125, + "loss": 0.01, "step": 12150 }, { "epoch": 13.408590308370044, - "grad_norm": 0.08573091775178909, + "grad_norm": 0.16798360645771027, "learning_rate": 8.025641025641026e-06, - "loss": 0.0125, + "loss": 0.0112, "step": 12175 }, { "epoch": 13.43612334801762, - "grad_norm": 0.15942828357219696, + "grad_norm": 0.08319924026727676, "learning_rate": 8.000000000000001e-06, - "loss": 0.0127, + "loss": 0.0164, "step": 12200 }, { "epoch": 13.463656387665198, - "grad_norm": 0.22725528478622437, + "grad_norm": 0.03909989818930626, "learning_rate": 7.974358974358975e-06, - "loss": 0.0135, + "loss": 0.0124, "step": 12225 }, { "epoch": 13.491189427312776, - "grad_norm": 0.07710346579551697, + "grad_norm": 0.3100120425224304, "learning_rate": 7.948717948717949e-06, - "loss": 0.01, + "loss": 0.0103, "step": 12250 }, { "epoch": 13.518722466960352, - "grad_norm": 0.06472910940647125, + "grad_norm": 0.20275218784809113, "learning_rate": 7.923076923076924e-06, - "loss": 0.0126, + "loss": 0.0124, "step": 12275 }, { "epoch": 13.54625550660793, - "grad_norm": 0.20756784081459045, + "grad_norm": 0.43703603744506836, "learning_rate": 7.897435897435898e-06, - "loss": 0.0135, + "loss": 0.0126, "step": 12300 }, { "epoch": 13.573788546255507, - "grad_norm": 0.1374279409646988, + "grad_norm": 0.17036165297031403, "learning_rate": 7.871794871794873e-06, - "loss": 0.0128, + "loss": 0.0123, "step": 12325 }, { "epoch": 13.601321585903083, - "grad_norm": 0.08940647542476654, + "grad_norm": 0.2403183877468109, "learning_rate": 7.846153846153847e-06, - "loss": 0.0122, + "loss": 0.0124, "step": 12350 }, { "epoch": 13.62885462555066, - "grad_norm": 0.174547016620636, + "grad_norm": 0.14506351947784424, "learning_rate": 7.820512820512822e-06, - "loss": 0.0131, + "loss": 0.0113, "step": 12375 }, { "epoch": 13.656387665198238, - "grad_norm": 0.08119652420282364, + "grad_norm": 0.17597095668315887, "learning_rate": 7.794871794871796e-06, - "loss": 0.0128, + "loss": 0.013, "step": 12400 }, { "epoch": 13.683920704845814, - "grad_norm": 0.08605458587408066, + "grad_norm": 0.10913864523172379, "learning_rate": 7.76923076923077e-06, - "loss": 0.0121, + "loss": 0.0111, "step": 12425 }, { "epoch": 13.711453744493392, - "grad_norm": 0.06772664934396744, + "grad_norm": 0.09308384358882904, "learning_rate": 7.743589743589745e-06, - "loss": 0.0124, + "loss": 0.0107, "step": 12450 }, { "epoch": 13.73898678414097, - "grad_norm": 0.1438221037387848, + "grad_norm": 0.15387412905693054, "learning_rate": 7.717948717948718e-06, - "loss": 0.0118, + "loss": 0.0081, "step": 12475 }, { "epoch": 13.766519823788546, - "grad_norm": 0.24703101813793182, + "grad_norm": 0.1064794659614563, "learning_rate": 7.692307692307694e-06, - "loss": 0.013, + "loss": 0.0103, "step": 12500 }, { "epoch": 13.794052863436123, - "grad_norm": 0.06869100034236908, + "grad_norm": 0.461704283952713, "learning_rate": 7.666666666666667e-06, - "loss": 0.0105, + "loss": 0.0099, "step": 12525 }, { "epoch": 13.821585903083701, - "grad_norm": 0.2140737622976303, + "grad_norm": 0.1432102471590042, "learning_rate": 7.641025641025641e-06, - "loss": 0.0153, + "loss": 0.012, "step": 12550 }, { "epoch": 13.849118942731277, - "grad_norm": 0.14644251763820648, + "grad_norm": 0.07016371935606003, "learning_rate": 7.615384615384615e-06, - "loss": 0.0113, + "loss": 0.0102, "step": 12575 }, { "epoch": 13.876651982378855, - "grad_norm": 0.0690101683139801, + "grad_norm": 0.07148536294698715, "learning_rate": 7.58974358974359e-06, - "loss": 0.0114, + "loss": 0.0135, "step": 12600 }, { "epoch": 13.904185022026432, - "grad_norm": 0.07625989615917206, + "grad_norm": 0.11293036490678787, "learning_rate": 7.564102564102564e-06, - "loss": 0.0117, + "loss": 0.0119, "step": 12625 }, { "epoch": 13.931718061674008, - "grad_norm": 0.06748715043067932, + "grad_norm": 0.12203430384397507, "learning_rate": 7.538461538461539e-06, - "loss": 0.0126, + "loss": 0.0133, "step": 12650 }, { "epoch": 13.959251101321586, - "grad_norm": 0.11845114827156067, + "grad_norm": 0.18345963954925537, "learning_rate": 7.512820512820513e-06, - "loss": 0.0121, + "loss": 0.0131, "step": 12675 }, { "epoch": 13.986784140969164, - "grad_norm": 0.16305984556674957, + "grad_norm": 0.135273739695549, "learning_rate": 7.487179487179488e-06, - "loss": 0.0142, + "loss": 0.0144, "step": 12700 }, { "epoch": 14.01431718061674, - "grad_norm": 0.04868720471858978, + "grad_norm": 0.07620234042406082, "learning_rate": 7.461538461538462e-06, - "loss": 0.0101, + "loss": 0.0095, "step": 12725 }, { "epoch": 14.041850220264317, - "grad_norm": 0.19972330331802368, + "grad_norm": 0.02833453379571438, "learning_rate": 7.435897435897437e-06, - "loss": 0.0078, + "loss": 0.0064, "step": 12750 }, { "epoch": 14.069383259911895, - "grad_norm": 0.06579900532960892, + "grad_norm": 0.19002589583396912, "learning_rate": 7.410256410256411e-06, - "loss": 0.0097, + "loss": 0.0088, "step": 12775 }, { "epoch": 14.09691629955947, - "grad_norm": 0.07141165435314178, + "grad_norm": 0.15754035115242004, "learning_rate": 7.384615384615386e-06, - "loss": 0.0075, + "loss": 0.0055, "step": 12800 }, { "epoch": 14.124449339207048, - "grad_norm": 0.0851076990365982, + "grad_norm": 0.43471795320510864, "learning_rate": 7.35897435897436e-06, - "loss": 0.0087, + "loss": 0.0088, "step": 12825 }, { "epoch": 14.151982378854626, - "grad_norm": 0.08377552032470703, + "grad_norm": 0.2886705696582794, "learning_rate": 7.333333333333333e-06, - "loss": 0.0097, + "loss": 0.0074, "step": 12850 }, { "epoch": 14.179515418502202, - "grad_norm": 0.05744962766766548, + "grad_norm": 0.2692849040031433, "learning_rate": 7.307692307692308e-06, - "loss": 0.007, + "loss": 0.0141, "step": 12875 }, { "epoch": 14.20704845814978, - "grad_norm": 0.04978534206748009, + "grad_norm": 0.12292686104774475, "learning_rate": 7.282051282051282e-06, - "loss": 0.0079, + "loss": 0.0151, "step": 12900 }, { "epoch": 14.234581497797357, - "grad_norm": 0.07835716754198074, + "grad_norm": 0.36948704719543457, "learning_rate": 7.256410256410257e-06, - "loss": 0.0082, + "loss": 0.0083, "step": 12925 }, { "epoch": 14.262114537444933, - "grad_norm": 0.1253698170185089, + "grad_norm": 0.19203431904315948, "learning_rate": 7.230769230769231e-06, - "loss": 0.0073, + "loss": 0.0066, "step": 12950 }, { "epoch": 14.289647577092511, - "grad_norm": 0.06475073099136353, + "grad_norm": 0.13032004237174988, "learning_rate": 7.205128205128206e-06, - "loss": 0.0076, + "loss": 0.0117, "step": 12975 }, { "epoch": 14.317180616740089, - "grad_norm": 0.05178418755531311, + "grad_norm": 0.4582443833351135, "learning_rate": 7.17948717948718e-06, - "loss": 0.008, + "loss": 0.0088, "step": 13000 }, { "epoch": 14.317180616740089, - "eval_cer": 57.12803640824, - "eval_loss": 0.979995608329773, - "eval_runtime": 852.1277, - "eval_samples_per_second": 12.417, - "eval_steps_per_second": 3.105, - "eval_wer": 96.71852899575671, + "eval_cer": 24.632306581224995, + "eval_loss": 0.8743005394935608, + "eval_runtime": 1717.1726, + "eval_samples_per_second": 6.162, + "eval_steps_per_second": 1.541, + "eval_wer": 84.0924092409241, "step": 13000 }, { "epoch": 14.344713656387665, - "grad_norm": 0.07015621662139893, + "grad_norm": 0.17969651520252228, "learning_rate": 7.153846153846155e-06, - "loss": 0.0081, + "loss": 0.0088, "step": 13025 }, { "epoch": 14.372246696035242, - "grad_norm": 0.061675600707530975, + "grad_norm": 0.2400025725364685, "learning_rate": 7.128205128205129e-06, - "loss": 0.0076, + "loss": 0.0112, "step": 13050 }, { "epoch": 14.39977973568282, - "grad_norm": 0.1807292252779007, + "grad_norm": 0.1150280013680458, "learning_rate": 7.102564102564104e-06, - "loss": 0.009, + "loss": 0.0079, "step": 13075 }, { "epoch": 14.427312775330396, - "grad_norm": 0.05348524823784828, + "grad_norm": 0.676898717880249, "learning_rate": 7.076923076923078e-06, - "loss": 0.0085, + "loss": 0.0097, "step": 13100 }, { "epoch": 14.454845814977974, - "grad_norm": 0.040988489985466, + "grad_norm": 0.22521457076072693, "learning_rate": 7.051282051282053e-06, - "loss": 0.0082, + "loss": 0.0067, "step": 13125 }, { "epoch": 14.482378854625551, - "grad_norm": 0.06997233629226685, + "grad_norm": 0.05301696062088013, "learning_rate": 7.025641025641025e-06, - "loss": 0.0081, + "loss": 0.0082, "step": 13150 }, { "epoch": 14.509911894273127, - "grad_norm": 0.08544085174798965, + "grad_norm": 0.056506820023059845, "learning_rate": 7e-06, - "loss": 0.0074, + "loss": 0.0068, "step": 13175 }, { "epoch": 14.537444933920705, - "grad_norm": 0.16296857595443726, + "grad_norm": 0.31176385283470154, "learning_rate": 6.974358974358974e-06, - "loss": 0.0086, + "loss": 0.0076, "step": 13200 }, { "epoch": 14.564977973568283, - "grad_norm": 0.04533977061510086, + "grad_norm": 0.27583009004592896, "learning_rate": 6.948717948717949e-06, - "loss": 0.0074, + "loss": 0.0069, "step": 13225 }, { "epoch": 14.592511013215859, - "grad_norm": 0.11487758159637451, + "grad_norm": 0.04734191671013832, "learning_rate": 6.923076923076923e-06, - "loss": 0.0088, + "loss": 0.0102, "step": 13250 }, { "epoch": 14.620044052863436, - "grad_norm": 0.05602938309311867, + "grad_norm": 0.26911285519599915, "learning_rate": 6.897435897435898e-06, - "loss": 0.0078, + "loss": 0.0101, "step": 13275 }, { "epoch": 14.647577092511014, - "grad_norm": 0.06876658648252487, + "grad_norm": 0.07513406872749329, "learning_rate": 6.871794871794872e-06, - "loss": 0.009, + "loss": 0.008, "step": 13300 }, { "epoch": 14.67511013215859, - "grad_norm": 0.21141541004180908, + "grad_norm": 0.2528563141822815, "learning_rate": 6.846153846153847e-06, - "loss": 0.008, + "loss": 0.0065, "step": 13325 }, { "epoch": 14.702643171806168, - "grad_norm": 0.25995275378227234, + "grad_norm": 0.052279576659202576, "learning_rate": 6.820512820512821e-06, - "loss": 0.0083, + "loss": 0.0092, "step": 13350 }, { "epoch": 14.730176211453745, - "grad_norm": 0.07600809633731842, + "grad_norm": 0.16867485642433167, "learning_rate": 6.794871794871796e-06, - "loss": 0.0088, + "loss": 0.0063, "step": 13375 }, { "epoch": 14.757709251101321, - "grad_norm": 0.08052767813205719, + "grad_norm": 0.09278307855129242, "learning_rate": 6.76923076923077e-06, - "loss": 0.0086, + "loss": 0.007, "step": 13400 }, { "epoch": 14.785242290748899, - "grad_norm": 0.0512065626680851, + "grad_norm": 0.03741838410496712, "learning_rate": 6.743589743589745e-06, - "loss": 0.0077, + "loss": 0.0049, "step": 13425 }, { "epoch": 14.812775330396477, - "grad_norm": 0.06173788756132126, + "grad_norm": 0.07582259178161621, "learning_rate": 6.717948717948718e-06, - "loss": 0.0082, + "loss": 0.0089, "step": 13450 }, { "epoch": 14.840308370044053, - "grad_norm": 0.05340331420302391, + "grad_norm": 0.22878766059875488, "learning_rate": 6.692307692307692e-06, - "loss": 0.0082, + "loss": 0.006, "step": 13475 }, { "epoch": 14.86784140969163, - "grad_norm": 0.05996181070804596, + "grad_norm": 0.19926956295967102, "learning_rate": 6.666666666666667e-06, - "loss": 0.0087, + "loss": 0.0079, "step": 13500 }, { "epoch": 14.895374449339208, - "grad_norm": 0.05534656345844269, + "grad_norm": 0.08697038888931274, "learning_rate": 6.641025641025641e-06, "loss": 0.0092, "step": 13525 }, { "epoch": 14.922907488986784, - "grad_norm": 0.0450417622923851, + "grad_norm": 0.05428479611873627, "learning_rate": 6.615384615384616e-06, - "loss": 0.0072, + "loss": 0.0055, "step": 13550 }, { "epoch": 14.950440528634362, - "grad_norm": 0.07858394831418991, + "grad_norm": 0.06628289073705673, "learning_rate": 6.58974358974359e-06, - "loss": 0.0081, + "loss": 0.0068, "step": 13575 }, { "epoch": 14.97797356828194, - "grad_norm": 0.12585800886154175, + "grad_norm": 0.07876273989677429, "learning_rate": 6.564102564102565e-06, - "loss": 0.0097, + "loss": 0.0073, "step": 13600 }, { "epoch": 15.005506607929515, - "grad_norm": 0.08572439104318619, + "grad_norm": 0.047706760466098785, "learning_rate": 6.538461538461539e-06, - "loss": 0.0096, + "loss": 0.0087, "step": 13625 }, { "epoch": 15.033039647577093, - "grad_norm": 0.051955390721559525, + "grad_norm": 0.06642945855855942, "learning_rate": 6.512820512820514e-06, - "loss": 0.0057, + "loss": 0.0043, "step": 13650 }, { "epoch": 15.060572687224669, - "grad_norm": 0.04480992630124092, + "grad_norm": 0.028193380683660507, "learning_rate": 6.487179487179488e-06, - "loss": 0.0052, + "loss": 0.0038, "step": 13675 }, { "epoch": 15.088105726872246, - "grad_norm": 0.03293057531118393, + "grad_norm": 0.11379621922969818, "learning_rate": 6.461538461538463e-06, - "loss": 0.0057, + "loss": 0.0049, "step": 13700 }, { "epoch": 15.115638766519824, - "grad_norm": 0.0531000941991806, + "grad_norm": 0.09332601726055145, "learning_rate": 6.435897435897437e-06, - "loss": 0.0059, + "loss": 0.0082, "step": 13725 }, { "epoch": 15.1431718061674, - "grad_norm": 0.030901802703738213, + "grad_norm": 0.029045449569821358, "learning_rate": 6.410256410256412e-06, - "loss": 0.0058, + "loss": 0.0054, "step": 13750 }, { "epoch": 15.170704845814978, - "grad_norm": 0.04868703335523605, + "grad_norm": 0.025950396433472633, "learning_rate": 6.384615384615384e-06, - "loss": 0.0056, + "loss": 0.0061, "step": 13775 }, { "epoch": 15.198237885462555, - "grad_norm": 0.045157793909311295, + "grad_norm": 0.09153343737125397, "learning_rate": 6.358974358974359e-06, - "loss": 0.0054, + "loss": 0.0064, "step": 13800 }, { "epoch": 15.225770925110131, - "grad_norm": 0.07468298077583313, + "grad_norm": 0.02982979267835617, "learning_rate": 6.333333333333333e-06, - "loss": 0.0071, + "loss": 0.0079, "step": 13825 }, { "epoch": 15.253303964757709, - "grad_norm": 0.06386591494083405, + "grad_norm": 0.3530775010585785, "learning_rate": 6.307692307692308e-06, - "loss": 0.006, + "loss": 0.0075, "step": 13850 }, { "epoch": 15.280837004405287, - "grad_norm": 0.04140784963965416, + "grad_norm": 0.7259305715560913, "learning_rate": 6.282051282051282e-06, - "loss": 0.006, + "loss": 0.0057, "step": 13875 }, { "epoch": 15.308370044052863, - "grad_norm": 0.09238462150096893, + "grad_norm": 0.04625716805458069, "learning_rate": 6.256410256410257e-06, - "loss": 0.0061, + "loss": 0.0051, "step": 13900 }, { "epoch": 15.33590308370044, - "grad_norm": 0.2956899106502533, + "grad_norm": 0.04628950357437134, "learning_rate": 6.230769230769231e-06, - "loss": 0.006, + "loss": 0.0059, "step": 13925 }, { "epoch": 15.363436123348018, - "grad_norm": 0.048573561012744904, + "grad_norm": 0.020349033176898956, "learning_rate": 6.205128205128206e-06, - "loss": 0.0069, + "loss": 0.0088, "step": 13950 }, { "epoch": 15.390969162995594, - "grad_norm": 0.17084334790706635, + "grad_norm": 0.9620158672332764, "learning_rate": 6.17948717948718e-06, - "loss": 0.0068, + "loss": 0.006, "step": 13975 }, { "epoch": 15.418502202643172, - "grad_norm": 0.06327039748430252, + "grad_norm": 0.12168940901756287, "learning_rate": 6.153846153846155e-06, - "loss": 0.0062, + "loss": 0.0112, "step": 14000 }, { "epoch": 15.418502202643172, - "eval_cer": 55.30074627721161, - "eval_loss": 0.9948021769523621, - "eval_runtime": 827.2524, - "eval_samples_per_second": 12.791, - "eval_steps_per_second": 3.199, - "eval_wer": 96.67138142385667, + "eval_cer": 24.93437116687492, + "eval_loss": 0.8769130110740662, + "eval_runtime": 1701.8659, + "eval_samples_per_second": 6.217, + "eval_steps_per_second": 1.555, + "eval_wer": 84.18670438472418, "step": 14000 }, { "epoch": 15.44603524229075, - "grad_norm": 0.14174892008304596, + "grad_norm": 0.0730656310915947, "learning_rate": 6.128205128205129e-06, - "loss": 0.0061, + "loss": 0.0051, "step": 14025 }, { "epoch": 15.473568281938325, - "grad_norm": 0.05173831433057785, + "grad_norm": 0.38274314999580383, "learning_rate": 6.102564102564104e-06, - "loss": 0.0051, + "loss": 0.0118, "step": 14050 }, { "epoch": 15.501101321585903, - "grad_norm": 0.10093101114034653, + "grad_norm": 0.6983866691589355, "learning_rate": 6.076923076923077e-06, - "loss": 0.0061, + "loss": 0.0126, "step": 14075 }, { "epoch": 15.52863436123348, - "grad_norm": 0.03614628687500954, + "grad_norm": 0.14446529746055603, "learning_rate": 6.051282051282051e-06, - "loss": 0.0052, + "loss": 0.0063, "step": 14100 }, { "epoch": 15.556167400881057, - "grad_norm": 0.13150542974472046, + "grad_norm": 0.03284487873315811, "learning_rate": 6.025641025641026e-06, - "loss": 0.0055, + "loss": 0.0072, "step": 14125 }, { "epoch": 15.583700440528634, - "grad_norm": 0.13639768958091736, + "grad_norm": 0.14825402200222015, "learning_rate": 6e-06, - "loss": 0.0058, + "loss": 0.0074, "step": 14150 }, { "epoch": 15.611233480176212, - "grad_norm": 0.04362841695547104, + "grad_norm": 0.08878491818904877, "learning_rate": 5.974358974358975e-06, - "loss": 0.0064, + "loss": 0.0067, "step": 14175 }, { "epoch": 15.638766519823788, - "grad_norm": 0.054506704211235046, + "grad_norm": 0.36057594418525696, "learning_rate": 5.948717948717949e-06, - "loss": 0.0059, + "loss": 0.0112, "step": 14200 }, { "epoch": 15.666299559471366, - "grad_norm": 0.059661999344825745, + "grad_norm": 0.09728468954563141, "learning_rate": 5.923076923076924e-06, - "loss": 0.0055, + "loss": 0.007, "step": 14225 }, { "epoch": 15.693832599118943, - "grad_norm": 0.06359941512346268, + "grad_norm": 0.2802368402481079, "learning_rate": 5.897435897435898e-06, - "loss": 0.0063, + "loss": 0.0065, "step": 14250 }, { "epoch": 15.72136563876652, - "grad_norm": 0.05899536609649658, + "grad_norm": 0.05950731784105301, "learning_rate": 5.871794871794873e-06, - "loss": 0.0062, + "loss": 0.0086, "step": 14275 }, { "epoch": 15.748898678414097, - "grad_norm": 0.04062044993042946, + "grad_norm": 0.025011401623487473, "learning_rate": 5.846153846153847e-06, - "loss": 0.006, + "loss": 0.0042, "step": 14300 }, { "epoch": 15.776431718061675, - "grad_norm": 0.07000496983528137, + "grad_norm": 0.037724148482084274, "learning_rate": 5.820512820512822e-06, - "loss": 0.0057, + "loss": 0.0043, "step": 14325 }, { "epoch": 15.80396475770925, - "grad_norm": 0.14449462294578552, + "grad_norm": 0.05708827078342438, "learning_rate": 5.794871794871796e-06, - "loss": 0.0061, + "loss": 0.0051, "step": 14350 }, { "epoch": 15.831497797356828, - "grad_norm": 0.03734823316335678, + "grad_norm": 0.1876123994588852, "learning_rate": 5.769230769230769e-06, - "loss": 0.0055, + "loss": 0.0044, "step": 14375 }, { "epoch": 15.859030837004406, - "grad_norm": 0.08209270238876343, + "grad_norm": 0.059942033141851425, "learning_rate": 5.743589743589743e-06, - "loss": 0.0059, + "loss": 0.0049, "step": 14400 }, { "epoch": 15.886563876651982, - "grad_norm": 0.11653455346822739, + "grad_norm": 0.10881105065345764, "learning_rate": 5.717948717948718e-06, - "loss": 0.0059, + "loss": 0.0053, "step": 14425 }, { "epoch": 15.91409691629956, - "grad_norm": 0.09956145286560059, + "grad_norm": 0.22075070440769196, "learning_rate": 5.692307692307692e-06, - "loss": 0.006, + "loss": 0.0055, "step": 14450 }, { "epoch": 15.941629955947137, - "grad_norm": 0.06071498617529869, + "grad_norm": 0.2150040864944458, "learning_rate": 5.666666666666667e-06, - "loss": 0.0056, + "loss": 0.0077, "step": 14475 }, { "epoch": 15.969162995594713, - "grad_norm": 0.05018926039338112, + "grad_norm": 0.259221613407135, "learning_rate": 5.641025641025641e-06, - "loss": 0.0054, + "loss": 0.0044, "step": 14500 }, { "epoch": 15.996696035242291, - "grad_norm": 0.04555997997522354, + "grad_norm": 0.08321662992238998, "learning_rate": 5.615384615384616e-06, - "loss": 0.0056, + "loss": 0.0052, "step": 14525 }, { "epoch": 16.024229074889867, - "grad_norm": 0.03836214169859886, + "grad_norm": 0.07159877568483353, "learning_rate": 5.58974358974359e-06, - "loss": 0.0048, + "loss": 0.0043, "step": 14550 }, { "epoch": 16.051762114537446, - "grad_norm": 0.03030628338456154, + "grad_norm": 0.04299210384488106, "learning_rate": 5.564102564102565e-06, - "loss": 0.0043, + "loss": 0.0061, "step": 14575 }, { "epoch": 16.079295154185022, - "grad_norm": 0.027087492868304253, + "grad_norm": 0.013192574493587017, "learning_rate": 5.538461538461539e-06, - "loss": 0.0041, + "loss": 0.0069, "step": 14600 }, { "epoch": 16.106828193832598, - "grad_norm": 0.12359996885061264, + "grad_norm": 0.027391914278268814, "learning_rate": 5.512820512820514e-06, - "loss": 0.0042, + "loss": 0.0038, "step": 14625 }, { "epoch": 16.134361233480178, - "grad_norm": 0.026834385469555855, + "grad_norm": 0.04832287132740021, "learning_rate": 5.487179487179488e-06, - "loss": 0.0049, + "loss": 0.0048, "step": 14650 }, { "epoch": 16.161894273127754, - "grad_norm": 0.09593966603279114, + "grad_norm": 0.18682819604873657, "learning_rate": 5.461538461538461e-06, - "loss": 0.0045, + "loss": 0.0039, "step": 14675 }, { "epoch": 16.18942731277533, - "grad_norm": 0.03235497698187828, + "grad_norm": 0.0538317896425724, "learning_rate": 5.435897435897436e-06, - "loss": 0.0043, + "loss": 0.0061, "step": 14700 }, { "epoch": 16.21696035242291, - "grad_norm": 0.032602131366729736, + "grad_norm": 0.024896269664168358, "learning_rate": 5.41025641025641e-06, - "loss": 0.0055, + "loss": 0.0063, "step": 14725 }, { "epoch": 16.244493392070485, - "grad_norm": 0.0448690690100193, + "grad_norm": 0.0841188058257103, "learning_rate": 5.384615384615385e-06, - "loss": 0.0042, + "loss": 0.0033, "step": 14750 }, { "epoch": 16.27202643171806, - "grad_norm": 0.03582735359668732, + "grad_norm": 0.04172555357217789, "learning_rate": 5.358974358974359e-06, - "loss": 0.0045, + "loss": 0.0057, "step": 14775 }, { "epoch": 16.29955947136564, - "grad_norm": 0.039651673287153244, + "grad_norm": 0.026171807199716568, "learning_rate": 5.333333333333334e-06, - "loss": 0.0045, + "loss": 0.0056, "step": 14800 }, { "epoch": 16.327092511013216, - "grad_norm": 0.029694747179746628, + "grad_norm": 0.3256167471408844, "learning_rate": 5.307692307692308e-06, - "loss": 0.0041, + "loss": 0.0065, "step": 14825 }, { "epoch": 16.354625550660792, - "grad_norm": 0.027761176228523254, + "grad_norm": 0.08626765012741089, "learning_rate": 5.282051282051283e-06, - "loss": 0.0045, + "loss": 0.0031, "step": 14850 }, { "epoch": 16.38215859030837, - "grad_norm": 0.038026902824640274, + "grad_norm": 0.02025369182229042, "learning_rate": 5.256410256410257e-06, - "loss": 0.005, + "loss": 0.0052, "step": 14875 }, { "epoch": 16.409691629955947, - "grad_norm": 0.04538935795426369, + "grad_norm": 0.030549725517630577, "learning_rate": 5.230769230769232e-06, - "loss": 0.0046, + "loss": 0.0045, "step": 14900 }, { "epoch": 16.437224669603523, - "grad_norm": 0.0366770476102829, + "grad_norm": 0.03368716686964035, "learning_rate": 5.205128205128206e-06, - "loss": 0.0044, + "loss": 0.0043, "step": 14925 }, { "epoch": 16.464757709251103, - "grad_norm": 0.04620107263326645, + "grad_norm": 0.35517045855522156, "learning_rate": 5.179487179487181e-06, - "loss": 0.0049, + "loss": 0.0065, "step": 14950 }, { "epoch": 16.49229074889868, - "grad_norm": 0.026445144787430763, + "grad_norm": 0.01714472658932209, "learning_rate": 5.1538461538461534e-06, - "loss": 0.0048, + "loss": 0.0063, "step": 14975 }, { "epoch": 16.519823788546255, - "grad_norm": 0.36674734950065613, + "grad_norm": 1.1677449941635132, "learning_rate": 5.128205128205128e-06, - "loss": 0.0044, + "loss": 0.0109, "step": 15000 }, { "epoch": 16.519823788546255, - "eval_cer": 57.25986725207204, - "eval_loss": 1.008760690689087, - "eval_runtime": 842.7849, - "eval_samples_per_second": 12.555, - "eval_steps_per_second": 3.14, - "eval_wer": 96.68081093823669, + "eval_cer": 24.621416207169304, + "eval_loss": 0.8773795962333679, + "eval_runtime": 1704.8877, + "eval_samples_per_second": 6.206, + "eval_steps_per_second": 1.552, + "eval_wer": 84.67703913248468, "step": 15000 }, { "epoch": 16.547356828193834, - "grad_norm": 0.10816098749637604, + "grad_norm": 0.10291285067796707, "learning_rate": 5.1025641025641024e-06, - "loss": 0.0046, + "loss": 0.0041, "step": 15025 }, { "epoch": 16.57488986784141, - "grad_norm": 0.047246623784303665, + "grad_norm": 0.1358381062746048, "learning_rate": 5.076923076923077e-06, - "loss": 0.0042, + "loss": 0.0066, "step": 15050 }, { "epoch": 16.602422907488986, - "grad_norm": 0.029092887416481972, + "grad_norm": 0.020193297415971756, "learning_rate": 5.051282051282051e-06, - "loss": 0.0047, + "loss": 0.0054, "step": 15075 }, { "epoch": 16.629955947136565, - "grad_norm": 0.041775159537792206, + "grad_norm": 0.03404547646641731, "learning_rate": 5.025641025641026e-06, - "loss": 0.0039, + "loss": 0.003, "step": 15100 }, { "epoch": 16.65748898678414, - "grad_norm": 0.04027463123202324, + "grad_norm": 0.07087010145187378, "learning_rate": 5e-06, - "loss": 0.0046, + "loss": 0.0043, "step": 15125 }, { "epoch": 16.685022026431717, - "grad_norm": 0.025997977703809738, + "grad_norm": 0.01731196418404579, "learning_rate": 4.974358974358975e-06, - "loss": 0.0039, + "loss": 0.0054, "step": 15150 }, { "epoch": 16.712555066079297, - "grad_norm": 0.038961417973041534, + "grad_norm": 0.3552390933036804, "learning_rate": 4.948717948717949e-06, - "loss": 0.0043, + "loss": 0.0056, "step": 15175 }, { "epoch": 16.740088105726873, - "grad_norm": 0.06082303076982498, + "grad_norm": 0.626363217830658, "learning_rate": 4.923076923076924e-06, - "loss": 0.0048, + "loss": 0.0079, "step": 15200 }, { "epoch": 16.76762114537445, - "grad_norm": 0.027074238285422325, + "grad_norm": 0.0181206613779068, "learning_rate": 4.8974358974358975e-06, - "loss": 0.0046, + "loss": 0.006, "step": 15225 }, { "epoch": 16.795154185022028, - "grad_norm": 0.03542567789554596, + "grad_norm": 0.09998787939548492, "learning_rate": 4.871794871794872e-06, - "loss": 0.0045, + "loss": 0.0055, "step": 15250 }, { "epoch": 16.822687224669604, - "grad_norm": 0.04304325580596924, + "grad_norm": 0.1320696324110031, "learning_rate": 4.8461538461538465e-06, - "loss": 0.0044, + "loss": 0.004, "step": 15275 }, { "epoch": 16.85022026431718, - "grad_norm": 0.08053645491600037, + "grad_norm": 0.09725293517112732, "learning_rate": 4.820512820512821e-06, - "loss": 0.0045, + "loss": 0.0047, "step": 15300 }, { "epoch": 16.87775330396476, - "grad_norm": 0.0946226641535759, + "grad_norm": 0.11701034754514694, "learning_rate": 4.7948717948717955e-06, - "loss": 0.0064, + "loss": 0.0038, "step": 15325 }, { "epoch": 16.905286343612335, - "grad_norm": 0.03868434950709343, + "grad_norm": 0.046660326421260834, "learning_rate": 4.76923076923077e-06, - "loss": 0.0042, + "loss": 0.0052, "step": 15350 }, { "epoch": 16.93281938325991, - "grad_norm": 0.07756806910037994, + "grad_norm": 0.03657762333750725, "learning_rate": 4.743589743589744e-06, - "loss": 0.0041, + "loss": 0.004, "step": 15375 }, { "epoch": 16.96035242290749, - "grad_norm": 0.03353738412261009, + "grad_norm": 0.13504423201084137, "learning_rate": 4.717948717948718e-06, - "loss": 0.0048, + "loss": 0.0038, "step": 15400 }, { "epoch": 16.987885462555067, - "grad_norm": 0.12302552163600922, + "grad_norm": 0.08526286482810974, "learning_rate": 4.692307692307693e-06, - "loss": 0.0048, + "loss": 0.0066, "step": 15425 }, { "epoch": 17.015418502202643, - "grad_norm": 0.040505509823560715, + "grad_norm": 0.02115774340927601, "learning_rate": 4.666666666666667e-06, - "loss": 0.004, + "loss": 0.0046, "step": 15450 }, { "epoch": 17.042951541850222, - "grad_norm": 0.0250072181224823, + "grad_norm": 0.2753286063671112, "learning_rate": 4.641025641025642e-06, - "loss": 0.0048, + "loss": 0.0045, "step": 15475 }, { "epoch": 17.070484581497798, - "grad_norm": 0.027064405381679535, + "grad_norm": 0.03063320554792881, "learning_rate": 4.615384615384616e-06, - "loss": 0.0029, + "loss": 0.0045, "step": 15500 }, { "epoch": 17.098017621145374, - "grad_norm": 0.021600468084216118, + "grad_norm": 0.010663969442248344, "learning_rate": 4.58974358974359e-06, - "loss": 0.0033, + "loss": 0.003, "step": 15525 }, { "epoch": 17.125550660792953, - "grad_norm": 0.024508068338036537, + "grad_norm": 0.22800272703170776, "learning_rate": 4.564102564102564e-06, - "loss": 0.0033, + "loss": 0.002, "step": 15550 }, { "epoch": 17.15308370044053, - "grad_norm": 0.05471380800008774, + "grad_norm": 0.22873614728450775, "learning_rate": 4.538461538461539e-06, - "loss": 0.0042, + "loss": 0.002, "step": 15575 }, { "epoch": 17.180616740088105, - "grad_norm": 0.019899729639291763, + "grad_norm": 0.011216685175895691, "learning_rate": 4.512820512820513e-06, - "loss": 0.003, + "loss": 0.0032, "step": 15600 }, { "epoch": 17.208149779735685, - "grad_norm": 0.03066374734044075, + "grad_norm": 0.4962778091430664, "learning_rate": 4.487179487179488e-06, "loss": 0.0038, "step": 15625 }, { "epoch": 17.23568281938326, - "grad_norm": 0.043680623173713684, + "grad_norm": 0.023572538048028946, "learning_rate": 4.461538461538462e-06, - "loss": 0.0041, + "loss": 0.0031, "step": 15650 }, { "epoch": 17.263215859030836, - "grad_norm": 0.03351521119475365, + "grad_norm": 0.032316386699676514, "learning_rate": 4.435897435897436e-06, - "loss": 0.0039, + "loss": 0.0024, "step": 15675 }, { "epoch": 17.290748898678412, - "grad_norm": 0.18343234062194824, + "grad_norm": 0.025533461943268776, "learning_rate": 4.4102564102564104e-06, - "loss": 0.0032, + "loss": 0.0024, "step": 15700 }, { "epoch": 17.318281938325992, - "grad_norm": 0.10387376695871353, + "grad_norm": 0.02447574771940708, "learning_rate": 4.384615384615385e-06, - "loss": 0.0033, + "loss": 0.0057, "step": 15725 }, { "epoch": 17.345814977973568, - "grad_norm": 0.02336566336452961, + "grad_norm": 0.021540969610214233, "learning_rate": 4.358974358974359e-06, - "loss": 0.0042, + "loss": 0.0035, "step": 15750 }, { "epoch": 17.373348017621144, - "grad_norm": 0.022074054926633835, + "grad_norm": 0.022210588678717613, "learning_rate": 4.333333333333334e-06, - "loss": 0.0032, + "loss": 0.0029, "step": 15775 }, { "epoch": 17.400881057268723, - "grad_norm": 0.035601481795310974, + "grad_norm": 0.01674061268568039, "learning_rate": 4.307692307692308e-06, - "loss": 0.003, + "loss": 0.0037, "step": 15800 }, { "epoch": 17.4284140969163, - "grad_norm": 0.028988130390644073, + "grad_norm": 0.013861955143511295, "learning_rate": 4.282051282051282e-06, - "loss": 0.0034, + "loss": 0.0023, "step": 15825 }, { "epoch": 17.455947136563875, - "grad_norm": 0.037720389664173126, + "grad_norm": 0.023190615698695183, "learning_rate": 4.2564102564102566e-06, - "loss": 0.0041, + "loss": 0.0045, "step": 15850 }, { "epoch": 17.483480176211454, - "grad_norm": 0.038582686334848404, + "grad_norm": 0.015583349391818047, "learning_rate": 4.230769230769231e-06, - "loss": 0.0042, + "loss": 0.0037, "step": 15875 }, { "epoch": 17.51101321585903, - "grad_norm": 0.032543014734983444, + "grad_norm": 0.013716256245970726, "learning_rate": 4.2051282051282055e-06, - "loss": 0.0046, + "loss": 0.0035, "step": 15900 }, { "epoch": 17.538546255506606, - "grad_norm": 0.033068541437387466, + "grad_norm": 0.016609976068139076, "learning_rate": 4.17948717948718e-06, - "loss": 0.0037, + "loss": 0.0041, "step": 15925 }, { "epoch": 17.566079295154186, - "grad_norm": 0.0271660964936018, + "grad_norm": 0.01952126808464527, "learning_rate": 4.1538461538461545e-06, - "loss": 0.0041, + "loss": 0.0037, "step": 15950 }, { "epoch": 17.59361233480176, - "grad_norm": 0.03645619750022888, + "grad_norm": 0.17321471869945526, "learning_rate": 4.128205128205128e-06, "loss": 0.0039, "step": 15975 }, { "epoch": 17.621145374449338, - "grad_norm": 0.033829644322395325, + "grad_norm": 0.01522456482052803, "learning_rate": 4.102564102564103e-06, - "loss": 0.0034, + "loss": 0.0032, "step": 16000 }, { "epoch": 17.621145374449338, - "eval_cer": 55.244001696605636, - "eval_loss": 1.0241984128952026, - "eval_runtime": 821.5561, - "eval_samples_per_second": 12.879, - "eval_steps_per_second": 3.221, - "eval_wer": 96.5959453088166, + "eval_cer": 23.317437208395905, + "eval_loss": 0.8810132741928101, + "eval_runtime": 1715.3061, + "eval_samples_per_second": 6.169, + "eval_steps_per_second": 1.543, + "eval_wer": 82.65912305516267, "step": 16000 }, { "epoch": 17.648678414096917, - "grad_norm": 0.03022758848965168, + "grad_norm": 0.30109259486198425, "learning_rate": 4.076923076923077e-06, - "loss": 0.0031, + "loss": 0.0026, "step": 16025 }, { "epoch": 17.676211453744493, - "grad_norm": 0.02969919890165329, + "grad_norm": 0.06414441019296646, "learning_rate": 4.051282051282052e-06, - "loss": 0.0036, + "loss": 0.0032, "step": 16050 }, { "epoch": 17.70374449339207, - "grad_norm": 0.04370042681694031, + "grad_norm": 0.12120723724365234, "learning_rate": 4.025641025641026e-06, - "loss": 0.0038, + "loss": 0.0026, "step": 16075 }, { "epoch": 17.73127753303965, - "grad_norm": 0.04208499938249588, + "grad_norm": 0.19122646749019623, "learning_rate": 4.000000000000001e-06, - "loss": 0.0034, + "loss": 0.0039, "step": 16100 }, { "epoch": 17.758810572687224, - "grad_norm": 0.07844261825084686, + "grad_norm": 0.03467703238129616, "learning_rate": 3.974358974358974e-06, - "loss": 0.0032, + "loss": 0.0027, "step": 16125 }, { "epoch": 17.7863436123348, - "grad_norm": 0.02524634823203087, + "grad_norm": 0.01050791796296835, "learning_rate": 3.948717948717949e-06, - "loss": 0.0037, + "loss": 0.0024, "step": 16150 }, { "epoch": 17.81387665198238, - "grad_norm": 0.027894780039787292, + "grad_norm": 0.01552590075880289, "learning_rate": 3.923076923076923e-06, - "loss": 0.0035, + "loss": 0.003, "step": 16175 }, { "epoch": 17.841409691629956, - "grad_norm": 0.11929473280906677, + "grad_norm": 0.021685760468244553, "learning_rate": 3.897435897435898e-06, - "loss": 0.0035, + "loss": 0.0028, "step": 16200 }, { "epoch": 17.86894273127753, - "grad_norm": 0.02329305373132229, + "grad_norm": 0.012615197338163853, "learning_rate": 3.871794871794872e-06, - "loss": 0.0033, + "loss": 0.0018, "step": 16225 }, { "epoch": 17.89647577092511, - "grad_norm": 0.03569497913122177, + "grad_norm": 0.024285893887281418, "learning_rate": 3.846153846153847e-06, - "loss": 0.0033, + "loss": 0.0034, "step": 16250 }, { "epoch": 17.924008810572687, - "grad_norm": 0.025960877537727356, + "grad_norm": 0.019548427313566208, "learning_rate": 3.8205128205128204e-06, - "loss": 0.0036, + "loss": 0.0046, "step": 16275 }, { "epoch": 17.951541850220263, - "grad_norm": 0.0266879815608263, + "grad_norm": 0.014185987412929535, "learning_rate": 3.794871794871795e-06, - "loss": 0.004, + "loss": 0.0042, "step": 16300 }, { "epoch": 17.979074889867842, - "grad_norm": 0.035963866859674454, + "grad_norm": 0.2013942152261734, "learning_rate": 3.7692307692307694e-06, - "loss": 0.0036, + "loss": 0.0025, "step": 16325 }, { "epoch": 18.006607929515418, - "grad_norm": 0.06047583743929863, + "grad_norm": 0.01142708957195282, "learning_rate": 3.743589743589744e-06, - "loss": 0.0032, + "loss": 0.0028, "step": 16350 }, { "epoch": 18.034140969162994, - "grad_norm": 0.024352600798010826, + "grad_norm": 0.1827182024717331, "learning_rate": 3.7179487179487184e-06, "loss": 0.0027, "step": 16375 }, { "epoch": 18.061674008810574, - "grad_norm": 0.02058643475174904, + "grad_norm": 0.008858841843903065, "learning_rate": 3.692307692307693e-06, - "loss": 0.0031, + "loss": 0.0022, "step": 16400 }, { "epoch": 18.08920704845815, - "grad_norm": 0.0343441516160965, + "grad_norm": 0.037348657846450806, "learning_rate": 3.6666666666666666e-06, - "loss": 0.0028, + "loss": 0.004, "step": 16425 }, { "epoch": 18.116740088105725, - "grad_norm": 0.029145579785108566, + "grad_norm": 0.014842098578810692, "learning_rate": 3.641025641025641e-06, - "loss": 0.0025, + "loss": 0.003, "step": 16450 }, { "epoch": 18.144273127753305, - "grad_norm": 0.02418331801891327, + "grad_norm": 0.012190734967589378, "learning_rate": 3.6153846153846156e-06, - "loss": 0.0027, + "loss": 0.0047, "step": 16475 }, { "epoch": 18.17180616740088, - "grad_norm": 0.017598390579223633, + "grad_norm": 0.010254699736833572, "learning_rate": 3.58974358974359e-06, - "loss": 0.0028, + "loss": 0.0018, "step": 16500 }, { "epoch": 18.199339207048457, - "grad_norm": 0.020629288628697395, + "grad_norm": 0.012803646735846996, "learning_rate": 3.5641025641025646e-06, - "loss": 0.0027, + "loss": 0.0018, "step": 16525 }, { "epoch": 18.226872246696036, - "grad_norm": 0.020629016682505608, + "grad_norm": 0.010007087141275406, "learning_rate": 3.538461538461539e-06, - "loss": 0.0029, + "loss": 0.0037, "step": 16550 }, { "epoch": 18.254405286343612, - "grad_norm": 0.034342389553785324, + "grad_norm": 0.010007468052208424, "learning_rate": 3.5128205128205127e-06, - "loss": 0.0028, + "loss": 0.0017, "step": 16575 }, { "epoch": 18.281938325991188, - "grad_norm": 0.19688080251216888, + "grad_norm": 0.021304214373230934, "learning_rate": 3.487179487179487e-06, - "loss": 0.0029, + "loss": 0.0017, "step": 16600 }, { "epoch": 18.309471365638768, - "grad_norm": 0.020140135660767555, + "grad_norm": 0.00610103365033865, "learning_rate": 3.4615384615384617e-06, - "loss": 0.0031, + "loss": 0.0017, "step": 16625 }, { "epoch": 18.337004405286343, - "grad_norm": 0.2628467082977295, + "grad_norm": 0.17184419929981232, "learning_rate": 3.435897435897436e-06, - "loss": 0.0037, + "loss": 0.0023, "step": 16650 }, { "epoch": 18.36453744493392, - "grad_norm": 0.08471482247114182, + "grad_norm": 0.010224095545709133, "learning_rate": 3.4102564102564107e-06, - "loss": 0.0035, + "loss": 0.0028, "step": 16675 }, { "epoch": 18.3920704845815, - "grad_norm": 0.05477755516767502, + "grad_norm": 0.016741087660193443, "learning_rate": 3.384615384615385e-06, - "loss": 0.0031, + "loss": 0.002, "step": 16700 }, { "epoch": 18.419603524229075, - "grad_norm": 0.020290255546569824, + "grad_norm": 0.01324927993118763, "learning_rate": 3.358974358974359e-06, - "loss": 0.0027, + "loss": 0.0017, "step": 16725 }, { "epoch": 18.44713656387665, - "grad_norm": 0.022715341299772263, + "grad_norm": 0.14577801525592804, "learning_rate": 3.3333333333333333e-06, - "loss": 0.003, + "loss": 0.0025, "step": 16750 }, { "epoch": 18.47466960352423, - "grad_norm": 0.04665736109018326, + "grad_norm": 0.0260769035667181, "learning_rate": 3.307692307692308e-06, - "loss": 0.0034, + "loss": 0.0018, "step": 16775 }, { "epoch": 18.502202643171806, - "grad_norm": 0.024362141266465187, + "grad_norm": 0.01632179506123066, "learning_rate": 3.2820512820512823e-06, - "loss": 0.0034, + "loss": 0.0041, "step": 16800 }, { "epoch": 18.529735682819382, - "grad_norm": 0.027004770934581757, + "grad_norm": 0.014896622858941555, "learning_rate": 3.256410256410257e-06, - "loss": 0.0031, + "loss": 0.0018, "step": 16825 }, { "epoch": 18.55726872246696, - "grad_norm": 0.02136850170791149, + "grad_norm": 0.014535325579345226, "learning_rate": 3.2307692307692313e-06, - "loss": 0.0034, + "loss": 0.0022, "step": 16850 }, { "epoch": 18.584801762114537, - "grad_norm": 0.024859808385372162, + "grad_norm": 0.011787498369812965, "learning_rate": 3.205128205128206e-06, - "loss": 0.003, + "loss": 0.0016, "step": 16875 }, { "epoch": 18.612334801762113, - "grad_norm": 0.020625699311494827, + "grad_norm": 0.04083514213562012, "learning_rate": 3.1794871794871795e-06, - "loss": 0.0028, + "loss": 0.0017, "step": 16900 }, { "epoch": 18.639867841409693, - "grad_norm": 0.0206185020506382, + "grad_norm": 0.16764149069786072, "learning_rate": 3.153846153846154e-06, - "loss": 0.0031, + "loss": 0.0024, "step": 16925 }, { "epoch": 18.66740088105727, - "grad_norm": 0.022832127287983894, + "grad_norm": 0.008704649284482002, "learning_rate": 3.1282051282051284e-06, - "loss": 0.003, + "loss": 0.0015, "step": 16950 }, { "epoch": 18.694933920704845, - "grad_norm": 0.022303447127342224, + "grad_norm": 0.007399390451610088, "learning_rate": 3.102564102564103e-06, - "loss": 0.0028, + "loss": 0.0014, "step": 16975 }, { "epoch": 18.722466960352424, - "grad_norm": 0.023096712306141853, + "grad_norm": 0.016065089032053947, "learning_rate": 3.0769230769230774e-06, - "loss": 0.0029, + "loss": 0.0017, "step": 17000 }, { "epoch": 18.722466960352424, - "eval_cer": 55.69451927596207, - "eval_loss": 1.0366687774658203, - "eval_runtime": 832.9104, - "eval_samples_per_second": 12.704, - "eval_steps_per_second": 3.177, - "eval_wer": 96.5865157944366, + "eval_cer": 22.853163367074387, + "eval_loss": 0.8870487809181213, + "eval_runtime": 1702.6926, + "eval_samples_per_second": 6.214, + "eval_steps_per_second": 1.554, + "eval_wer": 82.998585572843, "step": 17000 }, { "epoch": 18.75, - "grad_norm": 0.03265475109219551, + "grad_norm": 0.04777693375945091, "learning_rate": 3.051282051282052e-06, - "loss": 0.003, + "loss": 0.0023, "step": 17025 }, { "epoch": 18.777533039647576, - "grad_norm": 0.02906380034983158, + "grad_norm": 0.012851621024310589, "learning_rate": 3.0256410256410256e-06, - "loss": 0.0027, + "loss": 0.0016, "step": 17050 }, { "epoch": 18.805066079295155, - "grad_norm": 0.02245141565799713, + "grad_norm": 0.07990699261426926, "learning_rate": 3e-06, - "loss": 0.0029, + "loss": 0.0016, "step": 17075 }, { "epoch": 18.83259911894273, - "grad_norm": 0.028345687314867973, + "grad_norm": 0.011805381625890732, "learning_rate": 2.9743589743589746e-06, - "loss": 0.0029, + "loss": 0.0027, "step": 17100 }, { "epoch": 18.860132158590307, - "grad_norm": 0.0485895536839962, + "grad_norm": 0.14670372009277344, "learning_rate": 2.948717948717949e-06, - "loss": 0.0039, + "loss": 0.0026, "step": 17125 }, { "epoch": 18.887665198237887, - "grad_norm": 0.028899550437927246, + "grad_norm": 0.023519041016697884, "learning_rate": 2.9230769230769236e-06, - "loss": 0.0032, + "loss": 0.0028, "step": 17150 }, { "epoch": 18.915198237885463, - "grad_norm": 0.021577881649136543, + "grad_norm": 0.021847659721970558, "learning_rate": 2.897435897435898e-06, - "loss": 0.0033, + "loss": 0.0015, "step": 17175 }, { "epoch": 18.94273127753304, - "grad_norm": 0.03163857385516167, + "grad_norm": 0.013796437531709671, "learning_rate": 2.8717948717948717e-06, - "loss": 0.0029, + "loss": 0.0023, "step": 17200 }, { "epoch": 18.970264317180618, - "grad_norm": 0.026857255026698112, + "grad_norm": 0.1518554836511612, "learning_rate": 2.846153846153846e-06, - "loss": 0.0029, + "loss": 0.0016, "step": 17225 }, { "epoch": 18.997797356828194, - "grad_norm": 0.027735862880945206, + "grad_norm": 0.012883415445685387, "learning_rate": 2.8205128205128207e-06, - "loss": 0.0029, + "loss": 0.0019, "step": 17250 }, { "epoch": 19.02533039647577, - "grad_norm": 0.017564741894602776, + "grad_norm": 0.01099941972643137, "learning_rate": 2.794871794871795e-06, - "loss": 0.0025, + "loss": 0.0022, "step": 17275 }, { "epoch": 19.05286343612335, - "grad_norm": 0.018339525908231735, + "grad_norm": 0.006992665119469166, "learning_rate": 2.7692307692307697e-06, - "loss": 0.0023, + "loss": 0.0011, "step": 17300 }, { "epoch": 19.080396475770925, - "grad_norm": 0.04713983088731766, + "grad_norm": 0.012264972552657127, "learning_rate": 2.743589743589744e-06, - "loss": 0.0026, + "loss": 0.0014, "step": 17325 }, { "epoch": 19.1079295154185, - "grad_norm": 0.1079772338271141, + "grad_norm": 0.04312492161989212, "learning_rate": 2.717948717948718e-06, - "loss": 0.0025, + "loss": 0.0012, "step": 17350 }, { "epoch": 19.13546255506608, - "grad_norm": 0.017452696338295937, + "grad_norm": 0.008214226923882961, "learning_rate": 2.6923076923076923e-06, - "loss": 0.0022, + "loss": 0.0011, "step": 17375 }, { "epoch": 19.162995594713657, - "grad_norm": 0.017971495166420937, + "grad_norm": 0.009182457812130451, "learning_rate": 2.666666666666667e-06, - "loss": 0.0028, + "loss": 0.0011, "step": 17400 }, { "epoch": 19.190528634361232, - "grad_norm": 0.022472327575087547, + "grad_norm": 0.009743117727339268, "learning_rate": 2.6410256410256413e-06, - "loss": 0.0023, + "loss": 0.001, "step": 17425 }, { "epoch": 19.218061674008812, - "grad_norm": 0.024212457239627838, + "grad_norm": 0.011959163472056389, "learning_rate": 2.615384615384616e-06, - "loss": 0.0023, + "loss": 0.001, "step": 17450 }, { "epoch": 19.245594713656388, - "grad_norm": 0.018772531300783157, + "grad_norm": 0.033681828528642654, "learning_rate": 2.5897435897435903e-06, - "loss": 0.0024, + "loss": 0.0019, "step": 17475 }, { "epoch": 19.273127753303964, - "grad_norm": 0.01784471981227398, + "grad_norm": 0.012354315258562565, "learning_rate": 2.564102564102564e-06, - "loss": 0.0022, + "loss": 0.0028, "step": 17500 }, { "epoch": 19.300660792951543, - "grad_norm": 0.020429756492376328, + "grad_norm": 0.01059970073401928, "learning_rate": 2.5384615384615385e-06, - "loss": 0.0026, + "loss": 0.0018, "step": 17525 }, { "epoch": 19.32819383259912, - "grad_norm": 0.022216424345970154, + "grad_norm": 0.007629127707332373, "learning_rate": 2.512820512820513e-06, - "loss": 0.0023, + "loss": 0.001, "step": 17550 }, { "epoch": 19.355726872246695, - "grad_norm": 0.017014402896165848, + "grad_norm": 0.0125362453982234, "learning_rate": 2.4871794871794875e-06, - "loss": 0.0022, + "loss": 0.001, "step": 17575 }, { "epoch": 19.383259911894275, - "grad_norm": 0.02979693002998829, + "grad_norm": 0.01261002104729414, "learning_rate": 2.461538461538462e-06, - "loss": 0.0024, + "loss": 0.0014, "step": 17600 }, { "epoch": 19.41079295154185, - "grad_norm": 0.07756248861551285, + "grad_norm": 0.010447504930198193, "learning_rate": 2.435897435897436e-06, - "loss": 0.0038, + "loss": 0.0021, "step": 17625 }, { "epoch": 19.438325991189426, - "grad_norm": 0.027045181021094322, + "grad_norm": 0.009724145755171776, "learning_rate": 2.4102564102564105e-06, - "loss": 0.003, + "loss": 0.0021, "step": 17650 }, { "epoch": 19.465859030837006, - "grad_norm": 0.02446981891989708, + "grad_norm": 0.008591737598180771, "learning_rate": 2.384615384615385e-06, - "loss": 0.0034, + "loss": 0.0013, "step": 17675 }, { "epoch": 19.493392070484582, - "grad_norm": 0.01992960087954998, + "grad_norm": 0.008385499939322472, "learning_rate": 2.358974358974359e-06, - "loss": 0.0025, + "loss": 0.0017, "step": 17700 }, { "epoch": 19.520925110132158, - "grad_norm": 0.030592037364840508, + "grad_norm": 0.04597390815615654, "learning_rate": 2.3333333333333336e-06, - "loss": 0.0035, + "loss": 0.0013, "step": 17725 }, { "epoch": 19.548458149779737, - "grad_norm": 0.018608825281262398, + "grad_norm": 0.00930617842823267, "learning_rate": 2.307692307692308e-06, - "loss": 0.0028, + "loss": 0.0016, "step": 17750 }, { "epoch": 19.575991189427313, - "grad_norm": 0.021949810907244682, + "grad_norm": 0.009862055070698261, "learning_rate": 2.282051282051282e-06, - "loss": 0.0026, + "loss": 0.0014, "step": 17775 }, { "epoch": 19.60352422907489, - "grad_norm": 0.029381688684225082, + "grad_norm": 0.01388918049633503, "learning_rate": 2.2564102564102566e-06, - "loss": 0.0023, + "loss": 0.0011, "step": 17800 }, { "epoch": 19.63105726872247, - "grad_norm": 0.023357443511486053, + "grad_norm": 0.010380508378148079, "learning_rate": 2.230769230769231e-06, - "loss": 0.0027, + "loss": 0.0022, "step": 17825 }, { "epoch": 19.658590308370044, - "grad_norm": 0.014633470214903355, + "grad_norm": 0.003493061987683177, "learning_rate": 2.2051282051282052e-06, - "loss": 0.0022, + "loss": 0.001, "step": 17850 }, { "epoch": 19.68612334801762, - "grad_norm": 0.018193107098340988, + "grad_norm": 0.00607143621891737, "learning_rate": 2.1794871794871797e-06, - "loss": 0.0025, + "loss": 0.0016, "step": 17875 }, { "epoch": 19.7136563876652, - "grad_norm": 0.0176758524030447, + "grad_norm": 0.007698683068156242, "learning_rate": 2.153846153846154e-06, - "loss": 0.0022, + "loss": 0.0029, "step": 17900 }, { "epoch": 19.741189427312776, - "grad_norm": 0.021504636853933334, + "grad_norm": 0.007107453886419535, "learning_rate": 2.1282051282051283e-06, - "loss": 0.0023, + "loss": 0.0018, "step": 17925 }, { "epoch": 19.76872246696035, - "grad_norm": 0.09170462936162949, + "grad_norm": 0.0059033227153122425, "learning_rate": 2.1025641025641028e-06, - "loss": 0.0026, + "loss": 0.001, "step": 17950 }, { "epoch": 19.79625550660793, - "grad_norm": 0.01736604981124401, + "grad_norm": 0.005275961942970753, "learning_rate": 2.0769230769230773e-06, - "loss": 0.0023, + "loss": 0.0026, "step": 17975 }, { "epoch": 19.823788546255507, - "grad_norm": 0.03202914819121361, + "grad_norm": 0.016638007014989853, "learning_rate": 2.0512820512820513e-06, - "loss": 0.0022, + "loss": 0.0019, "step": 18000 }, { "epoch": 19.823788546255507, - "eval_cer": 55.551798058074354, - "eval_loss": 1.0446730852127075, - "eval_runtime": 801.1525, - "eval_samples_per_second": 13.207, - "eval_steps_per_second": 3.303, - "eval_wer": 96.61480433757662, + "eval_cer": 22.66344158747263, + "eval_loss": 0.8900153040885925, + "eval_runtime": 1717.0751, + "eval_samples_per_second": 6.162, + "eval_steps_per_second": 1.541, + "eval_wer": 82.50825082508251, "step": 18000 }, { "epoch": 19.851321585903083, - "grad_norm": 0.0205672699958086, + "grad_norm": 0.0051730177365243435, "learning_rate": 2.025641025641026e-06, - "loss": 0.0034, + "loss": 0.0013, "step": 18025 }, { "epoch": 19.878854625550662, - "grad_norm": 0.017052460461854935, + "grad_norm": 0.00516405189409852, "learning_rate": 2.0000000000000003e-06, - "loss": 0.0024, + "loss": 0.0018, "step": 18050 }, { "epoch": 19.90638766519824, - "grad_norm": 0.023273777216672897, + "grad_norm": 0.006816135719418526, "learning_rate": 1.9743589743589744e-06, - "loss": 0.0024, + "loss": 0.001, "step": 18075 }, { "epoch": 19.933920704845814, - "grad_norm": 0.01854720339179039, + "grad_norm": 0.005780714098364115, "learning_rate": 1.948717948717949e-06, - "loss": 0.0029, + "loss": 0.0009, "step": 18100 }, { "epoch": 19.961453744493394, - "grad_norm": 0.023288726806640625, + "grad_norm": 0.007895824499428272, "learning_rate": 1.9230769230769234e-06, - "loss": 0.0025, + "loss": 0.0011, "step": 18125 }, { "epoch": 19.98898678414097, - "grad_norm": 0.019170600920915604, + "grad_norm": 0.00839215237647295, "learning_rate": 1.8974358974358975e-06, - "loss": 0.0024, + "loss": 0.0011, "step": 18150 }, { "epoch": 20.016519823788546, - "grad_norm": 0.013864605687558651, + "grad_norm": 0.0035141175612807274, "learning_rate": 1.871794871794872e-06, - "loss": 0.0021, + "loss": 0.0011, "step": 18175 }, { "epoch": 20.044052863436125, - "grad_norm": 0.015261122956871986, + "grad_norm": 0.008937545120716095, "learning_rate": 1.8461538461538465e-06, - "loss": 0.002, + "loss": 0.0009, "step": 18200 }, { "epoch": 20.0715859030837, - "grad_norm": 0.015079254284501076, + "grad_norm": 0.0037842292804270983, "learning_rate": 1.8205128205128205e-06, - "loss": 0.0024, + "loss": 0.0011, "step": 18225 }, { "epoch": 20.099118942731277, - "grad_norm": 0.013841504231095314, + "grad_norm": 0.003870155429467559, "learning_rate": 1.794871794871795e-06, - "loss": 0.003, + "loss": 0.0009, "step": 18250 }, { "epoch": 20.126651982378856, - "grad_norm": 0.017009438946843147, + "grad_norm": 0.003817240707576275, "learning_rate": 1.7692307692307695e-06, - "loss": 0.002, + "loss": 0.0009, "step": 18275 }, { "epoch": 20.154185022026432, - "grad_norm": 0.01796025224030018, + "grad_norm": 0.007133571431040764, "learning_rate": 1.7435897435897436e-06, - "loss": 0.0019, + "loss": 0.0008, "step": 18300 }, { "epoch": 20.181718061674008, - "grad_norm": 0.020462974905967712, + "grad_norm": 0.011461510322988033, "learning_rate": 1.717948717948718e-06, - "loss": 0.002, + "loss": 0.0007, "step": 18325 }, { "epoch": 20.209251101321588, - "grad_norm": 0.0168469101190567, + "grad_norm": 0.003969813231378794, "learning_rate": 1.6923076923076926e-06, - "loss": 0.002, + "loss": 0.001, "step": 18350 }, { "epoch": 20.236784140969164, - "grad_norm": 0.015358548611402512, + "grad_norm": 0.007272036280483007, "learning_rate": 1.6666666666666667e-06, - "loss": 0.0019, + "loss": 0.001, "step": 18375 }, { "epoch": 20.26431718061674, - "grad_norm": 0.01623690128326416, + "grad_norm": 0.006936676800251007, "learning_rate": 1.6410256410256412e-06, - "loss": 0.0019, + "loss": 0.0009, "step": 18400 }, { "epoch": 20.291850220264315, - "grad_norm": 0.016147859394550323, + "grad_norm": 0.005403169430792332, "learning_rate": 1.6153846153846157e-06, - "loss": 0.002, + "loss": 0.0007, "step": 18425 }, { "epoch": 20.319383259911895, - "grad_norm": 0.023021413013339043, + "grad_norm": 0.009516764432191849, "learning_rate": 1.5897435897435897e-06, - "loss": 0.0023, + "loss": 0.0029, "step": 18450 }, { "epoch": 20.34691629955947, - "grad_norm": 0.0137328477576375, + "grad_norm": 0.003727905685082078, "learning_rate": 1.5641025641025642e-06, - "loss": 0.0019, + "loss": 0.0008, "step": 18475 }, { "epoch": 20.374449339207047, - "grad_norm": 0.01765141263604164, + "grad_norm": 0.006022660061717033, "learning_rate": 1.5384615384615387e-06, - "loss": 0.0022, + "loss": 0.002, "step": 18500 }, { "epoch": 20.401982378854626, - "grad_norm": 0.015655307099223137, + "grad_norm": 0.004205208737403154, "learning_rate": 1.5128205128205128e-06, - "loss": 0.0038, + "loss": 0.001, "step": 18525 }, { "epoch": 20.429515418502202, - "grad_norm": 0.021192258223891258, + "grad_norm": 0.10070935636758804, "learning_rate": 1.4871794871794873e-06, - "loss": 0.0021, + "loss": 0.0009, "step": 18550 }, { "epoch": 20.457048458149778, - "grad_norm": 0.014702214859426022, + "grad_norm": 0.004871605895459652, "learning_rate": 1.4615384615384618e-06, - "loss": 0.0019, + "loss": 0.0009, "step": 18575 }, { "epoch": 20.484581497797357, - "grad_norm": 0.018568340688943863, + "grad_norm": 0.005528348032385111, "learning_rate": 1.4358974358974359e-06, - "loss": 0.0018, + "loss": 0.0008, "step": 18600 }, { "epoch": 20.512114537444933, - "grad_norm": 0.020032202824950218, + "grad_norm": 0.007922505959868431, "learning_rate": 1.4102564102564104e-06, - "loss": 0.002, + "loss": 0.0007, "step": 18625 }, { "epoch": 20.53964757709251, - "grad_norm": 0.01590747945010662, + "grad_norm": 0.004503941163420677, "learning_rate": 1.3846153846153848e-06, - "loss": 0.002, + "loss": 0.001, "step": 18650 }, { "epoch": 20.56718061674009, - "grad_norm": 0.014293953776359558, + "grad_norm": 0.04012945666909218, "learning_rate": 1.358974358974359e-06, - "loss": 0.002, + "loss": 0.0011, "step": 18675 }, { "epoch": 20.594713656387665, - "grad_norm": 0.0199781134724617, + "grad_norm": 0.011533623561263084, "learning_rate": 1.3333333333333334e-06, - "loss": 0.0019, + "loss": 0.0011, "step": 18700 }, { "epoch": 20.62224669603524, - "grad_norm": 0.018757140263915062, + "grad_norm": 0.008248466067016125, "learning_rate": 1.307692307692308e-06, - "loss": 0.0022, + "loss": 0.0009, "step": 18725 }, { "epoch": 20.64977973568282, - "grad_norm": 0.021107446402311325, + "grad_norm": 0.004799861926585436, "learning_rate": 1.282051282051282e-06, - "loss": 0.0029, + "loss": 0.0007, "step": 18750 }, { "epoch": 20.677312775330396, - "grad_norm": 0.018470246344804764, + "grad_norm": 0.006359547842293978, "learning_rate": 1.2564102564102565e-06, - "loss": 0.0021, + "loss": 0.0007, "step": 18775 }, { "epoch": 20.704845814977972, - "grad_norm": 0.01821320876479149, + "grad_norm": 0.006216075737029314, "learning_rate": 1.230769230769231e-06, - "loss": 0.0022, + "loss": 0.001, "step": 18800 }, { "epoch": 20.73237885462555, - "grad_norm": 0.15323257446289062, + "grad_norm": 0.08518233150243759, "learning_rate": 1.2051282051282053e-06, - "loss": 0.0024, + "loss": 0.0012, "step": 18825 }, { "epoch": 20.759911894273127, - "grad_norm": 0.015295284800231457, + "grad_norm": 0.004133372101932764, "learning_rate": 1.1794871794871795e-06, - "loss": 0.002, + "loss": 0.001, "step": 18850 }, { "epoch": 20.787444933920703, - "grad_norm": 0.015194980427622795, + "grad_norm": 0.006971430499106646, "learning_rate": 1.153846153846154e-06, - "loss": 0.0018, + "loss": 0.0014, "step": 18875 }, { "epoch": 20.814977973568283, - "grad_norm": 0.05270170047879219, + "grad_norm": 0.005109596531838179, "learning_rate": 1.1282051282051283e-06, - "loss": 0.0024, + "loss": 0.0011, "step": 18900 }, { "epoch": 20.84251101321586, - "grad_norm": 0.01960138976573944, + "grad_norm": 0.038249921053647995, "learning_rate": 1.1025641025641026e-06, - "loss": 0.0021, + "loss": 0.0012, "step": 18925 }, { "epoch": 20.870044052863435, - "grad_norm": 0.02073553018271923, + "grad_norm": 0.008875112980604172, "learning_rate": 1.076923076923077e-06, - "loss": 0.0019, + "loss": 0.0007, "step": 18950 }, { "epoch": 20.897577092511014, - "grad_norm": 0.01615351065993309, + "grad_norm": 0.0044938609935343266, "learning_rate": 1.0512820512820514e-06, - "loss": 0.002, + "loss": 0.0011, "step": 18975 }, { "epoch": 20.92511013215859, - "grad_norm": 0.021563587710261345, + "grad_norm": 0.07247400283813477, "learning_rate": 1.0256410256410257e-06, - "loss": 0.0021, + "loss": 0.0008, "step": 19000 }, { "epoch": 20.92511013215859, - "eval_cer": 55.589054600896446, - "eval_loss": 1.0507194995880127, - "eval_runtime": 844.8487, - "eval_samples_per_second": 12.524, - "eval_steps_per_second": 3.132, - "eval_wer": 96.57708628005658, + "eval_cer": 22.58778214666468, + "eval_loss": 0.892371654510498, + "eval_runtime": 1719.93, + "eval_samples_per_second": 6.152, + "eval_steps_per_second": 1.538, + "eval_wer": 82.47996228194248, "step": 19000 }, { "epoch": 20.952643171806166, - "grad_norm": 0.016109561547636986, + "grad_norm": 0.006040550768375397, "learning_rate": 1.0000000000000002e-06, - "loss": 0.002, + "loss": 0.0007, "step": 19025 }, { "epoch": 20.980176211453745, - "grad_norm": 0.016952887177467346, + "grad_norm": 0.00338306394405663, "learning_rate": 9.743589743589745e-07, - "loss": 0.002, + "loss": 0.001, "step": 19050 }, { "epoch": 21.00770925110132, - "grad_norm": 0.01466713659465313, + "grad_norm": 0.007667516358196735, "learning_rate": 9.487179487179487e-07, - "loss": 0.002, + "loss": 0.0012, "step": 19075 }, { "epoch": 21.035242290748897, - "grad_norm": 0.01427449006587267, + "grad_norm": 0.0036987056955695152, "learning_rate": 9.230769230769232e-07, - "loss": 0.002, + "loss": 0.0006, "step": 19100 }, { "epoch": 21.062775330396477, - "grad_norm": 0.016093429177999496, + "grad_norm": 0.0036683231592178345, "learning_rate": 8.974358974358975e-07, - "loss": 0.0018, + "loss": 0.0011, "step": 19125 }, { "epoch": 21.090308370044053, - "grad_norm": 0.019426781684160233, + "grad_norm": 0.007168483920395374, "learning_rate": 8.717948717948718e-07, - "loss": 0.0018, + "loss": 0.0009, "step": 19150 }, { "epoch": 21.11784140969163, - "grad_norm": 0.0124832633882761, + "grad_norm": 0.0029900213703513145, "learning_rate": 8.461538461538463e-07, "loss": 0.0017, "step": 19175 }, { "epoch": 21.145374449339208, - "grad_norm": 0.01551234070211649, + "grad_norm": 0.00418079923838377, "learning_rate": 8.205128205128206e-07, - "loss": 0.0018, + "loss": 0.0009, "step": 19200 }, { "epoch": 21.172907488986784, - "grad_norm": 0.01290995441377163, + "grad_norm": 0.003424633527174592, "learning_rate": 7.948717948717949e-07, - "loss": 0.0019, + "loss": 0.0007, "step": 19225 }, { "epoch": 21.20044052863436, - "grad_norm": 0.012107312679290771, + "grad_norm": 0.0028422100003808737, "learning_rate": 7.692307692307694e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19250 }, { "epoch": 21.22797356828194, - "grad_norm": 0.013243271969258785, + "grad_norm": 0.004691548179835081, "learning_rate": 7.435897435897436e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19275 }, { "epoch": 21.255506607929515, - "grad_norm": 0.01567436195909977, + "grad_norm": 0.004589064046740532, "learning_rate": 7.179487179487179e-07, - "loss": 0.0017, + "loss": 0.0005, "step": 19300 }, { "epoch": 21.28303964757709, - "grad_norm": 0.017800329253077507, + "grad_norm": 0.005557245574891567, "learning_rate": 6.923076923076924e-07, - "loss": 0.0017, + "loss": 0.0011, "step": 19325 }, { "epoch": 21.31057268722467, - "grad_norm": 0.012769469991326332, + "grad_norm": 0.0031431138049811125, "learning_rate": 6.666666666666667e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19350 }, { "epoch": 21.338105726872246, - "grad_norm": 0.013936811126768589, + "grad_norm": 0.004688850603997707, "learning_rate": 6.41025641025641e-07, - "loss": 0.0018, + "loss": 0.0007, "step": 19375 }, { "epoch": 21.365638766519822, - "grad_norm": 0.017832236364483833, + "grad_norm": 0.007398667279630899, "learning_rate": 6.153846153846155e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19400 }, { "epoch": 21.393171806167402, - "grad_norm": 0.016330501064658165, + "grad_norm": 0.005217025522142649, "learning_rate": 5.897435897435898e-07, - "loss": 0.0019, + "loss": 0.0008, "step": 19425 }, { "epoch": 21.420704845814978, - "grad_norm": 0.012162838131189346, + "grad_norm": 0.004331599920988083, "learning_rate": 5.641025641025642e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19450 }, { "epoch": 21.448237885462554, - "grad_norm": 0.01499269250780344, + "grad_norm": 0.004927519708871841, "learning_rate": 5.384615384615386e-07, - "loss": 0.0019, + "loss": 0.0009, "step": 19475 }, { "epoch": 21.475770925110133, - "grad_norm": 0.013169058598577976, + "grad_norm": 0.0034796635154634714, "learning_rate": 5.128205128205128e-07, - "loss": 0.0019, + "loss": 0.001, "step": 19500 }, { "epoch": 21.50330396475771, - "grad_norm": 0.011718913912773132, + "grad_norm": 0.00347193144261837, "learning_rate": 4.871794871794872e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19525 }, { "epoch": 21.530837004405285, - "grad_norm": 0.01436688657850027, + "grad_norm": 0.0074023474007844925, "learning_rate": 4.615384615384616e-07, - "loss": 0.0019, + "loss": 0.0006, "step": 19550 }, { "epoch": 21.558370044052865, - "grad_norm": 0.012899577617645264, + "grad_norm": 0.0036716184113174677, "learning_rate": 4.358974358974359e-07, - "loss": 0.0016, + "loss": 0.0006, "step": 19575 }, { "epoch": 21.58590308370044, - "grad_norm": 0.018741106614470482, + "grad_norm": 0.006558453664183617, "learning_rate": 4.102564102564103e-07, - "loss": 0.0018, + "loss": 0.0007, "step": 19600 }, { "epoch": 21.613436123348016, - "grad_norm": 0.011879649944603443, + "grad_norm": 0.0030144904740154743, "learning_rate": 3.846153846153847e-07, - "loss": 0.0018, + "loss": 0.0007, "step": 19625 }, { "epoch": 21.640969162995596, - "grad_norm": 0.01298064086586237, + "grad_norm": 0.0037687935400754213, "learning_rate": 3.5897435897435896e-07, - "loss": 0.0018, + "loss": 0.0007, "step": 19650 }, { "epoch": 21.66850220264317, - "grad_norm": 0.0132521390914917, + "grad_norm": 0.0722261294722557, "learning_rate": 3.3333333333333335e-07, - "loss": 0.0017, + "loss": 0.0007, "step": 19675 }, { "epoch": 21.696035242290748, - "grad_norm": 0.012232212349772453, + "grad_norm": 0.0034861781168729067, "learning_rate": 3.0769230769230774e-07, - "loss": 0.0022, + "loss": 0.0007, "step": 19700 }, { "epoch": 21.723568281938327, - "grad_norm": 0.0125159602612257, + "grad_norm": 0.004740406293421984, "learning_rate": 2.820512820512821e-07, - "loss": 0.0021, + "loss": 0.0009, "step": 19725 }, { "epoch": 21.751101321585903, - "grad_norm": 0.012911227531731129, + "grad_norm": 0.0040426794439554214, "learning_rate": 2.564102564102564e-07, - "loss": 0.0018, + "loss": 0.0007, "step": 19750 }, { "epoch": 21.77863436123348, - "grad_norm": 0.016304660588502884, + "grad_norm": 0.005103557836264372, "learning_rate": 2.307692307692308e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19775 }, { "epoch": 21.80616740088106, - "grad_norm": 0.0178163331001997, + "grad_norm": 0.007594733498990536, "learning_rate": 2.0512820512820514e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19800 }, { "epoch": 21.833700440528634, - "grad_norm": 0.013485315255820751, + "grad_norm": 0.004270041361451149, "learning_rate": 1.7948717948717948e-07, - "loss": 0.0017, + "loss": 0.0007, "step": 19825 }, { "epoch": 21.86123348017621, - "grad_norm": 0.021611526608467102, + "grad_norm": 0.00658000260591507, "learning_rate": 1.5384615384615387e-07, - "loss": 0.0018, + "loss": 0.0006, "step": 19850 }, { "epoch": 21.88876651982379, - "grad_norm": 0.014628293924033642, + "grad_norm": 0.004829788114875555, "learning_rate": 1.282051282051282e-07, - "loss": 0.0017, + "loss": 0.0005, "step": 19875 }, { "epoch": 21.916299559471366, - "grad_norm": 0.013321286998689175, + "grad_norm": 0.004017261788249016, "learning_rate": 1.0256410256410257e-07, - "loss": 0.0017, + "loss": 0.0006, "step": 19900 }, { "epoch": 21.94383259911894, - "grad_norm": 0.016186168417334557, + "grad_norm": 0.005543394014239311, "learning_rate": 7.692307692307694e-08, - "loss": 0.0018, + "loss": 0.0009, "step": 19925 }, { "epoch": 21.97136563876652, - "grad_norm": 0.015817852690815926, + "grad_norm": 0.006894242484122515, "learning_rate": 5.1282051282051286e-08, - "loss": 0.0017, + "loss": 0.0006, "step": 19950 }, { "epoch": 21.998898678414097, - "grad_norm": 0.01383238285779953, + "grad_norm": 0.004000292159616947, "learning_rate": 2.5641025641025643e-08, - "loss": 0.0018, + "loss": 0.0007, "step": 19975 }, { "epoch": 22.026431718061673, - "grad_norm": 0.0143059641122818, + "grad_norm": 0.004270936828106642, "learning_rate": 0.0, - "loss": 0.0017, + "loss": 0.0006, "step": 20000 }, { "epoch": 22.026431718061673, - "eval_cer": 54.87888757694909, - "eval_loss": 1.0545215606689453, - "eval_runtime": 819.2896, - "eval_samples_per_second": 12.915, - "eval_steps_per_second": 3.23, - "eval_wer": 96.57708628005658, + "eval_cer": 22.62675822223241, + "eval_loss": 0.8947405219078064, + "eval_runtime": 1706.5603, + "eval_samples_per_second": 6.2, + "eval_steps_per_second": 1.55, + "eval_wer": 82.34794908062236, "step": 20000 }, { "epoch": 22.026431718061673, "step": 20000, - "total_flos": 3.376341480070185e+19, - "train_loss": 0.2651471851706505, - "train_runtime": 28635.2259, - "train_samples_per_second": 22.35, - "train_steps_per_second": 0.698 + "total_flos": 3.4362863729801953e+20, + "train_loss": 0.2743150826841593, + "train_runtime": 135533.426, + "train_samples_per_second": 4.722, + "train_steps_per_second": 0.148 } ], "logging_steps": 25, @@ -5835,7 +5835,7 @@ "attributes": {} } }, - "total_flos": 3.376341480070185e+19, + "total_flos": 3.4362863729801953e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null