diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5842 @@ +{ + "best_metric": 115.08722300801509, + "best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-19000", + "epoch": 22.026431718061673, + "eval_steps": 1000, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02753303964757709, + "grad_norm": 15.011414527893066, + "learning_rate": 1.0000000000000002e-06, + "loss": 3.3645, + "step": 25 + }, + { + "epoch": 0.05506607929515418, + "grad_norm": 4.264221668243408, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.6877, + "step": 50 + }, + { + "epoch": 0.08259911894273128, + "grad_norm": 2.5181801319122314, + "learning_rate": 3e-06, + "loss": 1.6827, + "step": 75 + }, + { + "epoch": 0.11013215859030837, + "grad_norm": 1.4112682342529297, + "learning_rate": 4.000000000000001e-06, + "loss": 1.1058, + "step": 100 + }, + { + "epoch": 0.13766519823788545, + "grad_norm": 1.4806125164031982, + "learning_rate": 5e-06, + "loss": 0.9359, + "step": 125 + }, + { + "epoch": 0.16519823788546256, + "grad_norm": 1.3319814205169678, + "learning_rate": 6e-06, + "loss": 0.9862, + "step": 150 + }, + { + "epoch": 0.19273127753303965, + "grad_norm": 1.4169176816940308, + "learning_rate": 7e-06, + "loss": 0.8884, + "step": 175 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 1.5349735021591187, + "learning_rate": 8.000000000000001e-06, + "loss": 0.9031, + "step": 200 + }, + { + "epoch": 0.24779735682819384, + "grad_norm": 1.4419125318527222, + "learning_rate": 9e-06, + "loss": 0.8748, + "step": 225 + }, + { + "epoch": 0.2753303964757709, + "grad_norm": 1.49337637424469, + "learning_rate": 1e-05, + "loss": 0.8685, + "step": 250 + }, + { + "epoch": 0.30286343612334804, + "grad_norm": 1.4557123184204102, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.8104, + "step": 275 + }, + { + "epoch": 0.3303964757709251, + "grad_norm": 1.2917561531066895, + "learning_rate": 1.2e-05, + "loss": 0.818, + "step": 300 + }, + { + "epoch": 0.3579295154185022, + "grad_norm": 1.221163034439087, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.8098, + "step": 325 + }, + { + "epoch": 0.3854625550660793, + "grad_norm": 1.2775193452835083, + "learning_rate": 1.4e-05, + "loss": 0.8419, + "step": 350 + }, + { + "epoch": 0.4129955947136564, + "grad_norm": 1.3367918729782104, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.7582, + "step": 375 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.5160075426101685, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.8009, + "step": 400 + }, + { + "epoch": 0.46806167400881055, + "grad_norm": 1.368986964225769, + "learning_rate": 1.7e-05, + "loss": 0.7772, + "step": 425 + }, + { + "epoch": 0.4955947136563877, + "grad_norm": 1.3891838788986206, + "learning_rate": 1.8e-05, + "loss": 0.7927, + "step": 450 + }, + { + "epoch": 0.5231277533039648, + "grad_norm": 1.1749062538146973, + "learning_rate": 1.9e-05, + "loss": 0.7827, + "step": 475 + }, + { + "epoch": 0.5506607929515418, + "grad_norm": 1.3586732149124146, + "learning_rate": 2e-05, + "loss": 0.7839, + "step": 500 + }, + { + "epoch": 0.5781938325991189, + "grad_norm": 1.2230278253555298, + "learning_rate": 1.9974358974358975e-05, + "loss": 0.7555, + "step": 525 + }, + { + "epoch": 0.6057268722466961, + "grad_norm": 1.2390245199203491, + "learning_rate": 1.994871794871795e-05, + "loss": 0.7625, + "step": 550 + }, + { + "epoch": 0.6332599118942731, + "grad_norm": 1.3164693117141724, + "learning_rate": 1.9923076923076926e-05, + "loss": 0.7323, + "step": 575 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 1.3300975561141968, + "learning_rate": 1.98974358974359e-05, + "loss": 0.7453, + "step": 600 + }, + { + "epoch": 0.6883259911894273, + "grad_norm": 1.4462056159973145, + "learning_rate": 1.9871794871794873e-05, + "loss": 0.7177, + "step": 625 + }, + { + "epoch": 0.7158590308370044, + "grad_norm": 1.4526742696762085, + "learning_rate": 1.9846153846153847e-05, + "loss": 0.7449, + "step": 650 + }, + { + "epoch": 0.7433920704845814, + "grad_norm": 1.0672987699508667, + "learning_rate": 1.9820512820512824e-05, + "loss": 0.7286, + "step": 675 + }, + { + "epoch": 0.7709251101321586, + "grad_norm": 1.0843076705932617, + "learning_rate": 1.9794871794871798e-05, + "loss": 0.7252, + "step": 700 + }, + { + "epoch": 0.7984581497797357, + "grad_norm": 1.2082732915878296, + "learning_rate": 1.976923076923077e-05, + "loss": 0.7562, + "step": 725 + }, + { + "epoch": 0.8259911894273128, + "grad_norm": 1.2885624170303345, + "learning_rate": 1.9743589743589745e-05, + "loss": 0.7521, + "step": 750 + }, + { + "epoch": 0.8535242290748899, + "grad_norm": 1.263953447341919, + "learning_rate": 1.9717948717948722e-05, + "loss": 0.688, + "step": 775 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.4190903902053833, + "learning_rate": 1.9692307692307696e-05, + "loss": 0.7404, + "step": 800 + }, + { + "epoch": 0.9085903083700441, + "grad_norm": 1.023093342781067, + "learning_rate": 1.9666666666666666e-05, + "loss": 0.6488, + "step": 825 + }, + { + "epoch": 0.9361233480176211, + "grad_norm": 1.170280933380127, + "learning_rate": 1.9641025641025643e-05, + "loss": 0.6808, + "step": 850 + }, + { + "epoch": 0.9636563876651982, + "grad_norm": 0.9678378105163574, + "learning_rate": 1.9615384615384617e-05, + "loss": 0.7139, + "step": 875 + }, + { + "epoch": 0.9911894273127754, + "grad_norm": 1.1732635498046875, + "learning_rate": 1.958974358974359e-05, + "loss": 0.6886, + "step": 900 + }, + { + "epoch": 1.0187224669603525, + "grad_norm": 0.9801719784736633, + "learning_rate": 1.9564102564102564e-05, + "loss": 0.6295, + "step": 925 + }, + { + "epoch": 1.0462555066079295, + "grad_norm": 1.0827771425247192, + "learning_rate": 1.953846153846154e-05, + "loss": 0.575, + "step": 950 + }, + { + "epoch": 1.0737885462555066, + "grad_norm": 1.4441510438919067, + "learning_rate": 1.9512820512820515e-05, + "loss": 0.5829, + "step": 975 + }, + { + "epoch": 1.1013215859030836, + "grad_norm": 1.0029908418655396, + "learning_rate": 1.9487179487179488e-05, + "loss": 0.5915, + "step": 1000 + }, + { + "epoch": 1.1013215859030836, + "eval_cer": 65.05164330012725, + "eval_loss": 0.7333521246910095, + "eval_runtime": 476.6198, + "eval_samples_per_second": 22.2, + "eval_steps_per_second": 5.552, + "eval_wer": 158.28382838283827, + "step": 1000 + }, + { + "epoch": 1.1288546255506609, + "grad_norm": 1.2138437032699585, + "learning_rate": 1.9461538461538462e-05, + "loss": 0.5968, + "step": 1025 + }, + { + "epoch": 1.1563876651982379, + "grad_norm": 1.1982746124267578, + "learning_rate": 1.943589743589744e-05, + "loss": 0.6058, + "step": 1050 + }, + { + "epoch": 1.183920704845815, + "grad_norm": 1.052754282951355, + "learning_rate": 1.9410256410256413e-05, + "loss": 0.5817, + "step": 1075 + }, + { + "epoch": 1.2114537444933922, + "grad_norm": 1.165330171585083, + "learning_rate": 1.9384615384615386e-05, + "loss": 0.557, + "step": 1100 + }, + { + "epoch": 1.2389867841409692, + "grad_norm": 0.9782461524009705, + "learning_rate": 1.935897435897436e-05, + "loss": 0.6146, + "step": 1125 + }, + { + "epoch": 1.2665198237885462, + "grad_norm": 1.2477244138717651, + "learning_rate": 1.9333333333333333e-05, + "loss": 0.575, + "step": 1150 + }, + { + "epoch": 1.2940528634361232, + "grad_norm": 0.9106314778327942, + "learning_rate": 1.930769230769231e-05, + "loss": 0.5746, + "step": 1175 + }, + { + "epoch": 1.3215859030837005, + "grad_norm": 1.1797159910202026, + "learning_rate": 1.9282051282051284e-05, + "loss": 0.5875, + "step": 1200 + }, + { + "epoch": 1.3491189427312775, + "grad_norm": 1.2631396055221558, + "learning_rate": 1.9256410256410258e-05, + "loss": 0.5728, + "step": 1225 + }, + { + "epoch": 1.3766519823788546, + "grad_norm": 1.024104118347168, + "learning_rate": 1.923076923076923e-05, + "loss": 0.6005, + "step": 1250 + }, + { + "epoch": 1.4041850220264318, + "grad_norm": 1.0408469438552856, + "learning_rate": 1.920512820512821e-05, + "loss": 0.5665, + "step": 1275 + }, + { + "epoch": 1.4317180616740088, + "grad_norm": 1.000183343887329, + "learning_rate": 1.9179487179487182e-05, + "loss": 0.5731, + "step": 1300 + }, + { + "epoch": 1.4592511013215859, + "grad_norm": 1.0863183736801147, + "learning_rate": 1.9153846153846156e-05, + "loss": 0.6083, + "step": 1325 + }, + { + "epoch": 1.4867841409691631, + "grad_norm": 1.1477770805358887, + "learning_rate": 1.912820512820513e-05, + "loss": 0.6137, + "step": 1350 + }, + { + "epoch": 1.51431718061674, + "grad_norm": 1.1528939008712769, + "learning_rate": 1.9102564102564106e-05, + "loss": 0.5898, + "step": 1375 + }, + { + "epoch": 1.5418502202643172, + "grad_norm": 0.9799731373786926, + "learning_rate": 1.907692307692308e-05, + "loss": 0.6014, + "step": 1400 + }, + { + "epoch": 1.5693832599118944, + "grad_norm": 1.0169357061386108, + "learning_rate": 1.905128205128205e-05, + "loss": 0.6065, + "step": 1425 + }, + { + "epoch": 1.5969162995594712, + "grad_norm": 1.0970255136489868, + "learning_rate": 1.9025641025641027e-05, + "loss": 0.5905, + "step": 1450 + }, + { + "epoch": 1.6244493392070485, + "grad_norm": 1.1324760913848877, + "learning_rate": 1.9e-05, + "loss": 0.5664, + "step": 1475 + }, + { + "epoch": 1.6519823788546255, + "grad_norm": 1.0170447826385498, + "learning_rate": 1.8974358974358975e-05, + "loss": 0.5502, + "step": 1500 + }, + { + "epoch": 1.6795154185022025, + "grad_norm": 1.145984172821045, + "learning_rate": 1.894871794871795e-05, + "loss": 0.5548, + "step": 1525 + }, + { + "epoch": 1.7070484581497798, + "grad_norm": 1.0441069602966309, + "learning_rate": 1.8923076923076925e-05, + "loss": 0.5479, + "step": 1550 + }, + { + "epoch": 1.7345814977973568, + "grad_norm": 1.049399495124817, + "learning_rate": 1.88974358974359e-05, + "loss": 0.5633, + "step": 1575 + }, + { + "epoch": 1.7621145374449338, + "grad_norm": 1.0441521406173706, + "learning_rate": 1.8871794871794873e-05, + "loss": 0.5587, + "step": 1600 + }, + { + "epoch": 1.789647577092511, + "grad_norm": 1.2298874855041504, + "learning_rate": 1.8846153846153846e-05, + "loss": 0.6115, + "step": 1625 + }, + { + "epoch": 1.8171806167400881, + "grad_norm": 1.3439756631851196, + "learning_rate": 1.8820512820512823e-05, + "loss": 0.6115, + "step": 1650 + }, + { + "epoch": 1.8447136563876652, + "grad_norm": 1.3676002025604248, + "learning_rate": 1.8794871794871797e-05, + "loss": 0.611, + "step": 1675 + }, + { + "epoch": 1.8722466960352424, + "grad_norm": 0.9913831949234009, + "learning_rate": 1.876923076923077e-05, + "loss": 0.58, + "step": 1700 + }, + { + "epoch": 1.8997797356828194, + "grad_norm": 0.9699712991714478, + "learning_rate": 1.8743589743589744e-05, + "loss": 0.5701, + "step": 1725 + }, + { + "epoch": 1.9273127753303965, + "grad_norm": 1.0531800985336304, + "learning_rate": 1.8717948717948718e-05, + "loss": 0.5763, + "step": 1750 + }, + { + "epoch": 1.9548458149779737, + "grad_norm": 1.2362020015716553, + "learning_rate": 1.8692307692307695e-05, + "loss": 0.5716, + "step": 1775 + }, + { + "epoch": 1.9823788546255505, + "grad_norm": 1.1315600872039795, + "learning_rate": 1.866666666666667e-05, + "loss": 0.5633, + "step": 1800 + }, + { + "epoch": 2.0099118942731278, + "grad_norm": 1.0342429876327515, + "learning_rate": 1.8641025641025642e-05, + "loss": 0.5612, + "step": 1825 + }, + { + "epoch": 2.037444933920705, + "grad_norm": 0.8838308453559875, + "learning_rate": 1.8615384615384616e-05, + "loss": 0.4806, + "step": 1850 + }, + { + "epoch": 2.064977973568282, + "grad_norm": 1.3863153457641602, + "learning_rate": 1.8589743589743593e-05, + "loss": 0.4645, + "step": 1875 + }, + { + "epoch": 2.092511013215859, + "grad_norm": 0.6479698419570923, + "learning_rate": 1.8564102564102567e-05, + "loss": 0.4494, + "step": 1900 + }, + { + "epoch": 2.1200440528634363, + "grad_norm": 1.058168649673462, + "learning_rate": 1.853846153846154e-05, + "loss": 0.5045, + "step": 1925 + }, + { + "epoch": 2.147577092511013, + "grad_norm": 1.0008894205093384, + "learning_rate": 1.8512820512820514e-05, + "loss": 0.437, + "step": 1950 + }, + { + "epoch": 2.1751101321585904, + "grad_norm": 1.0125486850738525, + "learning_rate": 1.848717948717949e-05, + "loss": 0.4645, + "step": 1975 + }, + { + "epoch": 2.202643171806167, + "grad_norm": 0.9113588929176331, + "learning_rate": 1.8461538461538465e-05, + "loss": 0.4754, + "step": 2000 + }, + { + "epoch": 2.202643171806167, + "eval_cer": 66.80212763518392, + "eval_loss": 0.680008590221405, + "eval_runtime": 471.9604, + "eval_samples_per_second": 22.419, + "eval_steps_per_second": 5.606, + "eval_wer": 176.77510608203676, + "step": 2000 + }, + { + "epoch": 2.2301762114537445, + "grad_norm": 0.8999997973442078, + "learning_rate": 1.8435897435897435e-05, + "loss": 0.4651, + "step": 2025 + }, + { + "epoch": 2.2577092511013217, + "grad_norm": 1.2379090785980225, + "learning_rate": 1.8410256410256412e-05, + "loss": 0.4663, + "step": 2050 + }, + { + "epoch": 2.2852422907488985, + "grad_norm": 0.773048460483551, + "learning_rate": 1.8384615384615386e-05, + "loss": 0.4431, + "step": 2075 + }, + { + "epoch": 2.3127753303964758, + "grad_norm": 0.908449649810791, + "learning_rate": 1.835897435897436e-05, + "loss": 0.4742, + "step": 2100 + }, + { + "epoch": 2.340308370044053, + "grad_norm": 0.7912779450416565, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.4306, + "step": 2125 + }, + { + "epoch": 2.36784140969163, + "grad_norm": 1.0128527879714966, + "learning_rate": 1.830769230769231e-05, + "loss": 0.452, + "step": 2150 + }, + { + "epoch": 2.395374449339207, + "grad_norm": 0.9093798398971558, + "learning_rate": 1.8282051282051284e-05, + "loss": 0.4623, + "step": 2175 + }, + { + "epoch": 2.4229074889867843, + "grad_norm": 0.8354513049125671, + "learning_rate": 1.8256410256410257e-05, + "loss": 0.4436, + "step": 2200 + }, + { + "epoch": 2.450440528634361, + "grad_norm": 0.9825035929679871, + "learning_rate": 1.823076923076923e-05, + "loss": 0.4354, + "step": 2225 + }, + { + "epoch": 2.4779735682819384, + "grad_norm": 0.9167904257774353, + "learning_rate": 1.8205128205128208e-05, + "loss": 0.4573, + "step": 2250 + }, + { + "epoch": 2.505506607929515, + "grad_norm": 0.855309009552002, + "learning_rate": 1.817948717948718e-05, + "loss": 0.4975, + "step": 2275 + }, + { + "epoch": 2.5330396475770924, + "grad_norm": 0.9728146195411682, + "learning_rate": 1.8153846153846155e-05, + "loss": 0.4755, + "step": 2300 + }, + { + "epoch": 2.5605726872246697, + "grad_norm": 0.9784273505210876, + "learning_rate": 1.812820512820513e-05, + "loss": 0.4574, + "step": 2325 + }, + { + "epoch": 2.5881057268722465, + "grad_norm": 1.0068365335464478, + "learning_rate": 1.8102564102564102e-05, + "loss": 0.468, + "step": 2350 + }, + { + "epoch": 2.6156387665198237, + "grad_norm": 0.883303701877594, + "learning_rate": 1.807692307692308e-05, + "loss": 0.4711, + "step": 2375 + }, + { + "epoch": 2.643171806167401, + "grad_norm": 0.9032068252563477, + "learning_rate": 1.8051282051282053e-05, + "loss": 0.446, + "step": 2400 + }, + { + "epoch": 2.670704845814978, + "grad_norm": 0.887459397315979, + "learning_rate": 1.8025641025641027e-05, + "loss": 0.441, + "step": 2425 + }, + { + "epoch": 2.698237885462555, + "grad_norm": 0.9685478210449219, + "learning_rate": 1.8e-05, + "loss": 0.4884, + "step": 2450 + }, + { + "epoch": 2.7257709251101323, + "grad_norm": 0.9034000039100647, + "learning_rate": 1.7974358974358977e-05, + "loss": 0.419, + "step": 2475 + }, + { + "epoch": 2.753303964757709, + "grad_norm": 1.1092092990875244, + "learning_rate": 1.794871794871795e-05, + "loss": 0.4934, + "step": 2500 + }, + { + "epoch": 2.7808370044052864, + "grad_norm": 0.9451773166656494, + "learning_rate": 1.7923076923076925e-05, + "loss": 0.4584, + "step": 2525 + }, + { + "epoch": 2.8083700440528636, + "grad_norm": 0.9756285548210144, + "learning_rate": 1.78974358974359e-05, + "loss": 0.4273, + "step": 2550 + }, + { + "epoch": 2.8359030837004404, + "grad_norm": 1.0345897674560547, + "learning_rate": 1.7871794871794875e-05, + "loss": 0.4462, + "step": 2575 + }, + { + "epoch": 2.8634361233480177, + "grad_norm": 0.7886134386062622, + "learning_rate": 1.784615384615385e-05, + "loss": 0.4645, + "step": 2600 + }, + { + "epoch": 2.890969162995595, + "grad_norm": 1.0247100591659546, + "learning_rate": 1.7820512820512823e-05, + "loss": 0.4285, + "step": 2625 + }, + { + "epoch": 2.9185022026431717, + "grad_norm": 1.1661251783370972, + "learning_rate": 1.7794871794871796e-05, + "loss": 0.4543, + "step": 2650 + }, + { + "epoch": 2.946035242290749, + "grad_norm": 0.8847468495368958, + "learning_rate": 1.776923076923077e-05, + "loss": 0.4672, + "step": 2675 + }, + { + "epoch": 2.9735682819383262, + "grad_norm": 1.077468991279602, + "learning_rate": 1.7743589743589744e-05, + "loss": 0.4421, + "step": 2700 + }, + { + "epoch": 3.001101321585903, + "grad_norm": 0.8612393140792847, + "learning_rate": 1.7717948717948717e-05, + "loss": 0.4854, + "step": 2725 + }, + { + "epoch": 3.0286343612334803, + "grad_norm": 0.7957535982131958, + "learning_rate": 1.7692307692307694e-05, + "loss": 0.3886, + "step": 2750 + }, + { + "epoch": 3.056167400881057, + "grad_norm": 0.8133636713027954, + "learning_rate": 1.7666666666666668e-05, + "loss": 0.3436, + "step": 2775 + }, + { + "epoch": 3.0837004405286343, + "grad_norm": 0.9280221462249756, + "learning_rate": 1.7641025641025642e-05, + "loss": 0.3586, + "step": 2800 + }, + { + "epoch": 3.1112334801762116, + "grad_norm": 0.6199328303337097, + "learning_rate": 1.7615384615384615e-05, + "loss": 0.3373, + "step": 2825 + }, + { + "epoch": 3.1387665198237884, + "grad_norm": 0.8616262674331665, + "learning_rate": 1.7589743589743592e-05, + "loss": 0.3515, + "step": 2850 + }, + { + "epoch": 3.1662995594713657, + "grad_norm": 0.8298392295837402, + "learning_rate": 1.7564102564102566e-05, + "loss": 0.3781, + "step": 2875 + }, + { + "epoch": 3.193832599118943, + "grad_norm": 1.0421444177627563, + "learning_rate": 1.753846153846154e-05, + "loss": 0.3673, + "step": 2900 + }, + { + "epoch": 3.2213656387665197, + "grad_norm": 0.769826352596283, + "learning_rate": 1.7512820512820513e-05, + "loss": 0.3726, + "step": 2925 + }, + { + "epoch": 3.248898678414097, + "grad_norm": 0.9118036031723022, + "learning_rate": 1.7487179487179487e-05, + "loss": 0.3702, + "step": 2950 + }, + { + "epoch": 3.2764317180616738, + "grad_norm": 0.8753936290740967, + "learning_rate": 1.7461538461538464e-05, + "loss": 0.3637, + "step": 2975 + }, + { + "epoch": 3.303964757709251, + "grad_norm": 0.8670012354850769, + "learning_rate": 1.7435897435897438e-05, + "loss": 0.3484, + "step": 3000 + }, + { + "epoch": 3.303964757709251, + "eval_cer": 86.51599738631023, + "eval_loss": 0.6673649549484253, + "eval_runtime": 498.8483, + "eval_samples_per_second": 21.211, + "eval_steps_per_second": 5.304, + "eval_wer": 250.1933050447902, + "step": 3000 + }, + { + "epoch": 3.3314977973568283, + "grad_norm": 0.8442785143852234, + "learning_rate": 1.741025641025641e-05, + "loss": 0.3744, + "step": 3025 + }, + { + "epoch": 3.359030837004405, + "grad_norm": 0.9680752754211426, + "learning_rate": 1.7384615384615385e-05, + "loss": 0.4015, + "step": 3050 + }, + { + "epoch": 3.3865638766519823, + "grad_norm": 1.012505292892456, + "learning_rate": 1.7358974358974362e-05, + "loss": 0.3609, + "step": 3075 + }, + { + "epoch": 3.4140969162995596, + "grad_norm": 0.838257908821106, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.425, + "step": 3100 + }, + { + "epoch": 3.4416299559471364, + "grad_norm": 1.0413908958435059, + "learning_rate": 1.730769230769231e-05, + "loss": 0.3912, + "step": 3125 + }, + { + "epoch": 3.4691629955947136, + "grad_norm": 0.6918802261352539, + "learning_rate": 1.7282051282051283e-05, + "loss": 0.3665, + "step": 3150 + }, + { + "epoch": 3.496696035242291, + "grad_norm": 0.8139140009880066, + "learning_rate": 1.725641025641026e-05, + "loss": 0.3797, + "step": 3175 + }, + { + "epoch": 3.5242290748898677, + "grad_norm": 0.8543463349342346, + "learning_rate": 1.7230769230769234e-05, + "loss": 0.3906, + "step": 3200 + }, + { + "epoch": 3.551762114537445, + "grad_norm": 0.8312181234359741, + "learning_rate": 1.7205128205128207e-05, + "loss": 0.3728, + "step": 3225 + }, + { + "epoch": 3.579295154185022, + "grad_norm": 0.8691410422325134, + "learning_rate": 1.717948717948718e-05, + "loss": 0.3696, + "step": 3250 + }, + { + "epoch": 3.606828193832599, + "grad_norm": 0.9111345410346985, + "learning_rate": 1.7153846153846155e-05, + "loss": 0.4036, + "step": 3275 + }, + { + "epoch": 3.6343612334801763, + "grad_norm": 1.0638726949691772, + "learning_rate": 1.7128205128205128e-05, + "loss": 0.4002, + "step": 3300 + }, + { + "epoch": 3.6618942731277535, + "grad_norm": 0.650422215461731, + "learning_rate": 1.7102564102564102e-05, + "loss": 0.345, + "step": 3325 + }, + { + "epoch": 3.6894273127753303, + "grad_norm": 0.9388235211372375, + "learning_rate": 1.707692307692308e-05, + "loss": 0.3814, + "step": 3350 + }, + { + "epoch": 3.7169603524229076, + "grad_norm": 0.843593955039978, + "learning_rate": 1.7051282051282053e-05, + "loss": 0.3333, + "step": 3375 + }, + { + "epoch": 3.744493392070485, + "grad_norm": 0.9046334028244019, + "learning_rate": 1.7025641025641026e-05, + "loss": 0.3803, + "step": 3400 + }, + { + "epoch": 3.7720264317180616, + "grad_norm": 1.0686695575714111, + "learning_rate": 1.7e-05, + "loss": 0.3584, + "step": 3425 + }, + { + "epoch": 3.799559471365639, + "grad_norm": 0.9424968361854553, + "learning_rate": 1.6974358974358977e-05, + "loss": 0.3596, + "step": 3450 + }, + { + "epoch": 3.827092511013216, + "grad_norm": 0.8426197171211243, + "learning_rate": 1.694871794871795e-05, + "loss": 0.3648, + "step": 3475 + }, + { + "epoch": 3.854625550660793, + "grad_norm": 1.1227201223373413, + "learning_rate": 1.6923076923076924e-05, + "loss": 0.4158, + "step": 3500 + }, + { + "epoch": 3.88215859030837, + "grad_norm": 0.8253980278968811, + "learning_rate": 1.6897435897435898e-05, + "loss": 0.3581, + "step": 3525 + }, + { + "epoch": 3.909691629955947, + "grad_norm": 1.038388729095459, + "learning_rate": 1.687179487179487e-05, + "loss": 0.3824, + "step": 3550 + }, + { + "epoch": 3.9372246696035242, + "grad_norm": 0.847663164138794, + "learning_rate": 1.684615384615385e-05, + "loss": 0.4023, + "step": 3575 + }, + { + "epoch": 3.964757709251101, + "grad_norm": 1.0508993864059448, + "learning_rate": 1.6820512820512822e-05, + "loss": 0.3857, + "step": 3600 + }, + { + "epoch": 3.9922907488986783, + "grad_norm": 0.8296692371368408, + "learning_rate": 1.6794871794871796e-05, + "loss": 0.3799, + "step": 3625 + }, + { + "epoch": 4.0198237885462555, + "grad_norm": 0.7697169184684753, + "learning_rate": 1.676923076923077e-05, + "loss": 0.2988, + "step": 3650 + }, + { + "epoch": 4.047356828193832, + "grad_norm": 0.8309630751609802, + "learning_rate": 1.6743589743589747e-05, + "loss": 0.3086, + "step": 3675 + }, + { + "epoch": 4.07488986784141, + "grad_norm": 0.9464417099952698, + "learning_rate": 1.671794871794872e-05, + "loss": 0.2595, + "step": 3700 + }, + { + "epoch": 4.102422907488987, + "grad_norm": 0.6952372789382935, + "learning_rate": 1.6692307692307694e-05, + "loss": 0.3095, + "step": 3725 + }, + { + "epoch": 4.129955947136564, + "grad_norm": 1.4975730180740356, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.3056, + "step": 3750 + }, + { + "epoch": 4.157488986784141, + "grad_norm": 0.7223972082138062, + "learning_rate": 1.6641025641025645e-05, + "loss": 0.2897, + "step": 3775 + }, + { + "epoch": 4.185022026431718, + "grad_norm": 0.5949111580848694, + "learning_rate": 1.6615384615384618e-05, + "loss": 0.2548, + "step": 3800 + }, + { + "epoch": 4.212555066079295, + "grad_norm": 0.7186844348907471, + "learning_rate": 1.6589743589743592e-05, + "loss": 0.2927, + "step": 3825 + }, + { + "epoch": 4.240088105726873, + "grad_norm": 0.8656429648399353, + "learning_rate": 1.6564102564102565e-05, + "loss": 0.2867, + "step": 3850 + }, + { + "epoch": 4.2676211453744495, + "grad_norm": 0.7154597640037537, + "learning_rate": 1.653846153846154e-05, + "loss": 0.3131, + "step": 3875 + }, + { + "epoch": 4.295154185022026, + "grad_norm": 0.7212648987770081, + "learning_rate": 1.6512820512820513e-05, + "loss": 0.305, + "step": 3900 + }, + { + "epoch": 4.322687224669604, + "grad_norm": 0.9249489307403564, + "learning_rate": 1.6487179487179486e-05, + "loss": 0.3191, + "step": 3925 + }, + { + "epoch": 4.350220264317181, + "grad_norm": 0.8987734317779541, + "learning_rate": 1.6461538461538463e-05, + "loss": 0.3007, + "step": 3950 + }, + { + "epoch": 4.377753303964758, + "grad_norm": 0.8961289525032043, + "learning_rate": 1.6435897435897437e-05, + "loss": 0.3249, + "step": 3975 + }, + { + "epoch": 4.405286343612334, + "grad_norm": 1.0475411415100098, + "learning_rate": 1.641025641025641e-05, + "loss": 0.3012, + "step": 4000 + }, + { + "epoch": 4.405286343612334, + "eval_cer": 143.75523024543463, + "eval_loss": 0.673335075378418, + "eval_runtime": 578.457, + "eval_samples_per_second": 18.292, + "eval_steps_per_second": 4.574, + "eval_wer": 390.6647807637907, + "step": 4000 + }, + { + "epoch": 4.432819383259912, + "grad_norm": 0.6653734445571899, + "learning_rate": 1.6384615384615384e-05, + "loss": 0.3257, + "step": 4025 + }, + { + "epoch": 4.460352422907489, + "grad_norm": 1.0824356079101562, + "learning_rate": 1.635897435897436e-05, + "loss": 0.3307, + "step": 4050 + }, + { + "epoch": 4.487885462555066, + "grad_norm": 1.0432815551757812, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.3184, + "step": 4075 + }, + { + "epoch": 4.515418502202643, + "grad_norm": 0.6064091324806213, + "learning_rate": 1.630769230769231e-05, + "loss": 0.2876, + "step": 4100 + }, + { + "epoch": 4.54295154185022, + "grad_norm": 0.9204082489013672, + "learning_rate": 1.6282051282051282e-05, + "loss": 0.3239, + "step": 4125 + }, + { + "epoch": 4.570484581497797, + "grad_norm": 0.9674654006958008, + "learning_rate": 1.625641025641026e-05, + "loss": 0.3295, + "step": 4150 + }, + { + "epoch": 4.598017621145375, + "grad_norm": 1.0070710182189941, + "learning_rate": 1.6230769230769233e-05, + "loss": 0.3171, + "step": 4175 + }, + { + "epoch": 4.6255506607929515, + "grad_norm": 0.7143537998199463, + "learning_rate": 1.6205128205128207e-05, + "loss": 0.299, + "step": 4200 + }, + { + "epoch": 4.653083700440528, + "grad_norm": 1.4927059412002563, + "learning_rate": 1.617948717948718e-05, + "loss": 0.3133, + "step": 4225 + }, + { + "epoch": 4.680616740088106, + "grad_norm": 0.9383150339126587, + "learning_rate": 1.6153846153846154e-05, + "loss": 0.2923, + "step": 4250 + }, + { + "epoch": 4.708149779735683, + "grad_norm": 0.8235558271408081, + "learning_rate": 1.612820512820513e-05, + "loss": 0.3061, + "step": 4275 + }, + { + "epoch": 4.73568281938326, + "grad_norm": 1.0017234086990356, + "learning_rate": 1.6102564102564105e-05, + "loss": 0.3159, + "step": 4300 + }, + { + "epoch": 4.763215859030837, + "grad_norm": 0.8471269607543945, + "learning_rate": 1.607692307692308e-05, + "loss": 0.3082, + "step": 4325 + }, + { + "epoch": 4.790748898678414, + "grad_norm": 0.9471202492713928, + "learning_rate": 1.6051282051282052e-05, + "loss": 0.3103, + "step": 4350 + }, + { + "epoch": 4.818281938325991, + "grad_norm": 1.0645701885223389, + "learning_rate": 1.602564102564103e-05, + "loss": 0.3185, + "step": 4375 + }, + { + "epoch": 4.845814977973569, + "grad_norm": 0.6874596476554871, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.2969, + "step": 4400 + }, + { + "epoch": 4.8733480176211454, + "grad_norm": 0.6609659194946289, + "learning_rate": 1.5974358974358976e-05, + "loss": 0.2947, + "step": 4425 + }, + { + "epoch": 4.900881057268722, + "grad_norm": 0.9104480743408203, + "learning_rate": 1.594871794871795e-05, + "loss": 0.3108, + "step": 4450 + }, + { + "epoch": 4.9284140969163, + "grad_norm": 0.7762302756309509, + "learning_rate": 1.5923076923076924e-05, + "loss": 0.2803, + "step": 4475 + }, + { + "epoch": 4.955947136563877, + "grad_norm": 0.8123212456703186, + "learning_rate": 1.5897435897435897e-05, + "loss": 0.2983, + "step": 4500 + }, + { + "epoch": 4.983480176211454, + "grad_norm": 0.8753710985183716, + "learning_rate": 1.587179487179487e-05, + "loss": 0.3076, + "step": 4525 + }, + { + "epoch": 5.011013215859031, + "grad_norm": 0.8646990060806274, + "learning_rate": 1.5846153846153848e-05, + "loss": 0.2629, + "step": 4550 + }, + { + "epoch": 5.038546255506608, + "grad_norm": 0.45370516180992126, + "learning_rate": 1.582051282051282e-05, + "loss": 0.2463, + "step": 4575 + }, + { + "epoch": 5.066079295154185, + "grad_norm": 0.8293446898460388, + "learning_rate": 1.5794871794871795e-05, + "loss": 0.2204, + "step": 4600 + }, + { + "epoch": 5.093612334801762, + "grad_norm": 0.5812169909477234, + "learning_rate": 1.576923076923077e-05, + "loss": 0.2584, + "step": 4625 + }, + { + "epoch": 5.121145374449339, + "grad_norm": 0.6977860331535339, + "learning_rate": 1.5743589743589746e-05, + "loss": 0.2581, + "step": 4650 + }, + { + "epoch": 5.148678414096916, + "grad_norm": 0.5691882967948914, + "learning_rate": 1.571794871794872e-05, + "loss": 0.2561, + "step": 4675 + }, + { + "epoch": 5.176211453744493, + "grad_norm": 0.6710711121559143, + "learning_rate": 1.5692307692307693e-05, + "loss": 0.2287, + "step": 4700 + }, + { + "epoch": 5.203744493392071, + "grad_norm": 0.6122124195098877, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.2539, + "step": 4725 + }, + { + "epoch": 5.2312775330396475, + "grad_norm": 0.8912720680236816, + "learning_rate": 1.5641025641025644e-05, + "loss": 0.2261, + "step": 4750 + }, + { + "epoch": 5.258810572687224, + "grad_norm": 0.7192656993865967, + "learning_rate": 1.5615384615384618e-05, + "loss": 0.277, + "step": 4775 + }, + { + "epoch": 5.286343612334802, + "grad_norm": 0.5232201814651489, + "learning_rate": 1.558974358974359e-05, + "loss": 0.2315, + "step": 4800 + }, + { + "epoch": 5.313876651982379, + "grad_norm": 0.5389770865440369, + "learning_rate": 1.5564102564102565e-05, + "loss": 0.2157, + "step": 4825 + }, + { + "epoch": 5.341409691629956, + "grad_norm": 0.7740320563316345, + "learning_rate": 1.553846153846154e-05, + "loss": 0.2374, + "step": 4850 + }, + { + "epoch": 5.368942731277533, + "grad_norm": 0.8149337768554688, + "learning_rate": 1.5512820512820516e-05, + "loss": 0.2459, + "step": 4875 + }, + { + "epoch": 5.39647577092511, + "grad_norm": 0.8045769929885864, + "learning_rate": 1.548717948717949e-05, + "loss": 0.2262, + "step": 4900 + }, + { + "epoch": 5.424008810572687, + "grad_norm": 0.8608861565589905, + "learning_rate": 1.5461538461538463e-05, + "loss": 0.2812, + "step": 4925 + }, + { + "epoch": 5.451541850220265, + "grad_norm": 0.6250303387641907, + "learning_rate": 1.5435897435897436e-05, + "loss": 0.2565, + "step": 4950 + }, + { + "epoch": 5.479074889867841, + "grad_norm": 0.6353731155395508, + "learning_rate": 1.5410256410256414e-05, + "loss": 0.2348, + "step": 4975 + }, + { + "epoch": 5.506607929515418, + "grad_norm": 0.6314155459403992, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.2416, + "step": 5000 + }, + { + "epoch": 5.506607929515418, + "eval_cer": 89.07064986874234, + "eval_loss": 0.685695230960846, + "eval_runtime": 499.9069, + "eval_samples_per_second": 21.166, + "eval_steps_per_second": 5.293, + "eval_wer": 259.84912776991985, + "step": 5000 + }, + { + "epoch": 5.534140969162996, + "grad_norm": 0.7577415704727173, + "learning_rate": 1.535897435897436e-05, + "loss": 0.2469, + "step": 5025 + }, + { + "epoch": 5.561674008810573, + "grad_norm": 0.8051561713218689, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.2706, + "step": 5050 + }, + { + "epoch": 5.5892070484581495, + "grad_norm": 0.7317938208580017, + "learning_rate": 1.5307692307692308e-05, + "loss": 0.251, + "step": 5075 + }, + { + "epoch": 5.616740088105727, + "grad_norm": 0.9899778962135315, + "learning_rate": 1.5282051282051282e-05, + "loss": 0.2415, + "step": 5100 + }, + { + "epoch": 5.644273127753304, + "grad_norm": 1.2906497716903687, + "learning_rate": 1.5256410256410257e-05, + "loss": 0.2477, + "step": 5125 + }, + { + "epoch": 5.671806167400881, + "grad_norm": 0.7807902693748474, + "learning_rate": 1.523076923076923e-05, + "loss": 0.2629, + "step": 5150 + }, + { + "epoch": 5.6993392070484585, + "grad_norm": 0.7599391937255859, + "learning_rate": 1.5205128205128206e-05, + "loss": 0.229, + "step": 5175 + }, + { + "epoch": 5.726872246696035, + "grad_norm": 0.71393221616745, + "learning_rate": 1.517948717948718e-05, + "loss": 0.2768, + "step": 5200 + }, + { + "epoch": 5.754405286343612, + "grad_norm": 0.6856432557106018, + "learning_rate": 1.5153846153846155e-05, + "loss": 0.2434, + "step": 5225 + }, + { + "epoch": 5.78193832599119, + "grad_norm": 1.1420562267303467, + "learning_rate": 1.5128205128205129e-05, + "loss": 0.2724, + "step": 5250 + }, + { + "epoch": 5.809471365638767, + "grad_norm": 0.7609044909477234, + "learning_rate": 1.5102564102564104e-05, + "loss": 0.2538, + "step": 5275 + }, + { + "epoch": 5.8370044052863435, + "grad_norm": 0.9725056290626526, + "learning_rate": 1.5076923076923078e-05, + "loss": 0.2455, + "step": 5300 + }, + { + "epoch": 5.864537444933921, + "grad_norm": 0.8432466387748718, + "learning_rate": 1.5051282051282053e-05, + "loss": 0.2502, + "step": 5325 + }, + { + "epoch": 5.892070484581498, + "grad_norm": 0.784191370010376, + "learning_rate": 1.5025641025641027e-05, + "loss": 0.2559, + "step": 5350 + }, + { + "epoch": 5.919603524229075, + "grad_norm": 0.6561942100524902, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.2337, + "step": 5375 + }, + { + "epoch": 5.9471365638766525, + "grad_norm": 0.9330358505249023, + "learning_rate": 1.4974358974358976e-05, + "loss": 0.2369, + "step": 5400 + }, + { + "epoch": 5.974669603524229, + "grad_norm": 0.8283205628395081, + "learning_rate": 1.494871794871795e-05, + "loss": 0.2332, + "step": 5425 + }, + { + "epoch": 6.002202643171806, + "grad_norm": 0.7662353515625, + "learning_rate": 1.4923076923076925e-05, + "loss": 0.2154, + "step": 5450 + }, + { + "epoch": 6.029735682819383, + "grad_norm": 0.6226630210876465, + "learning_rate": 1.4897435897435898e-05, + "loss": 0.1824, + "step": 5475 + }, + { + "epoch": 6.057268722466961, + "grad_norm": 0.7479391098022461, + "learning_rate": 1.4871794871794874e-05, + "loss": 0.1676, + "step": 5500 + }, + { + "epoch": 6.084801762114537, + "grad_norm": 0.8184050917625427, + "learning_rate": 1.4846153846153847e-05, + "loss": 0.1946, + "step": 5525 + }, + { + "epoch": 6.112334801762114, + "grad_norm": 0.6436644196510315, + "learning_rate": 1.4820512820512823e-05, + "loss": 0.2017, + "step": 5550 + }, + { + "epoch": 6.139867841409692, + "grad_norm": 0.5338073968887329, + "learning_rate": 1.4794871794871796e-05, + "loss": 0.1875, + "step": 5575 + }, + { + "epoch": 6.167400881057269, + "grad_norm": 0.8311201930046082, + "learning_rate": 1.4769230769230772e-05, + "loss": 0.209, + "step": 5600 + }, + { + "epoch": 6.1949339207048455, + "grad_norm": 0.5197294354438782, + "learning_rate": 1.4743589743589745e-05, + "loss": 0.1704, + "step": 5625 + }, + { + "epoch": 6.222466960352423, + "grad_norm": 0.8122308850288391, + "learning_rate": 1.471794871794872e-05, + "loss": 0.2048, + "step": 5650 + }, + { + "epoch": 6.25, + "grad_norm": 0.6813929677009583, + "learning_rate": 1.4692307692307694e-05, + "loss": 0.1971, + "step": 5675 + }, + { + "epoch": 6.277533039647577, + "grad_norm": 0.5382255911827087, + "learning_rate": 1.4666666666666666e-05, + "loss": 0.1629, + "step": 5700 + }, + { + "epoch": 6.3050660792951545, + "grad_norm": 0.5841939449310303, + "learning_rate": 1.4641025641025642e-05, + "loss": 0.1939, + "step": 5725 + }, + { + "epoch": 6.332599118942731, + "grad_norm": 0.6143937706947327, + "learning_rate": 1.4615384615384615e-05, + "loss": 0.1787, + "step": 5750 + }, + { + "epoch": 6.360132158590308, + "grad_norm": 0.6379638314247131, + "learning_rate": 1.458974358974359e-05, + "loss": 0.1867, + "step": 5775 + }, + { + "epoch": 6.387665198237886, + "grad_norm": 0.6888879537582397, + "learning_rate": 1.4564102564102564e-05, + "loss": 0.1941, + "step": 5800 + }, + { + "epoch": 6.415198237885463, + "grad_norm": 0.645486056804657, + "learning_rate": 1.453846153846154e-05, + "loss": 0.1823, + "step": 5825 + }, + { + "epoch": 6.442731277533039, + "grad_norm": 0.5782633423805237, + "learning_rate": 1.4512820512820513e-05, + "loss": 0.2033, + "step": 5850 + }, + { + "epoch": 6.470264317180617, + "grad_norm": 0.81694495677948, + "learning_rate": 1.4487179487179489e-05, + "loss": 0.2154, + "step": 5875 + }, + { + "epoch": 6.497797356828194, + "grad_norm": 0.7232884168624878, + "learning_rate": 1.4461538461538462e-05, + "loss": 0.1968, + "step": 5900 + }, + { + "epoch": 6.525330396475771, + "grad_norm": 0.7122112512588501, + "learning_rate": 1.4435897435897438e-05, + "loss": 0.217, + "step": 5925 + }, + { + "epoch": 6.5528634361233475, + "grad_norm": 0.5657493472099304, + "learning_rate": 1.4410256410256411e-05, + "loss": 0.1989, + "step": 5950 + }, + { + "epoch": 6.580396475770925, + "grad_norm": 0.7421569228172302, + "learning_rate": 1.4384615384615387e-05, + "loss": 0.1993, + "step": 5975 + }, + { + "epoch": 6.607929515418502, + "grad_norm": 0.7630535364151001, + "learning_rate": 1.435897435897436e-05, + "loss": 0.194, + "step": 6000 + }, + { + "epoch": 6.607929515418502, + "eval_cer": 75.63250146160283, + "eval_loss": 0.7101058959960938, + "eval_runtime": 501.2734, + "eval_samples_per_second": 21.108, + "eval_steps_per_second": 5.279, + "eval_wer": 197.0768505421971, + "step": 6000 + }, + { + "epoch": 6.635462555066079, + "grad_norm": 0.5665035843849182, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.1686, + "step": 6025 + }, + { + "epoch": 6.6629955947136565, + "grad_norm": 0.5344381928443909, + "learning_rate": 1.430769230769231e-05, + "loss": 0.2208, + "step": 6050 + }, + { + "epoch": 6.690528634361233, + "grad_norm": 0.8236050605773926, + "learning_rate": 1.4282051282051283e-05, + "loss": 0.228, + "step": 6075 + }, + { + "epoch": 6.71806167400881, + "grad_norm": 0.8301483988761902, + "learning_rate": 1.4256410256410258e-05, + "loss": 0.2233, + "step": 6100 + }, + { + "epoch": 6.745594713656388, + "grad_norm": 0.5735576748847961, + "learning_rate": 1.4230769230769232e-05, + "loss": 0.2112, + "step": 6125 + }, + { + "epoch": 6.773127753303965, + "grad_norm": 0.7066081762313843, + "learning_rate": 1.4205128205128207e-05, + "loss": 0.1978, + "step": 6150 + }, + { + "epoch": 6.8006607929515415, + "grad_norm": 0.6695354580879211, + "learning_rate": 1.4179487179487181e-05, + "loss": 0.2082, + "step": 6175 + }, + { + "epoch": 6.828193832599119, + "grad_norm": 0.6278955340385437, + "learning_rate": 1.4153846153846156e-05, + "loss": 0.1864, + "step": 6200 + }, + { + "epoch": 6.855726872246696, + "grad_norm": 0.7320701479911804, + "learning_rate": 1.412820512820513e-05, + "loss": 0.2168, + "step": 6225 + }, + { + "epoch": 6.883259911894273, + "grad_norm": 0.612777829170227, + "learning_rate": 1.4102564102564105e-05, + "loss": 0.1938, + "step": 6250 + }, + { + "epoch": 6.9107929515418505, + "grad_norm": 0.7008684873580933, + "learning_rate": 1.4076923076923079e-05, + "loss": 0.1862, + "step": 6275 + }, + { + "epoch": 6.938325991189427, + "grad_norm": 0.5621137619018555, + "learning_rate": 1.405128205128205e-05, + "loss": 0.1923, + "step": 6300 + }, + { + "epoch": 6.965859030837004, + "grad_norm": 0.8963515758514404, + "learning_rate": 1.4025641025641026e-05, + "loss": 0.1986, + "step": 6325 + }, + { + "epoch": 6.993392070484582, + "grad_norm": 0.6735339760780334, + "learning_rate": 1.4e-05, + "loss": 0.1991, + "step": 6350 + }, + { + "epoch": 7.020925110132159, + "grad_norm": 0.4790953993797302, + "learning_rate": 1.3974358974358975e-05, + "loss": 0.1505, + "step": 6375 + }, + { + "epoch": 7.048458149779735, + "grad_norm": 0.4763677716255188, + "learning_rate": 1.3948717948717949e-05, + "loss": 0.1424, + "step": 6400 + }, + { + "epoch": 7.075991189427313, + "grad_norm": 0.41467663645744324, + "learning_rate": 1.3923076923076924e-05, + "loss": 0.1336, + "step": 6425 + }, + { + "epoch": 7.10352422907489, + "grad_norm": 0.5693683624267578, + "learning_rate": 1.3897435897435898e-05, + "loss": 0.1341, + "step": 6450 + }, + { + "epoch": 7.131057268722467, + "grad_norm": 0.6672292947769165, + "learning_rate": 1.3871794871794873e-05, + "loss": 0.1446, + "step": 6475 + }, + { + "epoch": 7.158590308370044, + "grad_norm": 0.6646426320075989, + "learning_rate": 1.3846153846153847e-05, + "loss": 0.1573, + "step": 6500 + }, + { + "epoch": 7.186123348017621, + "grad_norm": 1.0004900693893433, + "learning_rate": 1.3820512820512822e-05, + "loss": 0.1445, + "step": 6525 + }, + { + "epoch": 7.213656387665198, + "grad_norm": 0.8498390913009644, + "learning_rate": 1.3794871794871796e-05, + "loss": 0.1453, + "step": 6550 + }, + { + "epoch": 7.241189427312776, + "grad_norm": 0.43505725264549255, + "learning_rate": 1.3769230769230771e-05, + "loss": 0.1508, + "step": 6575 + }, + { + "epoch": 7.2687224669603525, + "grad_norm": 0.717443585395813, + "learning_rate": 1.3743589743589745e-05, + "loss": 0.1517, + "step": 6600 + }, + { + "epoch": 7.296255506607929, + "grad_norm": 0.6070489287376404, + "learning_rate": 1.3717948717948718e-05, + "loss": 0.1646, + "step": 6625 + }, + { + "epoch": 7.323788546255507, + "grad_norm": 1.4384660720825195, + "learning_rate": 1.3692307692307694e-05, + "loss": 0.1566, + "step": 6650 + }, + { + "epoch": 7.351321585903084, + "grad_norm": 0.6029159426689148, + "learning_rate": 1.3666666666666667e-05, + "loss": 0.1683, + "step": 6675 + }, + { + "epoch": 7.378854625550661, + "grad_norm": 0.5382408499717712, + "learning_rate": 1.3641025641025643e-05, + "loss": 0.1366, + "step": 6700 + }, + { + "epoch": 7.406387665198238, + "grad_norm": 0.4797053039073944, + "learning_rate": 1.3615384615384616e-05, + "loss": 0.1342, + "step": 6725 + }, + { + "epoch": 7.433920704845815, + "grad_norm": 0.6972818970680237, + "learning_rate": 1.3589743589743592e-05, + "loss": 0.155, + "step": 6750 + }, + { + "epoch": 7.461453744493392, + "grad_norm": 0.5016514658927917, + "learning_rate": 1.3564102564102565e-05, + "loss": 0.1663, + "step": 6775 + }, + { + "epoch": 7.48898678414097, + "grad_norm": 0.618401050567627, + "learning_rate": 1.353846153846154e-05, + "loss": 0.1597, + "step": 6800 + }, + { + "epoch": 7.516519823788546, + "grad_norm": 0.675841748714447, + "learning_rate": 1.3512820512820514e-05, + "loss": 0.1482, + "step": 6825 + }, + { + "epoch": 7.544052863436123, + "grad_norm": 0.4411374628543854, + "learning_rate": 1.348717948717949e-05, + "loss": 0.1497, + "step": 6850 + }, + { + "epoch": 7.5715859030837, + "grad_norm": 0.7848430275917053, + "learning_rate": 1.3461538461538463e-05, + "loss": 0.1439, + "step": 6875 + }, + { + "epoch": 7.599118942731278, + "grad_norm": 0.6942805051803589, + "learning_rate": 1.3435897435897435e-05, + "loss": 0.178, + "step": 6900 + }, + { + "epoch": 7.6266519823788546, + "grad_norm": 0.48058149218559265, + "learning_rate": 1.341025641025641e-05, + "loss": 0.1429, + "step": 6925 + }, + { + "epoch": 7.654185022026431, + "grad_norm": 0.7218976020812988, + "learning_rate": 1.3384615384615384e-05, + "loss": 0.1804, + "step": 6950 + }, + { + "epoch": 7.681718061674009, + "grad_norm": 0.6133447885513306, + "learning_rate": 1.335897435897436e-05, + "loss": 0.1686, + "step": 6975 + }, + { + "epoch": 7.709251101321586, + "grad_norm": 0.8538093566894531, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.1436, + "step": 7000 + }, + { + "epoch": 7.709251101321586, + "eval_cer": 103.36913782628135, + "eval_loss": 0.7327257394790649, + "eval_runtime": 545.0853, + "eval_samples_per_second": 19.412, + "eval_steps_per_second": 4.854, + "eval_wer": 235.48326261197548, + "step": 7000 + }, + { + "epoch": 7.736784140969163, + "grad_norm": 0.8791314363479614, + "learning_rate": 1.3307692307692309e-05, + "loss": 0.1747, + "step": 7025 + }, + { + "epoch": 7.76431718061674, + "grad_norm": 0.7194722294807434, + "learning_rate": 1.3282051282051282e-05, + "loss": 0.152, + "step": 7050 + }, + { + "epoch": 7.791850220264317, + "grad_norm": 0.6934202313423157, + "learning_rate": 1.3256410256410258e-05, + "loss": 0.1532, + "step": 7075 + }, + { + "epoch": 7.819383259911894, + "grad_norm": 0.8959735631942749, + "learning_rate": 1.3230769230769231e-05, + "loss": 0.1377, + "step": 7100 + }, + { + "epoch": 7.846916299559472, + "grad_norm": 0.6112185716629028, + "learning_rate": 1.3205128205128207e-05, + "loss": 0.1407, + "step": 7125 + }, + { + "epoch": 7.8744493392070485, + "grad_norm": 0.6644231081008911, + "learning_rate": 1.317948717948718e-05, + "loss": 0.1777, + "step": 7150 + }, + { + "epoch": 7.901982378854625, + "grad_norm": 0.4906589388847351, + "learning_rate": 1.3153846153846156e-05, + "loss": 0.1619, + "step": 7175 + }, + { + "epoch": 7.929515418502203, + "grad_norm": 0.6490347981452942, + "learning_rate": 1.312820512820513e-05, + "loss": 0.1727, + "step": 7200 + }, + { + "epoch": 7.95704845814978, + "grad_norm": 0.709720253944397, + "learning_rate": 1.3102564102564103e-05, + "loss": 0.1646, + "step": 7225 + }, + { + "epoch": 7.984581497797357, + "grad_norm": 0.694337785243988, + "learning_rate": 1.3076923076923078e-05, + "loss": 0.1604, + "step": 7250 + }, + { + "epoch": 8.012114537444933, + "grad_norm": 0.41181373596191406, + "learning_rate": 1.3051282051282052e-05, + "loss": 0.1716, + "step": 7275 + }, + { + "epoch": 8.039647577092511, + "grad_norm": 0.49692800641059875, + "learning_rate": 1.3025641025641027e-05, + "loss": 0.1231, + "step": 7300 + }, + { + "epoch": 8.067180616740089, + "grad_norm": 0.4960375130176544, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.1202, + "step": 7325 + }, + { + "epoch": 8.094713656387665, + "grad_norm": 0.5013170838356018, + "learning_rate": 1.2974358974358976e-05, + "loss": 0.1137, + "step": 7350 + }, + { + "epoch": 8.122246696035242, + "grad_norm": 0.579422116279602, + "learning_rate": 1.294871794871795e-05, + "loss": 0.1129, + "step": 7375 + }, + { + "epoch": 8.14977973568282, + "grad_norm": 0.542116641998291, + "learning_rate": 1.2923076923076925e-05, + "loss": 0.1098, + "step": 7400 + }, + { + "epoch": 8.177312775330396, + "grad_norm": 0.2935909926891327, + "learning_rate": 1.2897435897435899e-05, + "loss": 0.1197, + "step": 7425 + }, + { + "epoch": 8.204845814977974, + "grad_norm": 0.4438793957233429, + "learning_rate": 1.2871794871794874e-05, + "loss": 0.1086, + "step": 7450 + }, + { + "epoch": 8.232378854625551, + "grad_norm": 0.6815393567085266, + "learning_rate": 1.2846153846153848e-05, + "loss": 0.1396, + "step": 7475 + }, + { + "epoch": 8.259911894273127, + "grad_norm": 0.4968509376049042, + "learning_rate": 1.2820512820512823e-05, + "loss": 0.1129, + "step": 7500 + }, + { + "epoch": 8.287444933920705, + "grad_norm": 0.9515237212181091, + "learning_rate": 1.2794871794871795e-05, + "loss": 0.1132, + "step": 7525 + }, + { + "epoch": 8.314977973568283, + "grad_norm": 0.4827808141708374, + "learning_rate": 1.2769230769230769e-05, + "loss": 0.1072, + "step": 7550 + }, + { + "epoch": 8.342511013215859, + "grad_norm": 0.7047274112701416, + "learning_rate": 1.2743589743589744e-05, + "loss": 0.1177, + "step": 7575 + }, + { + "epoch": 8.370044052863436, + "grad_norm": 0.4887017011642456, + "learning_rate": 1.2717948717948718e-05, + "loss": 0.1051, + "step": 7600 + }, + { + "epoch": 8.397577092511014, + "grad_norm": 0.6869075894355774, + "learning_rate": 1.2692307692307693e-05, + "loss": 0.1272, + "step": 7625 + }, + { + "epoch": 8.42511013215859, + "grad_norm": 0.4369036853313446, + "learning_rate": 1.2666666666666667e-05, + "loss": 0.106, + "step": 7650 + }, + { + "epoch": 8.452643171806168, + "grad_norm": 0.6985669732093811, + "learning_rate": 1.2641025641025642e-05, + "loss": 0.1185, + "step": 7675 + }, + { + "epoch": 8.480176211453745, + "grad_norm": 0.5139929056167603, + "learning_rate": 1.2615384615384616e-05, + "loss": 0.1146, + "step": 7700 + }, + { + "epoch": 8.507709251101321, + "grad_norm": 0.5673744082450867, + "learning_rate": 1.2589743589743591e-05, + "loss": 0.1166, + "step": 7725 + }, + { + "epoch": 8.535242290748899, + "grad_norm": 0.7198874354362488, + "learning_rate": 1.2564102564102565e-05, + "loss": 0.1278, + "step": 7750 + }, + { + "epoch": 8.562775330396477, + "grad_norm": 0.6545113325119019, + "learning_rate": 1.253846153846154e-05, + "loss": 0.1245, + "step": 7775 + }, + { + "epoch": 8.590308370044053, + "grad_norm": 0.667356014251709, + "learning_rate": 1.2512820512820514e-05, + "loss": 0.1314, + "step": 7800 + }, + { + "epoch": 8.61784140969163, + "grad_norm": 0.32821300625801086, + "learning_rate": 1.2487179487179487e-05, + "loss": 0.1325, + "step": 7825 + }, + { + "epoch": 8.645374449339208, + "grad_norm": 0.7761093378067017, + "learning_rate": 1.2461538461538463e-05, + "loss": 0.1069, + "step": 7850 + }, + { + "epoch": 8.672907488986784, + "grad_norm": 0.5627309679985046, + "learning_rate": 1.2435897435897436e-05, + "loss": 0.1046, + "step": 7875 + }, + { + "epoch": 8.700440528634362, + "grad_norm": 0.8482288122177124, + "learning_rate": 1.2410256410256412e-05, + "loss": 0.1305, + "step": 7900 + }, + { + "epoch": 8.72797356828194, + "grad_norm": 0.4097133278846741, + "learning_rate": 1.2384615384615385e-05, + "loss": 0.1262, + "step": 7925 + }, + { + "epoch": 8.755506607929515, + "grad_norm": 0.5583866834640503, + "learning_rate": 1.235897435897436e-05, + "loss": 0.1227, + "step": 7950 + }, + { + "epoch": 8.783039647577093, + "grad_norm": 0.5424619317054749, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.1258, + "step": 7975 + }, + { + "epoch": 8.810572687224669, + "grad_norm": 0.7977835536003113, + "learning_rate": 1.230769230769231e-05, + "loss": 0.135, + "step": 8000 + }, + { + "epoch": 8.810572687224669, + "eval_cer": 96.63028899613678, + "eval_loss": 0.7634754180908203, + "eval_runtime": 560.1673, + "eval_samples_per_second": 18.889, + "eval_steps_per_second": 4.724, + "eval_wer": 223.13059877416316, + "step": 8000 + }, + { + "epoch": 8.838105726872246, + "grad_norm": 0.5561416745185852, + "learning_rate": 1.2282051282051283e-05, + "loss": 0.1158, + "step": 8025 + }, + { + "epoch": 8.865638766519824, + "grad_norm": 0.5646144151687622, + "learning_rate": 1.2256410256410259e-05, + "loss": 0.1298, + "step": 8050 + }, + { + "epoch": 8.8931718061674, + "grad_norm": 0.547580897808075, + "learning_rate": 1.2230769230769232e-05, + "loss": 0.1194, + "step": 8075 + }, + { + "epoch": 8.920704845814978, + "grad_norm": 0.3888353407382965, + "learning_rate": 1.2205128205128208e-05, + "loss": 0.1367, + "step": 8100 + }, + { + "epoch": 8.948237885462555, + "grad_norm": 0.734626054763794, + "learning_rate": 1.217948717948718e-05, + "loss": 0.1156, + "step": 8125 + }, + { + "epoch": 8.975770925110131, + "grad_norm": 0.4471361041069031, + "learning_rate": 1.2153846153846153e-05, + "loss": 0.1089, + "step": 8150 + }, + { + "epoch": 9.003303964757709, + "grad_norm": 0.4151979982852936, + "learning_rate": 1.2128205128205129e-05, + "loss": 0.1235, + "step": 8175 + }, + { + "epoch": 9.030837004405287, + "grad_norm": 0.6531796455383301, + "learning_rate": 1.2102564102564102e-05, + "loss": 0.0797, + "step": 8200 + }, + { + "epoch": 9.058370044052863, + "grad_norm": 0.31330060958862305, + "learning_rate": 1.2076923076923078e-05, + "loss": 0.0821, + "step": 8225 + }, + { + "epoch": 9.08590308370044, + "grad_norm": 0.6132084131240845, + "learning_rate": 1.2051282051282051e-05, + "loss": 0.0751, + "step": 8250 + }, + { + "epoch": 9.113436123348018, + "grad_norm": 0.6006444692611694, + "learning_rate": 1.2025641025641027e-05, + "loss": 0.0848, + "step": 8275 + }, + { + "epoch": 9.140969162995594, + "grad_norm": 0.4292917549610138, + "learning_rate": 1.2e-05, + "loss": 0.0933, + "step": 8300 + }, + { + "epoch": 9.168502202643172, + "grad_norm": 0.4369455873966217, + "learning_rate": 1.1974358974358976e-05, + "loss": 0.0744, + "step": 8325 + }, + { + "epoch": 9.19603524229075, + "grad_norm": 0.5301995277404785, + "learning_rate": 1.194871794871795e-05, + "loss": 0.0932, + "step": 8350 + }, + { + "epoch": 9.223568281938325, + "grad_norm": 0.42414310574531555, + "learning_rate": 1.1923076923076925e-05, + "loss": 0.0841, + "step": 8375 + }, + { + "epoch": 9.251101321585903, + "grad_norm": 0.45556119084358215, + "learning_rate": 1.1897435897435898e-05, + "loss": 0.0835, + "step": 8400 + }, + { + "epoch": 9.27863436123348, + "grad_norm": 0.4080977141857147, + "learning_rate": 1.1871794871794872e-05, + "loss": 0.0842, + "step": 8425 + }, + { + "epoch": 9.306167400881057, + "grad_norm": 0.4317820966243744, + "learning_rate": 1.1846153846153847e-05, + "loss": 0.097, + "step": 8450 + }, + { + "epoch": 9.333700440528634, + "grad_norm": 0.5324757695198059, + "learning_rate": 1.1820512820512821e-05, + "loss": 0.0853, + "step": 8475 + }, + { + "epoch": 9.361233480176212, + "grad_norm": 0.336733877658844, + "learning_rate": 1.1794871794871796e-05, + "loss": 0.0933, + "step": 8500 + }, + { + "epoch": 9.388766519823788, + "grad_norm": 0.28637856245040894, + "learning_rate": 1.176923076923077e-05, + "loss": 0.0891, + "step": 8525 + }, + { + "epoch": 9.416299559471366, + "grad_norm": 0.480830579996109, + "learning_rate": 1.1743589743589745e-05, + "loss": 0.0945, + "step": 8550 + }, + { + "epoch": 9.443832599118943, + "grad_norm": 0.44252699613571167, + "learning_rate": 1.1717948717948719e-05, + "loss": 0.0886, + "step": 8575 + }, + { + "epoch": 9.47136563876652, + "grad_norm": 0.5835415720939636, + "learning_rate": 1.1692307692307694e-05, + "loss": 0.0904, + "step": 8600 + }, + { + "epoch": 9.498898678414097, + "grad_norm": 0.6899629235267639, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.1032, + "step": 8625 + }, + { + "epoch": 9.526431718061675, + "grad_norm": 0.3765935003757477, + "learning_rate": 1.1641025641025643e-05, + "loss": 0.105, + "step": 8650 + }, + { + "epoch": 9.55396475770925, + "grad_norm": 0.5741687417030334, + "learning_rate": 1.1615384615384617e-05, + "loss": 0.0963, + "step": 8675 + }, + { + "epoch": 9.581497797356828, + "grad_norm": 0.6885107159614563, + "learning_rate": 1.1589743589743592e-05, + "loss": 0.079, + "step": 8700 + }, + { + "epoch": 9.609030837004406, + "grad_norm": 0.3543364405632019, + "learning_rate": 1.1564102564102566e-05, + "loss": 0.1011, + "step": 8725 + }, + { + "epoch": 9.636563876651982, + "grad_norm": 0.5241756439208984, + "learning_rate": 1.1538461538461538e-05, + "loss": 0.0965, + "step": 8750 + }, + { + "epoch": 9.66409691629956, + "grad_norm": 0.5131816864013672, + "learning_rate": 1.1512820512820513e-05, + "loss": 0.089, + "step": 8775 + }, + { + "epoch": 9.691629955947137, + "grad_norm": 0.6563847064971924, + "learning_rate": 1.1487179487179487e-05, + "loss": 0.0776, + "step": 8800 + }, + { + "epoch": 9.719162995594713, + "grad_norm": 0.6348633766174316, + "learning_rate": 1.1461538461538462e-05, + "loss": 0.1008, + "step": 8825 + }, + { + "epoch": 9.746696035242291, + "grad_norm": 0.5632328987121582, + "learning_rate": 1.1435897435897436e-05, + "loss": 0.1074, + "step": 8850 + }, + { + "epoch": 9.774229074889869, + "grad_norm": 0.5217724442481995, + "learning_rate": 1.1410256410256411e-05, + "loss": 0.1016, + "step": 8875 + }, + { + "epoch": 9.801762114537445, + "grad_norm": 0.8553673624992371, + "learning_rate": 1.1384615384615385e-05, + "loss": 0.0864, + "step": 8900 + }, + { + "epoch": 9.829295154185022, + "grad_norm": 0.548801839351654, + "learning_rate": 1.135897435897436e-05, + "loss": 0.1079, + "step": 8925 + }, + { + "epoch": 9.8568281938326, + "grad_norm": 0.35070112347602844, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.0976, + "step": 8950 + }, + { + "epoch": 9.884361233480176, + "grad_norm": 0.5965908765792847, + "learning_rate": 1.1307692307692309e-05, + "loss": 0.0864, + "step": 8975 + }, + { + "epoch": 9.911894273127754, + "grad_norm": 0.6857784390449524, + "learning_rate": 1.1282051282051283e-05, + "loss": 0.0854, + "step": 9000 + }, + { + "epoch": 9.911894273127754, + "eval_cer": 96.66926507170452, + "eval_loss": 0.784762442111969, + "eval_runtime": 542.6886, + "eval_samples_per_second": 19.497, + "eval_steps_per_second": 4.876, + "eval_wer": 235.66242338519567, + "step": 9000 + }, + { + "epoch": 9.939427312775331, + "grad_norm": 0.3563746213912964, + "learning_rate": 1.1256410256410258e-05, + "loss": 0.09, + "step": 9025 + }, + { + "epoch": 9.966960352422907, + "grad_norm": 0.8333278298377991, + "learning_rate": 1.1230769230769232e-05, + "loss": 0.1009, + "step": 9050 + }, + { + "epoch": 9.994493392070485, + "grad_norm": 0.5625563263893127, + "learning_rate": 1.1205128205128205e-05, + "loss": 0.0924, + "step": 9075 + }, + { + "epoch": 10.022026431718063, + "grad_norm": 0.27612215280532837, + "learning_rate": 1.117948717948718e-05, + "loss": 0.0642, + "step": 9100 + }, + { + "epoch": 10.049559471365638, + "grad_norm": 0.44427192211151123, + "learning_rate": 1.1153846153846154e-05, + "loss": 0.0609, + "step": 9125 + }, + { + "epoch": 10.077092511013216, + "grad_norm": 0.8023830652236938, + "learning_rate": 1.112820512820513e-05, + "loss": 0.078, + "step": 9150 + }, + { + "epoch": 10.104625550660794, + "grad_norm": 0.6242303252220154, + "learning_rate": 1.1102564102564103e-05, + "loss": 0.0553, + "step": 9175 + }, + { + "epoch": 10.13215859030837, + "grad_norm": 0.5606359243392944, + "learning_rate": 1.1076923076923079e-05, + "loss": 0.0673, + "step": 9200 + }, + { + "epoch": 10.159691629955947, + "grad_norm": 0.25088736414909363, + "learning_rate": 1.1051282051282052e-05, + "loss": 0.062, + "step": 9225 + }, + { + "epoch": 10.187224669603523, + "grad_norm": 0.4773547947406769, + "learning_rate": 1.1025641025641028e-05, + "loss": 0.0745, + "step": 9250 + }, + { + "epoch": 10.214757709251101, + "grad_norm": 0.19335705041885376, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0553, + "step": 9275 + }, + { + "epoch": 10.242290748898679, + "grad_norm": 0.32268404960632324, + "learning_rate": 1.0974358974358977e-05, + "loss": 0.0573, + "step": 9300 + }, + { + "epoch": 10.269823788546255, + "grad_norm": 0.6676753759384155, + "learning_rate": 1.094871794871795e-05, + "loss": 0.0686, + "step": 9325 + }, + { + "epoch": 10.297356828193832, + "grad_norm": 0.45940276980400085, + "learning_rate": 1.0923076923076922e-05, + "loss": 0.0699, + "step": 9350 + }, + { + "epoch": 10.32488986784141, + "grad_norm": 0.3542877435684204, + "learning_rate": 1.0897435897435898e-05, + "loss": 0.0839, + "step": 9375 + }, + { + "epoch": 10.352422907488986, + "grad_norm": 0.5135036110877991, + "learning_rate": 1.0871794871794871e-05, + "loss": 0.0592, + "step": 9400 + }, + { + "epoch": 10.379955947136564, + "grad_norm": 0.3040463328361511, + "learning_rate": 1.0846153846153847e-05, + "loss": 0.0794, + "step": 9425 + }, + { + "epoch": 10.407488986784141, + "grad_norm": 0.3832766115665436, + "learning_rate": 1.082051282051282e-05, + "loss": 0.0684, + "step": 9450 + }, + { + "epoch": 10.435022026431717, + "grad_norm": 0.3733726739883423, + "learning_rate": 1.0794871794871796e-05, + "loss": 0.0594, + "step": 9475 + }, + { + "epoch": 10.462555066079295, + "grad_norm": 0.41105160117149353, + "learning_rate": 1.076923076923077e-05, + "loss": 0.0608, + "step": 9500 + }, + { + "epoch": 10.490088105726873, + "grad_norm": 0.5909921526908875, + "learning_rate": 1.0743589743589745e-05, + "loss": 0.0815, + "step": 9525 + }, + { + "epoch": 10.517621145374449, + "grad_norm": 0.3582472801208496, + "learning_rate": 1.0717948717948718e-05, + "loss": 0.0612, + "step": 9550 + }, + { + "epoch": 10.545154185022026, + "grad_norm": 0.30810996890068054, + "learning_rate": 1.0692307692307694e-05, + "loss": 0.0669, + "step": 9575 + }, + { + "epoch": 10.572687224669604, + "grad_norm": 0.44238853454589844, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.0648, + "step": 9600 + }, + { + "epoch": 10.60022026431718, + "grad_norm": 0.6048989295959473, + "learning_rate": 1.0641025641025643e-05, + "loss": 0.071, + "step": 9625 + }, + { + "epoch": 10.627753303964758, + "grad_norm": 0.3572470545768738, + "learning_rate": 1.0615384615384616e-05, + "loss": 0.0725, + "step": 9650 + }, + { + "epoch": 10.655286343612335, + "grad_norm": 0.4658237099647522, + "learning_rate": 1.058974358974359e-05, + "loss": 0.0815, + "step": 9675 + }, + { + "epoch": 10.682819383259911, + "grad_norm": 1.4714276790618896, + "learning_rate": 1.0564102564102565e-05, + "loss": 0.0769, + "step": 9700 + }, + { + "epoch": 10.710352422907489, + "grad_norm": 0.3880234360694885, + "learning_rate": 1.0538461538461539e-05, + "loss": 0.0779, + "step": 9725 + }, + { + "epoch": 10.737885462555067, + "grad_norm": 0.28868013620376587, + "learning_rate": 1.0512820512820514e-05, + "loss": 0.0737, + "step": 9750 + }, + { + "epoch": 10.765418502202643, + "grad_norm": 0.2880098521709442, + "learning_rate": 1.0487179487179488e-05, + "loss": 0.0634, + "step": 9775 + }, + { + "epoch": 10.79295154185022, + "grad_norm": 0.5987436175346375, + "learning_rate": 1.0461538461538463e-05, + "loss": 0.0744, + "step": 9800 + }, + { + "epoch": 10.820484581497798, + "grad_norm": 0.58491051197052, + "learning_rate": 1.0435897435897437e-05, + "loss": 0.0688, + "step": 9825 + }, + { + "epoch": 10.848017621145374, + "grad_norm": 0.3418164551258087, + "learning_rate": 1.0410256410256412e-05, + "loss": 0.0643, + "step": 9850 + }, + { + "epoch": 10.875550660792952, + "grad_norm": 0.475784569978714, + "learning_rate": 1.0384615384615386e-05, + "loss": 0.0729, + "step": 9875 + }, + { + "epoch": 10.90308370044053, + "grad_norm": 0.713233232498169, + "learning_rate": 1.0358974358974361e-05, + "loss": 0.0531, + "step": 9900 + }, + { + "epoch": 10.930616740088105, + "grad_norm": 0.5004772543907166, + "learning_rate": 1.0333333333333335e-05, + "loss": 0.0573, + "step": 9925 + }, + { + "epoch": 10.958149779735683, + "grad_norm": 0.3708213269710541, + "learning_rate": 1.0307692307692307e-05, + "loss": 0.0716, + "step": 9950 + }, + { + "epoch": 10.98568281938326, + "grad_norm": 0.41214606165885925, + "learning_rate": 1.0282051282051282e-05, + "loss": 0.0793, + "step": 9975 + }, + { + "epoch": 11.013215859030836, + "grad_norm": 0.15892280638217926, + "learning_rate": 1.0256410256410256e-05, + "loss": 0.062, + "step": 10000 + }, + { + "epoch": 11.013215859030836, + "eval_cer": 83.99286967088143, + "eval_loss": 0.8101971745491028, + "eval_runtime": 534.2549, + "eval_samples_per_second": 19.805, + "eval_steps_per_second": 4.953, + "eval_wer": 199.8114097123998, + "step": 10000 + }, + { + "epoch": 11.040748898678414, + "grad_norm": 0.3245684504508972, + "learning_rate": 1.0230769230769231e-05, + "loss": 0.0495, + "step": 10025 + }, + { + "epoch": 11.068281938325992, + "grad_norm": 0.3588270843029022, + "learning_rate": 1.0205128205128205e-05, + "loss": 0.0501, + "step": 10050 + }, + { + "epoch": 11.095814977973568, + "grad_norm": 0.2821900248527527, + "learning_rate": 1.017948717948718e-05, + "loss": 0.0522, + "step": 10075 + }, + { + "epoch": 11.123348017621145, + "grad_norm": 0.22381822764873505, + "learning_rate": 1.0153846153846154e-05, + "loss": 0.0511, + "step": 10100 + }, + { + "epoch": 11.150881057268723, + "grad_norm": 0.5374503135681152, + "learning_rate": 1.012820512820513e-05, + "loss": 0.0423, + "step": 10125 + }, + { + "epoch": 11.178414096916299, + "grad_norm": 0.6062394380569458, + "learning_rate": 1.0102564102564103e-05, + "loss": 0.0473, + "step": 10150 + }, + { + "epoch": 11.205947136563877, + "grad_norm": 0.47851529717445374, + "learning_rate": 1.0076923076923078e-05, + "loss": 0.0472, + "step": 10175 + }, + { + "epoch": 11.233480176211454, + "grad_norm": 0.3368567228317261, + "learning_rate": 1.0051282051282052e-05, + "loss": 0.048, + "step": 10200 + }, + { + "epoch": 11.26101321585903, + "grad_norm": 0.37278836965560913, + "learning_rate": 1.0025641025641027e-05, + "loss": 0.0475, + "step": 10225 + }, + { + "epoch": 11.288546255506608, + "grad_norm": 0.5743754506111145, + "learning_rate": 1e-05, + "loss": 0.0416, + "step": 10250 + }, + { + "epoch": 11.316079295154186, + "grad_norm": 0.3918323814868927, + "learning_rate": 9.974358974358974e-06, + "loss": 0.0511, + "step": 10275 + }, + { + "epoch": 11.343612334801762, + "grad_norm": 0.36391782760620117, + "learning_rate": 9.94871794871795e-06, + "loss": 0.0506, + "step": 10300 + }, + { + "epoch": 11.37114537444934, + "grad_norm": 0.24007482826709747, + "learning_rate": 9.923076923076923e-06, + "loss": 0.0451, + "step": 10325 + }, + { + "epoch": 11.398678414096917, + "grad_norm": 0.2574377954006195, + "learning_rate": 9.897435897435899e-06, + "loss": 0.0516, + "step": 10350 + }, + { + "epoch": 11.426211453744493, + "grad_norm": 0.212848961353302, + "learning_rate": 9.871794871794872e-06, + "loss": 0.0435, + "step": 10375 + }, + { + "epoch": 11.45374449339207, + "grad_norm": 0.2388388067483902, + "learning_rate": 9.846153846153848e-06, + "loss": 0.0571, + "step": 10400 + }, + { + "epoch": 11.481277533039648, + "grad_norm": 0.441074013710022, + "learning_rate": 9.820512820512821e-06, + "loss": 0.0475, + "step": 10425 + }, + { + "epoch": 11.508810572687224, + "grad_norm": 0.23038113117218018, + "learning_rate": 9.794871794871795e-06, + "loss": 0.053, + "step": 10450 + }, + { + "epoch": 11.536343612334802, + "grad_norm": 0.42329710721969604, + "learning_rate": 9.76923076923077e-06, + "loss": 0.0411, + "step": 10475 + }, + { + "epoch": 11.56387665198238, + "grad_norm": 0.2476469725370407, + "learning_rate": 9.743589743589744e-06, + "loss": 0.0468, + "step": 10500 + }, + { + "epoch": 11.591409691629956, + "grad_norm": 0.3586748540401459, + "learning_rate": 9.71794871794872e-06, + "loss": 0.0551, + "step": 10525 + }, + { + "epoch": 11.618942731277533, + "grad_norm": 0.465305894613266, + "learning_rate": 9.692307692307693e-06, + "loss": 0.0504, + "step": 10550 + }, + { + "epoch": 11.646475770925111, + "grad_norm": 0.5798656344413757, + "learning_rate": 9.666666666666667e-06, + "loss": 0.0526, + "step": 10575 + }, + { + "epoch": 11.674008810572687, + "grad_norm": 0.21707002818584442, + "learning_rate": 9.641025641025642e-06, + "loss": 0.0441, + "step": 10600 + }, + { + "epoch": 11.701541850220265, + "grad_norm": 0.2580870985984802, + "learning_rate": 9.615384615384616e-06, + "loss": 0.0505, + "step": 10625 + }, + { + "epoch": 11.729074889867842, + "grad_norm": 0.23660093545913696, + "learning_rate": 9.589743589743591e-06, + "loss": 0.0502, + "step": 10650 + }, + { + "epoch": 11.756607929515418, + "grad_norm": 0.40983569622039795, + "learning_rate": 9.564102564102565e-06, + "loss": 0.0506, + "step": 10675 + }, + { + "epoch": 11.784140969162996, + "grad_norm": 0.5480314493179321, + "learning_rate": 9.53846153846154e-06, + "loss": 0.0538, + "step": 10700 + }, + { + "epoch": 11.811674008810574, + "grad_norm": 0.23049433529376984, + "learning_rate": 9.512820512820514e-06, + "loss": 0.0441, + "step": 10725 + }, + { + "epoch": 11.83920704845815, + "grad_norm": 0.2648046314716339, + "learning_rate": 9.487179487179487e-06, + "loss": 0.0573, + "step": 10750 + }, + { + "epoch": 11.866740088105727, + "grad_norm": 0.19929298758506775, + "learning_rate": 9.461538461538463e-06, + "loss": 0.0437, + "step": 10775 + }, + { + "epoch": 11.894273127753303, + "grad_norm": 0.43784064054489136, + "learning_rate": 9.435897435897436e-06, + "loss": 0.0506, + "step": 10800 + }, + { + "epoch": 11.92180616740088, + "grad_norm": 0.33534079790115356, + "learning_rate": 9.410256410256412e-06, + "loss": 0.0447, + "step": 10825 + }, + { + "epoch": 11.949339207048459, + "grad_norm": 0.41424134373664856, + "learning_rate": 9.384615384615385e-06, + "loss": 0.0479, + "step": 10850 + }, + { + "epoch": 11.976872246696034, + "grad_norm": 0.46953722834587097, + "learning_rate": 9.358974358974359e-06, + "loss": 0.0642, + "step": 10875 + }, + { + "epoch": 12.004405286343612, + "grad_norm": 0.2814580798149109, + "learning_rate": 9.333333333333334e-06, + "loss": 0.0539, + "step": 10900 + }, + { + "epoch": 12.03193832599119, + "grad_norm": 0.3269638419151306, + "learning_rate": 9.307692307692308e-06, + "loss": 0.0319, + "step": 10925 + }, + { + "epoch": 12.059471365638766, + "grad_norm": 0.3257371783256531, + "learning_rate": 9.282051282051283e-06, + "loss": 0.0341, + "step": 10950 + }, + { + "epoch": 12.087004405286343, + "grad_norm": 0.3003399074077606, + "learning_rate": 9.256410256410257e-06, + "loss": 0.0334, + "step": 10975 + }, + { + "epoch": 12.114537444933921, + "grad_norm": 0.23329438269138336, + "learning_rate": 9.230769230769232e-06, + "loss": 0.0299, + "step": 11000 + }, + { + "epoch": 12.114537444933921, + "eval_cer": 102.80570426329486, + "eval_loss": 0.8363927602767944, + "eval_runtime": 573.3468, + "eval_samples_per_second": 18.455, + "eval_steps_per_second": 4.615, + "eval_wer": 177.04856199905706, + "step": 11000 + }, + { + "epoch": 12.142070484581497, + "grad_norm": 0.3134128749370575, + "learning_rate": 9.205128205128206e-06, + "loss": 0.0351, + "step": 11025 + }, + { + "epoch": 12.169603524229075, + "grad_norm": 0.19677399098873138, + "learning_rate": 9.17948717948718e-06, + "loss": 0.0308, + "step": 11050 + }, + { + "epoch": 12.197136563876652, + "grad_norm": 0.22807055711746216, + "learning_rate": 9.153846153846155e-06, + "loss": 0.0398, + "step": 11075 + }, + { + "epoch": 12.224669603524228, + "grad_norm": 0.23452354967594147, + "learning_rate": 9.128205128205129e-06, + "loss": 0.0337, + "step": 11100 + }, + { + "epoch": 12.252202643171806, + "grad_norm": 0.21507778763771057, + "learning_rate": 9.102564102564104e-06, + "loss": 0.0343, + "step": 11125 + }, + { + "epoch": 12.279735682819384, + "grad_norm": 0.5566434264183044, + "learning_rate": 9.076923076923078e-06, + "loss": 0.0339, + "step": 11150 + }, + { + "epoch": 12.30726872246696, + "grad_norm": 0.17659461498260498, + "learning_rate": 9.051282051282051e-06, + "loss": 0.0325, + "step": 11175 + }, + { + "epoch": 12.334801762114537, + "grad_norm": 0.2903195023536682, + "learning_rate": 9.025641025641027e-06, + "loss": 0.0332, + "step": 11200 + }, + { + "epoch": 12.362334801762115, + "grad_norm": 0.20203354954719543, + "learning_rate": 9e-06, + "loss": 0.0279, + "step": 11225 + }, + { + "epoch": 12.389867841409691, + "grad_norm": 0.25275635719299316, + "learning_rate": 8.974358974358976e-06, + "loss": 0.0329, + "step": 11250 + }, + { + "epoch": 12.417400881057269, + "grad_norm": 0.16091254353523254, + "learning_rate": 8.94871794871795e-06, + "loss": 0.0322, + "step": 11275 + }, + { + "epoch": 12.444933920704846, + "grad_norm": 0.2583257853984833, + "learning_rate": 8.923076923076925e-06, + "loss": 0.0323, + "step": 11300 + }, + { + "epoch": 12.472466960352422, + "grad_norm": 0.3744211494922638, + "learning_rate": 8.897435897435898e-06, + "loss": 0.035, + "step": 11325 + }, + { + "epoch": 12.5, + "grad_norm": 0.37432727217674255, + "learning_rate": 8.871794871794872e-06, + "loss": 0.0408, + "step": 11350 + }, + { + "epoch": 12.527533039647578, + "grad_norm": 0.22354626655578613, + "learning_rate": 8.846153846153847e-06, + "loss": 0.0394, + "step": 11375 + }, + { + "epoch": 12.555066079295154, + "grad_norm": 0.20891696214675903, + "learning_rate": 8.820512820512821e-06, + "loss": 0.0412, + "step": 11400 + }, + { + "epoch": 12.582599118942731, + "grad_norm": 0.31616201996803284, + "learning_rate": 8.794871794871796e-06, + "loss": 0.0395, + "step": 11425 + }, + { + "epoch": 12.610132158590309, + "grad_norm": 0.2666582465171814, + "learning_rate": 8.76923076923077e-06, + "loss": 0.0345, + "step": 11450 + }, + { + "epoch": 12.637665198237885, + "grad_norm": 0.41860589385032654, + "learning_rate": 8.743589743589743e-06, + "loss": 0.0374, + "step": 11475 + }, + { + "epoch": 12.665198237885463, + "grad_norm": 1.188323974609375, + "learning_rate": 8.717948717948719e-06, + "loss": 0.0382, + "step": 11500 + }, + { + "epoch": 12.69273127753304, + "grad_norm": 0.4103492498397827, + "learning_rate": 8.692307692307692e-06, + "loss": 0.0363, + "step": 11525 + }, + { + "epoch": 12.720264317180616, + "grad_norm": 0.19665437936782837, + "learning_rate": 8.666666666666668e-06, + "loss": 0.038, + "step": 11550 + }, + { + "epoch": 12.747797356828194, + "grad_norm": 0.2887498140335083, + "learning_rate": 8.641025641025641e-06, + "loss": 0.0343, + "step": 11575 + }, + { + "epoch": 12.775330396475772, + "grad_norm": 0.34571361541748047, + "learning_rate": 8.615384615384617e-06, + "loss": 0.0363, + "step": 11600 + }, + { + "epoch": 12.802863436123348, + "grad_norm": 0.4487113058567047, + "learning_rate": 8.58974358974359e-06, + "loss": 0.0374, + "step": 11625 + }, + { + "epoch": 12.830396475770925, + "grad_norm": 0.3439520001411438, + "learning_rate": 8.564102564102564e-06, + "loss": 0.0336, + "step": 11650 + }, + { + "epoch": 12.857929515418503, + "grad_norm": 0.4911608099937439, + "learning_rate": 8.53846153846154e-06, + "loss": 0.0346, + "step": 11675 + }, + { + "epoch": 12.885462555066079, + "grad_norm": 0.15864339470863342, + "learning_rate": 8.512820512820513e-06, + "loss": 0.0377, + "step": 11700 + }, + { + "epoch": 12.912995594713657, + "grad_norm": 0.20187804102897644, + "learning_rate": 8.487179487179488e-06, + "loss": 0.0379, + "step": 11725 + }, + { + "epoch": 12.940528634361234, + "grad_norm": 0.402381956577301, + "learning_rate": 8.461538461538462e-06, + "loss": 0.0432, + "step": 11750 + }, + { + "epoch": 12.96806167400881, + "grad_norm": 0.39829185605049133, + "learning_rate": 8.435897435897436e-06, + "loss": 0.0394, + "step": 11775 + }, + { + "epoch": 12.995594713656388, + "grad_norm": 0.28365182876586914, + "learning_rate": 8.410256410256411e-06, + "loss": 0.0416, + "step": 11800 + }, + { + "epoch": 13.023127753303966, + "grad_norm": 0.20419412851333618, + "learning_rate": 8.384615384615385e-06, + "loss": 0.0281, + "step": 11825 + }, + { + "epoch": 13.050660792951541, + "grad_norm": 0.6308184266090393, + "learning_rate": 8.35897435897436e-06, + "loss": 0.0274, + "step": 11850 + }, + { + "epoch": 13.07819383259912, + "grad_norm": 0.14284294843673706, + "learning_rate": 8.333333333333334e-06, + "loss": 0.0262, + "step": 11875 + }, + { + "epoch": 13.105726872246697, + "grad_norm": 0.25530532002449036, + "learning_rate": 8.307692307692309e-06, + "loss": 0.0243, + "step": 11900 + }, + { + "epoch": 13.133259911894273, + "grad_norm": 0.22208963334560394, + "learning_rate": 8.282051282051283e-06, + "loss": 0.0271, + "step": 11925 + }, + { + "epoch": 13.16079295154185, + "grad_norm": 0.14056318998336792, + "learning_rate": 8.256410256410256e-06, + "loss": 0.0235, + "step": 11950 + }, + { + "epoch": 13.188325991189428, + "grad_norm": 0.5111184120178223, + "learning_rate": 8.230769230769232e-06, + "loss": 0.0255, + "step": 11975 + }, + { + "epoch": 13.215859030837004, + "grad_norm": 0.40052205324172974, + "learning_rate": 8.205128205128205e-06, + "loss": 0.0254, + "step": 12000 + }, + { + "epoch": 13.215859030837004, + "eval_cer": 85.54675409535383, + "eval_loss": 0.8552005290985107, + "eval_runtime": 512.197, + "eval_samples_per_second": 20.658, + "eval_steps_per_second": 5.166, + "eval_wer": 176.0867515322961, + "step": 12000 + }, + { + "epoch": 13.243392070484582, + "grad_norm": 0.37046974897384644, + "learning_rate": 8.17948717948718e-06, + "loss": 0.0281, + "step": 12025 + }, + { + "epoch": 13.270925110132158, + "grad_norm": 0.18737167119979858, + "learning_rate": 8.153846153846154e-06, + "loss": 0.0212, + "step": 12050 + }, + { + "epoch": 13.298458149779735, + "grad_norm": 0.22969557344913483, + "learning_rate": 8.12820512820513e-06, + "loss": 0.0233, + "step": 12075 + }, + { + "epoch": 13.325991189427313, + "grad_norm": 0.24471865594387054, + "learning_rate": 8.102564102564103e-06, + "loss": 0.0273, + "step": 12100 + }, + { + "epoch": 13.353524229074889, + "grad_norm": 0.1323070228099823, + "learning_rate": 8.076923076923077e-06, + "loss": 0.0273, + "step": 12125 + }, + { + "epoch": 13.381057268722467, + "grad_norm": 0.19890151917934418, + "learning_rate": 8.051282051282052e-06, + "loss": 0.0267, + "step": 12150 + }, + { + "epoch": 13.408590308370044, + "grad_norm": 0.18089306354522705, + "learning_rate": 8.025641025641026e-06, + "loss": 0.0256, + "step": 12175 + }, + { + "epoch": 13.43612334801762, + "grad_norm": 0.3312578499317169, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0238, + "step": 12200 + }, + { + "epoch": 13.463656387665198, + "grad_norm": 0.372565358877182, + "learning_rate": 7.974358974358975e-06, + "loss": 0.0279, + "step": 12225 + }, + { + "epoch": 13.491189427312776, + "grad_norm": 0.16830548644065857, + "learning_rate": 7.948717948717949e-06, + "loss": 0.023, + "step": 12250 + }, + { + "epoch": 13.518722466960352, + "grad_norm": 0.16041933000087738, + "learning_rate": 7.923076923076924e-06, + "loss": 0.027, + "step": 12275 + }, + { + "epoch": 13.54625550660793, + "grad_norm": 0.3963809013366699, + "learning_rate": 7.897435897435898e-06, + "loss": 0.0307, + "step": 12300 + }, + { + "epoch": 13.573788546255507, + "grad_norm": 0.2639409899711609, + "learning_rate": 7.871794871794873e-06, + "loss": 0.024, + "step": 12325 + }, + { + "epoch": 13.601321585903083, + "grad_norm": 0.15676841139793396, + "learning_rate": 7.846153846153847e-06, + "loss": 0.0239, + "step": 12350 + }, + { + "epoch": 13.62885462555066, + "grad_norm": 0.314523309469223, + "learning_rate": 7.820512820512822e-06, + "loss": 0.0274, + "step": 12375 + }, + { + "epoch": 13.656387665198238, + "grad_norm": 0.20265546441078186, + "learning_rate": 7.794871794871796e-06, + "loss": 0.0264, + "step": 12400 + }, + { + "epoch": 13.683920704845814, + "grad_norm": 0.11325351148843765, + "learning_rate": 7.76923076923077e-06, + "loss": 0.0248, + "step": 12425 + }, + { + "epoch": 13.711453744493392, + "grad_norm": 0.16151660680770874, + "learning_rate": 7.743589743589745e-06, + "loss": 0.0261, + "step": 12450 + }, + { + "epoch": 13.73898678414097, + "grad_norm": 0.2732401192188263, + "learning_rate": 7.717948717948718e-06, + "loss": 0.0267, + "step": 12475 + }, + { + "epoch": 13.766519823788546, + "grad_norm": 0.37177756428718567, + "learning_rate": 7.692307692307694e-06, + "loss": 0.0293, + "step": 12500 + }, + { + "epoch": 13.794052863436123, + "grad_norm": 0.12377744168043137, + "learning_rate": 7.666666666666667e-06, + "loss": 0.0254, + "step": 12525 + }, + { + "epoch": 13.821585903083701, + "grad_norm": 0.3608620762825012, + "learning_rate": 7.641025641025641e-06, + "loss": 0.0296, + "step": 12550 + }, + { + "epoch": 13.849118942731277, + "grad_norm": 0.17188109457492828, + "learning_rate": 7.615384615384615e-06, + "loss": 0.0212, + "step": 12575 + }, + { + "epoch": 13.876651982378855, + "grad_norm": 0.1362937092781067, + "learning_rate": 7.58974358974359e-06, + "loss": 0.0259, + "step": 12600 + }, + { + "epoch": 13.904185022026432, + "grad_norm": 0.1876504123210907, + "learning_rate": 7.564102564102564e-06, + "loss": 0.0273, + "step": 12625 + }, + { + "epoch": 13.931718061674008, + "grad_norm": 0.14184896647930145, + "learning_rate": 7.538461538461539e-06, + "loss": 0.0233, + "step": 12650 + }, + { + "epoch": 13.959251101321586, + "grad_norm": 0.17070193588733673, + "learning_rate": 7.512820512820513e-06, + "loss": 0.0235, + "step": 12675 + }, + { + "epoch": 13.986784140969164, + "grad_norm": 0.3402119576931, + "learning_rate": 7.487179487179488e-06, + "loss": 0.0294, + "step": 12700 + }, + { + "epoch": 14.01431718061674, + "grad_norm": 0.11597840487957001, + "learning_rate": 7.461538461538462e-06, + "loss": 0.0217, + "step": 12725 + }, + { + "epoch": 14.041850220264317, + "grad_norm": 0.26662012934684753, + "learning_rate": 7.435897435897437e-06, + "loss": 0.0162, + "step": 12750 + }, + { + "epoch": 14.069383259911895, + "grad_norm": 0.09315423667430878, + "learning_rate": 7.410256410256411e-06, + "loss": 0.0184, + "step": 12775 + }, + { + "epoch": 14.09691629955947, + "grad_norm": 0.34267550706863403, + "learning_rate": 7.384615384615386e-06, + "loss": 0.0169, + "step": 12800 + }, + { + "epoch": 14.124449339207048, + "grad_norm": 0.2209981232881546, + "learning_rate": 7.35897435897436e-06, + "loss": 0.0163, + "step": 12825 + }, + { + "epoch": 14.151982378854626, + "grad_norm": 0.2604495882987976, + "learning_rate": 7.333333333333333e-06, + "loss": 0.0194, + "step": 12850 + }, + { + "epoch": 14.179515418502202, + "grad_norm": 0.10924118757247925, + "learning_rate": 7.307692307692308e-06, + "loss": 0.0149, + "step": 12875 + }, + { + "epoch": 14.20704845814978, + "grad_norm": 0.10360319912433624, + "learning_rate": 7.282051282051282e-06, + "loss": 0.0177, + "step": 12900 + }, + { + "epoch": 14.234581497797357, + "grad_norm": 0.20702053606510162, + "learning_rate": 7.256410256410257e-06, + "loss": 0.0179, + "step": 12925 + }, + { + "epoch": 14.262114537444933, + "grad_norm": 0.22959311306476593, + "learning_rate": 7.230769230769231e-06, + "loss": 0.017, + "step": 12950 + }, + { + "epoch": 14.289647577092511, + "grad_norm": 0.10817913711071014, + "learning_rate": 7.205128205128206e-06, + "loss": 0.018, + "step": 12975 + }, + { + "epoch": 14.317180616740089, + "grad_norm": 0.1359902173280716, + "learning_rate": 7.17948717948718e-06, + "loss": 0.0196, + "step": 13000 + }, + { + "epoch": 14.317180616740089, + "eval_cer": 60.44272236424288, + "eval_loss": 0.8670655488967896, + "eval_runtime": 479.7498, + "eval_samples_per_second": 22.055, + "eval_steps_per_second": 5.515, + "eval_wer": 126.28005657708627, + "step": 13000 + }, + { + "epoch": 14.344713656387665, + "grad_norm": 0.21668480336666107, + "learning_rate": 7.153846153846155e-06, + "loss": 0.0185, + "step": 13025 + }, + { + "epoch": 14.372246696035242, + "grad_norm": 0.12029585987329483, + "learning_rate": 7.128205128205129e-06, + "loss": 0.0181, + "step": 13050 + }, + { + "epoch": 14.39977973568282, + "grad_norm": 0.2249136120080948, + "learning_rate": 7.102564102564104e-06, + "loss": 0.0194, + "step": 13075 + }, + { + "epoch": 14.427312775330396, + "grad_norm": 0.1638566255569458, + "learning_rate": 7.076923076923078e-06, + "loss": 0.0198, + "step": 13100 + }, + { + "epoch": 14.454845814977974, + "grad_norm": 0.10072775185108185, + "learning_rate": 7.051282051282053e-06, + "loss": 0.0196, + "step": 13125 + }, + { + "epoch": 14.482378854625551, + "grad_norm": 0.3090708553791046, + "learning_rate": 7.025641025641025e-06, + "loss": 0.018, + "step": 13150 + }, + { + "epoch": 14.509911894273127, + "grad_norm": 0.2770833373069763, + "learning_rate": 7e-06, + "loss": 0.0186, + "step": 13175 + }, + { + "epoch": 14.537444933920705, + "grad_norm": 0.37222012877464294, + "learning_rate": 6.974358974358974e-06, + "loss": 0.022, + "step": 13200 + }, + { + "epoch": 14.564977973568283, + "grad_norm": 0.11628738045692444, + "learning_rate": 6.948717948717949e-06, + "loss": 0.019, + "step": 13225 + }, + { + "epoch": 14.592511013215859, + "grad_norm": 0.3900822401046753, + "learning_rate": 6.923076923076923e-06, + "loss": 0.0205, + "step": 13250 + }, + { + "epoch": 14.620044052863436, + "grad_norm": 0.1428934782743454, + "learning_rate": 6.897435897435898e-06, + "loss": 0.0186, + "step": 13275 + }, + { + "epoch": 14.647577092511014, + "grad_norm": 0.25180259346961975, + "learning_rate": 6.871794871794872e-06, + "loss": 0.0214, + "step": 13300 + }, + { + "epoch": 14.67511013215859, + "grad_norm": 0.2877309024333954, + "learning_rate": 6.846153846153847e-06, + "loss": 0.0218, + "step": 13325 + }, + { + "epoch": 14.702643171806168, + "grad_norm": 0.21245427429676056, + "learning_rate": 6.820512820512821e-06, + "loss": 0.0192, + "step": 13350 + }, + { + "epoch": 14.730176211453745, + "grad_norm": 0.33086642622947693, + "learning_rate": 6.794871794871796e-06, + "loss": 0.0194, + "step": 13375 + }, + { + "epoch": 14.757709251101321, + "grad_norm": 0.21538333594799042, + "learning_rate": 6.76923076923077e-06, + "loss": 0.0208, + "step": 13400 + }, + { + "epoch": 14.785242290748899, + "grad_norm": 0.1178901195526123, + "learning_rate": 6.743589743589745e-06, + "loss": 0.0177, + "step": 13425 + }, + { + "epoch": 14.812775330396477, + "grad_norm": 0.13401082158088684, + "learning_rate": 6.717948717948718e-06, + "loss": 0.0204, + "step": 13450 + }, + { + "epoch": 14.840308370044053, + "grad_norm": 0.1349448412656784, + "learning_rate": 6.692307692307692e-06, + "loss": 0.0151, + "step": 13475 + }, + { + "epoch": 14.86784140969163, + "grad_norm": 0.10409779101610184, + "learning_rate": 6.666666666666667e-06, + "loss": 0.021, + "step": 13500 + }, + { + "epoch": 14.895374449339208, + "grad_norm": 0.11943039298057556, + "learning_rate": 6.641025641025641e-06, + "loss": 0.0188, + "step": 13525 + }, + { + "epoch": 14.922907488986784, + "grad_norm": 0.15288065373897552, + "learning_rate": 6.615384615384616e-06, + "loss": 0.0166, + "step": 13550 + }, + { + "epoch": 14.950440528634362, + "grad_norm": 0.3485194146633148, + "learning_rate": 6.58974358974359e-06, + "loss": 0.0194, + "step": 13575 + }, + { + "epoch": 14.97797356828194, + "grad_norm": 0.23302164673805237, + "learning_rate": 6.564102564102565e-06, + "loss": 0.0198, + "step": 13600 + }, + { + "epoch": 15.005506607929515, + "grad_norm": 0.18940000236034393, + "learning_rate": 6.538461538461539e-06, + "loss": 0.0211, + "step": 13625 + }, + { + "epoch": 15.033039647577093, + "grad_norm": 0.17743512988090515, + "learning_rate": 6.512820512820514e-06, + "loss": 0.0147, + "step": 13650 + }, + { + "epoch": 15.060572687224669, + "grad_norm": 0.1025228500366211, + "learning_rate": 6.487179487179488e-06, + "loss": 0.0137, + "step": 13675 + }, + { + "epoch": 15.088105726872246, + "grad_norm": 0.08102612942457199, + "learning_rate": 6.461538461538463e-06, + "loss": 0.0134, + "step": 13700 + }, + { + "epoch": 15.115638766519824, + "grad_norm": 0.11834702640771866, + "learning_rate": 6.435897435897437e-06, + "loss": 0.0136, + "step": 13725 + }, + { + "epoch": 15.1431718061674, + "grad_norm": 0.09455480426549911, + "learning_rate": 6.410256410256412e-06, + "loss": 0.014, + "step": 13750 + }, + { + "epoch": 15.170704845814978, + "grad_norm": 0.10078572481870651, + "learning_rate": 6.384615384615384e-06, + "loss": 0.012, + "step": 13775 + }, + { + "epoch": 15.198237885462555, + "grad_norm": 0.10887516289949417, + "learning_rate": 6.358974358974359e-06, + "loss": 0.0131, + "step": 13800 + }, + { + "epoch": 15.225770925110131, + "grad_norm": 0.07745672762393951, + "learning_rate": 6.333333333333333e-06, + "loss": 0.0149, + "step": 13825 + }, + { + "epoch": 15.253303964757709, + "grad_norm": 0.5340884923934937, + "learning_rate": 6.307692307692308e-06, + "loss": 0.0165, + "step": 13850 + }, + { + "epoch": 15.280837004405287, + "grad_norm": 0.105720154941082, + "learning_rate": 6.282051282051282e-06, + "loss": 0.0131, + "step": 13875 + }, + { + "epoch": 15.308370044052863, + "grad_norm": 0.2738574743270874, + "learning_rate": 6.256410256410257e-06, + "loss": 0.0127, + "step": 13900 + }, + { + "epoch": 15.33590308370044, + "grad_norm": 0.2029091864824295, + "learning_rate": 6.230769230769231e-06, + "loss": 0.0127, + "step": 13925 + }, + { + "epoch": 15.363436123348018, + "grad_norm": 0.09055199474096298, + "learning_rate": 6.205128205128206e-06, + "loss": 0.0153, + "step": 13950 + }, + { + "epoch": 15.390969162995594, + "grad_norm": 0.33734768629074097, + "learning_rate": 6.17948717948718e-06, + "loss": 0.0142, + "step": 13975 + }, + { + "epoch": 15.418502202643172, + "grad_norm": 0.11411110311746597, + "learning_rate": 6.153846153846155e-06, + "loss": 0.0136, + "step": 14000 + }, + { + "epoch": 15.418502202643172, + "eval_cer": 73.25610720713492, + "eval_loss": 0.8812981843948364, + "eval_runtime": 442.8713, + "eval_samples_per_second": 23.892, + "eval_steps_per_second": 5.975, + "eval_wer": 177.97265440829796, + "step": 14000 + }, + { + "epoch": 15.44603524229075, + "grad_norm": 0.21195538341999054, + "learning_rate": 6.128205128205129e-06, + "loss": 0.0142, + "step": 14025 + }, + { + "epoch": 15.473568281938325, + "grad_norm": 0.09307265281677246, + "learning_rate": 6.102564102564104e-06, + "loss": 0.0121, + "step": 14050 + }, + { + "epoch": 15.501101321585903, + "grad_norm": 0.18475420773029327, + "learning_rate": 6.076923076923077e-06, + "loss": 0.0133, + "step": 14075 + }, + { + "epoch": 15.52863436123348, + "grad_norm": 0.12336278706789017, + "learning_rate": 6.051282051282051e-06, + "loss": 0.0126, + "step": 14100 + }, + { + "epoch": 15.556167400881057, + "grad_norm": 0.18600626289844513, + "learning_rate": 6.025641025641026e-06, + "loss": 0.0122, + "step": 14125 + }, + { + "epoch": 15.583700440528634, + "grad_norm": 0.22498783469200134, + "learning_rate": 6e-06, + "loss": 0.0138, + "step": 14150 + }, + { + "epoch": 15.611233480176212, + "grad_norm": 0.10272740572690964, + "learning_rate": 5.974358974358975e-06, + "loss": 0.0146, + "step": 14175 + }, + { + "epoch": 15.638766519823788, + "grad_norm": 0.27834802865982056, + "learning_rate": 5.948717948717949e-06, + "loss": 0.013, + "step": 14200 + }, + { + "epoch": 15.666299559471366, + "grad_norm": 0.21032482385635376, + "learning_rate": 5.923076923076924e-06, + "loss": 0.0122, + "step": 14225 + }, + { + "epoch": 15.693832599118943, + "grad_norm": 0.12462333589792252, + "learning_rate": 5.897435897435898e-06, + "loss": 0.0142, + "step": 14250 + }, + { + "epoch": 15.72136563876652, + "grad_norm": 0.14421270787715912, + "learning_rate": 5.871794871794873e-06, + "loss": 0.0133, + "step": 14275 + }, + { + "epoch": 15.748898678414097, + "grad_norm": 0.0854870080947876, + "learning_rate": 5.846153846153847e-06, + "loss": 0.0141, + "step": 14300 + }, + { + "epoch": 15.776431718061675, + "grad_norm": 0.3137272298336029, + "learning_rate": 5.820512820512822e-06, + "loss": 0.0141, + "step": 14325 + }, + { + "epoch": 15.80396475770925, + "grad_norm": 0.42760950326919556, + "learning_rate": 5.794871794871796e-06, + "loss": 0.0162, + "step": 14350 + }, + { + "epoch": 15.831497797356828, + "grad_norm": 0.08184482902288437, + "learning_rate": 5.769230769230769e-06, + "loss": 0.0143, + "step": 14375 + }, + { + "epoch": 15.859030837004406, + "grad_norm": 0.2387475222349167, + "learning_rate": 5.743589743589743e-06, + "loss": 0.0139, + "step": 14400 + }, + { + "epoch": 15.886563876651982, + "grad_norm": 0.14585834741592407, + "learning_rate": 5.717948717948718e-06, + "loss": 0.0137, + "step": 14425 + }, + { + "epoch": 15.91409691629956, + "grad_norm": 0.22786743938922882, + "learning_rate": 5.692307692307692e-06, + "loss": 0.0142, + "step": 14450 + }, + { + "epoch": 15.941629955947137, + "grad_norm": 0.11104258894920349, + "learning_rate": 5.666666666666667e-06, + "loss": 0.0135, + "step": 14475 + }, + { + "epoch": 15.969162995594713, + "grad_norm": 0.12056852877140045, + "learning_rate": 5.641025641025641e-06, + "loss": 0.0123, + "step": 14500 + }, + { + "epoch": 15.996696035242291, + "grad_norm": 0.12010839581489563, + "learning_rate": 5.615384615384616e-06, + "loss": 0.0142, + "step": 14525 + }, + { + "epoch": 16.024229074889867, + "grad_norm": 0.14887858927249908, + "learning_rate": 5.58974358974359e-06, + "loss": 0.01, + "step": 14550 + }, + { + "epoch": 16.051762114537446, + "grad_norm": 0.06393441557884216, + "learning_rate": 5.564102564102565e-06, + "loss": 0.0107, + "step": 14575 + }, + { + "epoch": 16.079295154185022, + "grad_norm": 0.05813656747341156, + "learning_rate": 5.538461538461539e-06, + "loss": 0.0098, + "step": 14600 + }, + { + "epoch": 16.106828193832598, + "grad_norm": 0.30714789032936096, + "learning_rate": 5.512820512820514e-06, + "loss": 0.0103, + "step": 14625 + }, + { + "epoch": 16.134361233480178, + "grad_norm": 0.06188270449638367, + "learning_rate": 5.487179487179488e-06, + "loss": 0.0095, + "step": 14650 + }, + { + "epoch": 16.161894273127754, + "grad_norm": 0.16568152606487274, + "learning_rate": 5.461538461538461e-06, + "loss": 0.0093, + "step": 14675 + }, + { + "epoch": 16.18942731277533, + "grad_norm": 0.07314164191484451, + "learning_rate": 5.435897435897436e-06, + "loss": 0.0101, + "step": 14700 + }, + { + "epoch": 16.21696035242291, + "grad_norm": 0.06874439120292664, + "learning_rate": 5.41025641025641e-06, + "loss": 0.0102, + "step": 14725 + }, + { + "epoch": 16.244493392070485, + "grad_norm": 0.0864795446395874, + "learning_rate": 5.384615384615385e-06, + "loss": 0.0091, + "step": 14750 + }, + { + "epoch": 16.27202643171806, + "grad_norm": 0.09835877269506454, + "learning_rate": 5.358974358974359e-06, + "loss": 0.011, + "step": 14775 + }, + { + "epoch": 16.29955947136564, + "grad_norm": 0.18435481190681458, + "learning_rate": 5.333333333333334e-06, + "loss": 0.0106, + "step": 14800 + }, + { + "epoch": 16.327092511013216, + "grad_norm": 0.06160463020205498, + "learning_rate": 5.307692307692308e-06, + "loss": 0.0094, + "step": 14825 + }, + { + "epoch": 16.354625550660792, + "grad_norm": 0.06980791687965393, + "learning_rate": 5.282051282051283e-06, + "loss": 0.0107, + "step": 14850 + }, + { + "epoch": 16.38215859030837, + "grad_norm": 0.09515848010778427, + "learning_rate": 5.256410256410257e-06, + "loss": 0.0115, + "step": 14875 + }, + { + "epoch": 16.409691629955947, + "grad_norm": 0.11992871761322021, + "learning_rate": 5.230769230769232e-06, + "loss": 0.0102, + "step": 14900 + }, + { + "epoch": 16.437224669603523, + "grad_norm": 0.08134716004133224, + "learning_rate": 5.205128205128206e-06, + "loss": 0.0103, + "step": 14925 + }, + { + "epoch": 16.464757709251103, + "grad_norm": 0.5592033863067627, + "learning_rate": 5.179487179487181e-06, + "loss": 0.0118, + "step": 14950 + }, + { + "epoch": 16.49229074889868, + "grad_norm": 0.06395772844552994, + "learning_rate": 5.1538461538461534e-06, + "loss": 0.012, + "step": 14975 + }, + { + "epoch": 16.519823788546255, + "grad_norm": 0.24540168046951294, + "learning_rate": 5.128205128205128e-06, + "loss": 0.0102, + "step": 15000 + }, + { + "epoch": 16.519823788546255, + "eval_cer": 57.35444155308198, + "eval_loss": 0.8929909467697144, + "eval_runtime": 475.0118, + "eval_samples_per_second": 22.275, + "eval_steps_per_second": 5.57, + "eval_wer": 142.6968411126827, + "step": 15000 + }, + { + "epoch": 16.547356828193834, + "grad_norm": 0.1699555218219757, + "learning_rate": 5.1025641025641024e-06, + "loss": 0.0104, + "step": 15025 + }, + { + "epoch": 16.57488986784141, + "grad_norm": 0.12570585310459137, + "learning_rate": 5.076923076923077e-06, + "loss": 0.0092, + "step": 15050 + }, + { + "epoch": 16.602422907488986, + "grad_norm": 0.07865352183580399, + "learning_rate": 5.051282051282051e-06, + "loss": 0.0098, + "step": 15075 + }, + { + "epoch": 16.629955947136565, + "grad_norm": 0.12875448167324066, + "learning_rate": 5.025641025641026e-06, + "loss": 0.0104, + "step": 15100 + }, + { + "epoch": 16.65748898678414, + "grad_norm": 0.14089557528495789, + "learning_rate": 5e-06, + "loss": 0.0109, + "step": 15125 + }, + { + "epoch": 16.685022026431717, + "grad_norm": 0.07757396996021271, + "learning_rate": 4.974358974358975e-06, + "loss": 0.0092, + "step": 15150 + }, + { + "epoch": 16.712555066079297, + "grad_norm": 0.07462769746780396, + "learning_rate": 4.948717948717949e-06, + "loss": 0.0097, + "step": 15175 + }, + { + "epoch": 16.740088105726873, + "grad_norm": 0.17445862293243408, + "learning_rate": 4.923076923076924e-06, + "loss": 0.0108, + "step": 15200 + }, + { + "epoch": 16.76762114537445, + "grad_norm": 0.06989112496376038, + "learning_rate": 4.8974358974358975e-06, + "loss": 0.0107, + "step": 15225 + }, + { + "epoch": 16.795154185022028, + "grad_norm": 0.10867713391780853, + "learning_rate": 4.871794871794872e-06, + "loss": 0.0094, + "step": 15250 + }, + { + "epoch": 16.822687224669604, + "grad_norm": 0.08284565061330795, + "learning_rate": 4.8461538461538465e-06, + "loss": 0.0096, + "step": 15275 + }, + { + "epoch": 16.85022026431718, + "grad_norm": 0.40096861124038696, + "learning_rate": 4.820512820512821e-06, + "loss": 0.0106, + "step": 15300 + }, + { + "epoch": 16.87775330396476, + "grad_norm": 0.20616371929645538, + "learning_rate": 4.7948717948717955e-06, + "loss": 0.0098, + "step": 15325 + }, + { + "epoch": 16.905286343612335, + "grad_norm": 0.09715402871370316, + "learning_rate": 4.76923076923077e-06, + "loss": 0.009, + "step": 15350 + }, + { + "epoch": 16.93281938325991, + "grad_norm": 0.19870775938034058, + "learning_rate": 4.743589743589744e-06, + "loss": 0.0097, + "step": 15375 + }, + { + "epoch": 16.96035242290749, + "grad_norm": 0.07990965992212296, + "learning_rate": 4.717948717948718e-06, + "loss": 0.0103, + "step": 15400 + }, + { + "epoch": 16.987885462555067, + "grad_norm": 0.1826823353767395, + "learning_rate": 4.692307692307693e-06, + "loss": 0.0119, + "step": 15425 + }, + { + "epoch": 17.015418502202643, + "grad_norm": 0.07441161572933197, + "learning_rate": 4.666666666666667e-06, + "loss": 0.0091, + "step": 15450 + }, + { + "epoch": 17.042951541850222, + "grad_norm": 0.05266563221812248, + "learning_rate": 4.641025641025642e-06, + "loss": 0.0092, + "step": 15475 + }, + { + "epoch": 17.070484581497798, + "grad_norm": 0.06553443521261215, + "learning_rate": 4.615384615384616e-06, + "loss": 0.0068, + "step": 15500 + }, + { + "epoch": 17.098017621145374, + "grad_norm": 0.04783637821674347, + "learning_rate": 4.58974358974359e-06, + "loss": 0.0076, + "step": 15525 + }, + { + "epoch": 17.125550660792953, + "grad_norm": 0.06203090399503708, + "learning_rate": 4.564102564102564e-06, + "loss": 0.0076, + "step": 15550 + }, + { + "epoch": 17.15308370044053, + "grad_norm": 0.18787582218647003, + "learning_rate": 4.538461538461539e-06, + "loss": 0.0087, + "step": 15575 + }, + { + "epoch": 17.180616740088105, + "grad_norm": 0.054731931537389755, + "learning_rate": 4.512820512820513e-06, + "loss": 0.007, + "step": 15600 + }, + { + "epoch": 17.208149779735685, + "grad_norm": 0.06515911966562271, + "learning_rate": 4.487179487179488e-06, + "loss": 0.0091, + "step": 15625 + }, + { + "epoch": 17.23568281938326, + "grad_norm": 0.12270718812942505, + "learning_rate": 4.461538461538462e-06, + "loss": 0.0082, + "step": 15650 + }, + { + "epoch": 17.263215859030836, + "grad_norm": 0.09646424651145935, + "learning_rate": 4.435897435897436e-06, + "loss": 0.0082, + "step": 15675 + }, + { + "epoch": 17.290748898678412, + "grad_norm": 0.13605421781539917, + "learning_rate": 4.4102564102564104e-06, + "loss": 0.0071, + "step": 15700 + }, + { + "epoch": 17.318281938325992, + "grad_norm": 0.18393711745738983, + "learning_rate": 4.384615384615385e-06, + "loss": 0.0078, + "step": 15725 + }, + { + "epoch": 17.345814977973568, + "grad_norm": 0.053975436836481094, + "learning_rate": 4.358974358974359e-06, + "loss": 0.0085, + "step": 15750 + }, + { + "epoch": 17.373348017621144, + "grad_norm": 0.05142604932188988, + "learning_rate": 4.333333333333334e-06, + "loss": 0.0072, + "step": 15775 + }, + { + "epoch": 17.400881057268723, + "grad_norm": 0.09333042055368423, + "learning_rate": 4.307692307692308e-06, + "loss": 0.0071, + "step": 15800 + }, + { + "epoch": 17.4284140969163, + "grad_norm": 0.0760270357131958, + "learning_rate": 4.282051282051282e-06, + "loss": 0.0076, + "step": 15825 + }, + { + "epoch": 17.455947136563875, + "grad_norm": 0.07667813450098038, + "learning_rate": 4.2564102564102566e-06, + "loss": 0.0086, + "step": 15850 + }, + { + "epoch": 17.483480176211454, + "grad_norm": 0.07268601655960083, + "learning_rate": 4.230769230769231e-06, + "loss": 0.008, + "step": 15875 + }, + { + "epoch": 17.51101321585903, + "grad_norm": 0.05997084081172943, + "learning_rate": 4.2051282051282055e-06, + "loss": 0.0084, + "step": 15900 + }, + { + "epoch": 17.538546255506606, + "grad_norm": 0.08455850183963776, + "learning_rate": 4.17948717948718e-06, + "loss": 0.0079, + "step": 15925 + }, + { + "epoch": 17.566079295154186, + "grad_norm": 0.05682849884033203, + "learning_rate": 4.1538461538461545e-06, + "loss": 0.009, + "step": 15950 + }, + { + "epoch": 17.59361233480176, + "grad_norm": 0.13320918381214142, + "learning_rate": 4.128205128205128e-06, + "loss": 0.0091, + "step": 15975 + }, + { + "epoch": 17.621145374449338, + "grad_norm": 0.08159907907247543, + "learning_rate": 4.102564102564103e-06, + "loss": 0.0079, + "step": 16000 + }, + { + "epoch": 17.621145374449338, + "eval_cer": 59.87355702543762, + "eval_loss": 0.9064295291900635, + "eval_runtime": 473.5233, + "eval_samples_per_second": 22.345, + "eval_steps_per_second": 5.588, + "eval_wer": 132.6166902404526, + "step": 16000 + }, + { + "epoch": 17.648678414096917, + "grad_norm": 0.08243115991353989, + "learning_rate": 4.076923076923077e-06, + "loss": 0.0075, + "step": 16025 + }, + { + "epoch": 17.676211453744493, + "grad_norm": 0.06684228777885437, + "learning_rate": 4.051282051282052e-06, + "loss": 0.0086, + "step": 16050 + }, + { + "epoch": 17.70374449339207, + "grad_norm": 0.07782474905252457, + "learning_rate": 4.025641025641026e-06, + "loss": 0.0081, + "step": 16075 + }, + { + "epoch": 17.73127753303965, + "grad_norm": 0.09877069294452667, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0082, + "step": 16100 + }, + { + "epoch": 17.758810572687224, + "grad_norm": 0.1733996719121933, + "learning_rate": 3.974358974358974e-06, + "loss": 0.008, + "step": 16125 + }, + { + "epoch": 17.7863436123348, + "grad_norm": 0.05951946601271629, + "learning_rate": 3.948717948717949e-06, + "loss": 0.0089, + "step": 16150 + }, + { + "epoch": 17.81387665198238, + "grad_norm": 0.09281080961227417, + "learning_rate": 3.923076923076923e-06, + "loss": 0.0085, + "step": 16175 + }, + { + "epoch": 17.841409691629956, + "grad_norm": 0.17630000412464142, + "learning_rate": 3.897435897435898e-06, + "loss": 0.0091, + "step": 16200 + }, + { + "epoch": 17.86894273127753, + "grad_norm": 0.058690495789051056, + "learning_rate": 3.871794871794872e-06, + "loss": 0.008, + "step": 16225 + }, + { + "epoch": 17.89647577092511, + "grad_norm": 0.09863056242465973, + "learning_rate": 3.846153846153847e-06, + "loss": 0.0089, + "step": 16250 + }, + { + "epoch": 17.924008810572687, + "grad_norm": 0.09190403670072556, + "learning_rate": 3.8205128205128204e-06, + "loss": 0.0087, + "step": 16275 + }, + { + "epoch": 17.951541850220263, + "grad_norm": 0.06220954656600952, + "learning_rate": 3.794871794871795e-06, + "loss": 0.008, + "step": 16300 + }, + { + "epoch": 17.979074889867842, + "grad_norm": 0.0854143351316452, + "learning_rate": 3.7692307692307694e-06, + "loss": 0.008, + "step": 16325 + }, + { + "epoch": 18.006607929515418, + "grad_norm": 0.10065086930990219, + "learning_rate": 3.743589743589744e-06, + "loss": 0.0077, + "step": 16350 + }, + { + "epoch": 18.034140969162994, + "grad_norm": 0.07576055824756622, + "learning_rate": 3.7179487179487184e-06, + "loss": 0.0066, + "step": 16375 + }, + { + "epoch": 18.061674008810574, + "grad_norm": 0.04324162006378174, + "learning_rate": 3.692307692307693e-06, + "loss": 0.0065, + "step": 16400 + }, + { + "epoch": 18.08920704845815, + "grad_norm": 0.07214418798685074, + "learning_rate": 3.6666666666666666e-06, + "loss": 0.0061, + "step": 16425 + }, + { + "epoch": 18.116740088105725, + "grad_norm": 0.05964656174182892, + "learning_rate": 3.641025641025641e-06, + "loss": 0.006, + "step": 16450 + }, + { + "epoch": 18.144273127753305, + "grad_norm": 0.06809210777282715, + "learning_rate": 3.6153846153846156e-06, + "loss": 0.0068, + "step": 16475 + }, + { + "epoch": 18.17180616740088, + "grad_norm": 0.04498510807752609, + "learning_rate": 3.58974358974359e-06, + "loss": 0.0065, + "step": 16500 + }, + { + "epoch": 18.199339207048457, + "grad_norm": 0.05225253850221634, + "learning_rate": 3.5641025641025646e-06, + "loss": 0.0064, + "step": 16525 + }, + { + "epoch": 18.226872246696036, + "grad_norm": 0.05612196773290634, + "learning_rate": 3.538461538461539e-06, + "loss": 0.0069, + "step": 16550 + }, + { + "epoch": 18.254405286343612, + "grad_norm": 0.05375833064317703, + "learning_rate": 3.5128205128205127e-06, + "loss": 0.0063, + "step": 16575 + }, + { + "epoch": 18.281938325991188, + "grad_norm": 0.08952938765287399, + "learning_rate": 3.487179487179487e-06, + "loss": 0.0063, + "step": 16600 + }, + { + "epoch": 18.309471365638768, + "grad_norm": 0.05091915279626846, + "learning_rate": 3.4615384615384617e-06, + "loss": 0.0064, + "step": 16625 + }, + { + "epoch": 18.337004405286343, + "grad_norm": 0.05258096382021904, + "learning_rate": 3.435897435897436e-06, + "loss": 0.0066, + "step": 16650 + }, + { + "epoch": 18.36453744493392, + "grad_norm": 0.09490983188152313, + "learning_rate": 3.4102564102564107e-06, + "loss": 0.0063, + "step": 16675 + }, + { + "epoch": 18.3920704845815, + "grad_norm": 0.05754420533776283, + "learning_rate": 3.384615384615385e-06, + "loss": 0.0066, + "step": 16700 + }, + { + "epoch": 18.419603524229075, + "grad_norm": 0.045870695263147354, + "learning_rate": 3.358974358974359e-06, + "loss": 0.0062, + "step": 16725 + }, + { + "epoch": 18.44713656387665, + "grad_norm": 0.05464649200439453, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.007, + "step": 16750 + }, + { + "epoch": 18.47466960352423, + "grad_norm": 0.09517217427492142, + "learning_rate": 3.307692307692308e-06, + "loss": 0.0066, + "step": 16775 + }, + { + "epoch": 18.502202643171806, + "grad_norm": 0.05817287415266037, + "learning_rate": 3.2820512820512823e-06, + "loss": 0.0066, + "step": 16800 + }, + { + "epoch": 18.529735682819382, + "grad_norm": 0.06281236559152603, + "learning_rate": 3.256410256410257e-06, + "loss": 0.0069, + "step": 16825 + }, + { + "epoch": 18.55726872246696, + "grad_norm": 0.06776443868875504, + "learning_rate": 3.2307692307692313e-06, + "loss": 0.0071, + "step": 16850 + }, + { + "epoch": 18.584801762114537, + "grad_norm": 0.07165702432394028, + "learning_rate": 3.205128205128206e-06, + "loss": 0.0067, + "step": 16875 + }, + { + "epoch": 18.612334801762113, + "grad_norm": 0.05155620351433754, + "learning_rate": 3.1794871794871795e-06, + "loss": 0.0063, + "step": 16900 + }, + { + "epoch": 18.639867841409693, + "grad_norm": 0.05491460859775543, + "learning_rate": 3.153846153846154e-06, + "loss": 0.0062, + "step": 16925 + }, + { + "epoch": 18.66740088105727, + "grad_norm": 0.05498324707150459, + "learning_rate": 3.1282051282051284e-06, + "loss": 0.0068, + "step": 16950 + }, + { + "epoch": 18.694933920704845, + "grad_norm": 0.06264489144086838, + "learning_rate": 3.102564102564103e-06, + "loss": 0.0063, + "step": 16975 + }, + { + "epoch": 18.722466960352424, + "grad_norm": 0.058767594397068024, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.0074, + "step": 17000 + }, + { + "epoch": 18.722466960352424, + "eval_cer": 55.902582738183945, + "eval_loss": 0.9159504175186157, + "eval_runtime": 454.2093, + "eval_samples_per_second": 23.295, + "eval_steps_per_second": 5.826, + "eval_wer": 125.60113154172561, + "step": 17000 + }, + { + "epoch": 18.75, + "grad_norm": 0.07268258184194565, + "learning_rate": 3.051282051282052e-06, + "loss": 0.0072, + "step": 17025 + }, + { + "epoch": 18.777533039647576, + "grad_norm": 0.1571418195962906, + "learning_rate": 3.0256410256410256e-06, + "loss": 0.0066, + "step": 17050 + }, + { + "epoch": 18.805066079295155, + "grad_norm": 0.054754406213760376, + "learning_rate": 3e-06, + "loss": 0.0065, + "step": 17075 + }, + { + "epoch": 18.83259911894273, + "grad_norm": 0.06480716168880463, + "learning_rate": 2.9743589743589746e-06, + "loss": 0.0061, + "step": 17100 + }, + { + "epoch": 18.860132158590307, + "grad_norm": 0.08004415780305862, + "learning_rate": 2.948717948717949e-06, + "loss": 0.0064, + "step": 17125 + }, + { + "epoch": 18.887665198237887, + "grad_norm": 0.05629754438996315, + "learning_rate": 2.9230769230769236e-06, + "loss": 0.0069, + "step": 17150 + }, + { + "epoch": 18.915198237885463, + "grad_norm": 0.05493941903114319, + "learning_rate": 2.897435897435898e-06, + "loss": 0.0068, + "step": 17175 + }, + { + "epoch": 18.94273127753304, + "grad_norm": 0.05299900844693184, + "learning_rate": 2.8717948717948717e-06, + "loss": 0.0067, + "step": 17200 + }, + { + "epoch": 18.970264317180618, + "grad_norm": 0.06607411801815033, + "learning_rate": 2.846153846153846e-06, + "loss": 0.0073, + "step": 17225 + }, + { + "epoch": 18.997797356828194, + "grad_norm": 0.06345100700855255, + "learning_rate": 2.8205128205128207e-06, + "loss": 0.0068, + "step": 17250 + }, + { + "epoch": 19.02533039647577, + "grad_norm": 0.04356463998556137, + "learning_rate": 2.794871794871795e-06, + "loss": 0.0057, + "step": 17275 + }, + { + "epoch": 19.05286343612335, + "grad_norm": 0.044814836233854294, + "learning_rate": 2.7692307692307697e-06, + "loss": 0.0055, + "step": 17300 + }, + { + "epoch": 19.080396475770925, + "grad_norm": 0.07849572598934174, + "learning_rate": 2.743589743589744e-06, + "loss": 0.0055, + "step": 17325 + }, + { + "epoch": 19.1079295154185, + "grad_norm": 0.07459200173616409, + "learning_rate": 2.717948717948718e-06, + "loss": 0.0054, + "step": 17350 + }, + { + "epoch": 19.13546255506608, + "grad_norm": 0.04626765102148056, + "learning_rate": 2.6923076923076923e-06, + "loss": 0.0051, + "step": 17375 + }, + { + "epoch": 19.162995594713657, + "grad_norm": 0.04221516102552414, + "learning_rate": 2.666666666666667e-06, + "loss": 0.0053, + "step": 17400 + }, + { + "epoch": 19.190528634361232, + "grad_norm": 0.05490809306502342, + "learning_rate": 2.6410256410256413e-06, + "loss": 0.0055, + "step": 17425 + }, + { + "epoch": 19.218061674008812, + "grad_norm": 0.058104030787944794, + "learning_rate": 2.615384615384616e-06, + "loss": 0.0055, + "step": 17450 + }, + { + "epoch": 19.245594713656388, + "grad_norm": 0.0437290221452713, + "learning_rate": 2.5897435897435903e-06, + "loss": 0.0058, + "step": 17475 + }, + { + "epoch": 19.273127753303964, + "grad_norm": 0.048149868845939636, + "learning_rate": 2.564102564102564e-06, + "loss": 0.0053, + "step": 17500 + }, + { + "epoch": 19.300660792951543, + "grad_norm": 0.05037374794483185, + "learning_rate": 2.5384615384615385e-06, + "loss": 0.006, + "step": 17525 + }, + { + "epoch": 19.32819383259912, + "grad_norm": 0.06295998394489288, + "learning_rate": 2.512820512820513e-06, + "loss": 0.0057, + "step": 17550 + }, + { + "epoch": 19.355726872246695, + "grad_norm": 0.04326135665178299, + "learning_rate": 2.4871794871794875e-06, + "loss": 0.0053, + "step": 17575 + }, + { + "epoch": 19.383259911894275, + "grad_norm": 0.06319163739681244, + "learning_rate": 2.461538461538462e-06, + "loss": 0.0059, + "step": 17600 + }, + { + "epoch": 19.41079295154185, + "grad_norm": 0.03665575757622719, + "learning_rate": 2.435897435897436e-06, + "loss": 0.0054, + "step": 17625 + }, + { + "epoch": 19.438325991189426, + "grad_norm": 0.07812593877315521, + "learning_rate": 2.4102564102564105e-06, + "loss": 0.0056, + "step": 17650 + }, + { + "epoch": 19.465859030837006, + "grad_norm": 0.0484769307076931, + "learning_rate": 2.384615384615385e-06, + "loss": 0.006, + "step": 17675 + }, + { + "epoch": 19.493392070484582, + "grad_norm": 0.04405777528882027, + "learning_rate": 2.358974358974359e-06, + "loss": 0.0056, + "step": 17700 + }, + { + "epoch": 19.520925110132158, + "grad_norm": 0.04773370921611786, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.0054, + "step": 17725 + }, + { + "epoch": 19.548458149779737, + "grad_norm": 0.04407154396176338, + "learning_rate": 2.307692307692308e-06, + "loss": 0.0059, + "step": 17750 + }, + { + "epoch": 19.575991189427313, + "grad_norm": 0.05238990858197212, + "learning_rate": 2.282051282051282e-06, + "loss": 0.0058, + "step": 17775 + }, + { + "epoch": 19.60352422907489, + "grad_norm": 0.06105871871113777, + "learning_rate": 2.2564102564102566e-06, + "loss": 0.0055, + "step": 17800 + }, + { + "epoch": 19.63105726872247, + "grad_norm": 0.15811942517757416, + "learning_rate": 2.230769230769231e-06, + "loss": 0.0062, + "step": 17825 + }, + { + "epoch": 19.658590308370044, + "grad_norm": 0.036435484886169434, + "learning_rate": 2.2051282051282052e-06, + "loss": 0.0053, + "step": 17850 + }, + { + "epoch": 19.68612334801762, + "grad_norm": 0.042688727378845215, + "learning_rate": 2.1794871794871797e-06, + "loss": 0.0056, + "step": 17875 + }, + { + "epoch": 19.7136563876652, + "grad_norm": 0.04815078526735306, + "learning_rate": 2.153846153846154e-06, + "loss": 0.0054, + "step": 17900 + }, + { + "epoch": 19.741189427312776, + "grad_norm": 0.04753319174051285, + "learning_rate": 2.1282051282051283e-06, + "loss": 0.0056, + "step": 17925 + }, + { + "epoch": 19.76872246696035, + "grad_norm": 0.04775834083557129, + "learning_rate": 2.1025641025641028e-06, + "loss": 0.0058, + "step": 17950 + }, + { + "epoch": 19.79625550660793, + "grad_norm": 0.04497678205370903, + "learning_rate": 2.0769230769230773e-06, + "loss": 0.0054, + "step": 17975 + }, + { + "epoch": 19.823788546255507, + "grad_norm": 0.06292616575956345, + "learning_rate": 2.0512820512820513e-06, + "loss": 0.0053, + "step": 18000 + }, + { + "epoch": 19.823788546255507, + "eval_cer": 50.16278243325347, + "eval_loss": 0.9244782328605652, + "eval_runtime": 438.8708, + "eval_samples_per_second": 24.11, + "eval_steps_per_second": 6.029, + "eval_wer": 116.01131541725603, + "step": 18000 + }, + { + "epoch": 19.851321585903083, + "grad_norm": 0.04525560513138771, + "learning_rate": 2.025641025641026e-06, + "loss": 0.0061, + "step": 18025 + }, + { + "epoch": 19.878854625550662, + "grad_norm": 0.04258139431476593, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0055, + "step": 18050 + }, + { + "epoch": 19.90638766519824, + "grad_norm": 0.05663159489631653, + "learning_rate": 1.9743589743589744e-06, + "loss": 0.0056, + "step": 18075 + }, + { + "epoch": 19.933920704845814, + "grad_norm": 0.05438832566142082, + "learning_rate": 1.948717948717949e-06, + "loss": 0.0057, + "step": 18100 + }, + { + "epoch": 19.961453744493394, + "grad_norm": 0.049876242876052856, + "learning_rate": 1.9230769230769234e-06, + "loss": 0.0057, + "step": 18125 + }, + { + "epoch": 19.98898678414097, + "grad_norm": 0.04682116210460663, + "learning_rate": 1.8974358974358975e-06, + "loss": 0.0054, + "step": 18150 + }, + { + "epoch": 20.016519823788546, + "grad_norm": 0.031594615429639816, + "learning_rate": 1.871794871794872e-06, + "loss": 0.0049, + "step": 18175 + }, + { + "epoch": 20.044052863436125, + "grad_norm": 0.03813030198216438, + "learning_rate": 1.8461538461538465e-06, + "loss": 0.0047, + "step": 18200 + }, + { + "epoch": 20.0715859030837, + "grad_norm": 0.0341855026781559, + "learning_rate": 1.8205128205128205e-06, + "loss": 0.005, + "step": 18225 + }, + { + "epoch": 20.099118942731277, + "grad_norm": 0.03425971418619156, + "learning_rate": 1.794871794871795e-06, + "loss": 0.0048, + "step": 18250 + }, + { + "epoch": 20.126651982378856, + "grad_norm": 0.03382967412471771, + "learning_rate": 1.7692307692307695e-06, + "loss": 0.0046, + "step": 18275 + }, + { + "epoch": 20.154185022026432, + "grad_norm": 0.0427679605782032, + "learning_rate": 1.7435897435897436e-06, + "loss": 0.0046, + "step": 18300 + }, + { + "epoch": 20.181718061674008, + "grad_norm": 0.053978513926267624, + "learning_rate": 1.717948717948718e-06, + "loss": 0.005, + "step": 18325 + }, + { + "epoch": 20.209251101321588, + "grad_norm": 0.03782325237989426, + "learning_rate": 1.6923076923076926e-06, + "loss": 0.0048, + "step": 18350 + }, + { + "epoch": 20.236784140969164, + "grad_norm": 0.03709937259554863, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.0046, + "step": 18375 + }, + { + "epoch": 20.26431718061674, + "grad_norm": 0.037801649421453476, + "learning_rate": 1.6410256410256412e-06, + "loss": 0.0045, + "step": 18400 + }, + { + "epoch": 20.291850220264315, + "grad_norm": 0.04301599785685539, + "learning_rate": 1.6153846153846157e-06, + "loss": 0.0049, + "step": 18425 + }, + { + "epoch": 20.319383259911895, + "grad_norm": 0.052962783724069595, + "learning_rate": 1.5897435897435897e-06, + "loss": 0.0054, + "step": 18450 + }, + { + "epoch": 20.34691629955947, + "grad_norm": 0.033712126314640045, + "learning_rate": 1.5641025641025642e-06, + "loss": 0.0046, + "step": 18475 + }, + { + "epoch": 20.374449339207047, + "grad_norm": 0.04511284828186035, + "learning_rate": 1.5384615384615387e-06, + "loss": 0.0055, + "step": 18500 + }, + { + "epoch": 20.401982378854626, + "grad_norm": 0.04226896911859512, + "learning_rate": 1.5128205128205128e-06, + "loss": 0.005, + "step": 18525 + }, + { + "epoch": 20.429515418502202, + "grad_norm": 0.05907629802823067, + "learning_rate": 1.4871794871794873e-06, + "loss": 0.0053, + "step": 18550 + }, + { + "epoch": 20.457048458149778, + "grad_norm": 0.03855994716286659, + "learning_rate": 1.4615384615384618e-06, + "loss": 0.0048, + "step": 18575 + }, + { + "epoch": 20.484581497797357, + "grad_norm": 0.03888246417045593, + "learning_rate": 1.4358974358974359e-06, + "loss": 0.0044, + "step": 18600 + }, + { + "epoch": 20.512114537444933, + "grad_norm": 0.04747318476438522, + "learning_rate": 1.4102564102564104e-06, + "loss": 0.005, + "step": 18625 + }, + { + "epoch": 20.53964757709251, + "grad_norm": 0.03958306834101677, + "learning_rate": 1.3846153846153848e-06, + "loss": 0.0049, + "step": 18650 + }, + { + "epoch": 20.56718061674009, + "grad_norm": 0.03587072342634201, + "learning_rate": 1.358974358974359e-06, + "loss": 0.005, + "step": 18675 + }, + { + "epoch": 20.594713656387665, + "grad_norm": 0.05696781352162361, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.0047, + "step": 18700 + }, + { + "epoch": 20.62224669603524, + "grad_norm": 0.04611218348145485, + "learning_rate": 1.307692307692308e-06, + "loss": 0.0049, + "step": 18725 + }, + { + "epoch": 20.64977973568282, + "grad_norm": 0.039149776101112366, + "learning_rate": 1.282051282051282e-06, + "loss": 0.0047, + "step": 18750 + }, + { + "epoch": 20.677312775330396, + "grad_norm": 0.041222602128982544, + "learning_rate": 1.2564102564102565e-06, + "loss": 0.0049, + "step": 18775 + }, + { + "epoch": 20.704845814977972, + "grad_norm": 0.04520060867071152, + "learning_rate": 1.230769230769231e-06, + "loss": 0.005, + "step": 18800 + }, + { + "epoch": 20.73237885462555, + "grad_norm": 0.2069810926914215, + "learning_rate": 1.2051282051282053e-06, + "loss": 0.0051, + "step": 18825 + }, + { + "epoch": 20.759911894273127, + "grad_norm": 0.04224303737282753, + "learning_rate": 1.1794871794871795e-06, + "loss": 0.0049, + "step": 18850 + }, + { + "epoch": 20.787444933920703, + "grad_norm": 0.040397610515356064, + "learning_rate": 1.153846153846154e-06, + "loss": 0.0045, + "step": 18875 + }, + { + "epoch": 20.814977973568283, + "grad_norm": 0.037870801985263824, + "learning_rate": 1.1282051282051283e-06, + "loss": 0.0048, + "step": 18900 + }, + { + "epoch": 20.84251101321586, + "grad_norm": 0.04810772091150284, + "learning_rate": 1.1025641025641026e-06, + "loss": 0.0051, + "step": 18925 + }, + { + "epoch": 20.870044052863435, + "grad_norm": 0.045735545456409454, + "learning_rate": 1.076923076923077e-06, + "loss": 0.0047, + "step": 18950 + }, + { + "epoch": 20.897577092511014, + "grad_norm": 0.03935140371322632, + "learning_rate": 1.0512820512820514e-06, + "loss": 0.0048, + "step": 18975 + }, + { + "epoch": 20.92511013215859, + "grad_norm": 0.05066705495119095, + "learning_rate": 1.0256410256410257e-06, + "loss": 0.0052, + "step": 19000 + }, + { + "epoch": 20.92511013215859, + "eval_cer": 47.776644159893614, + "eval_loss": 0.9299447536468506, + "eval_runtime": 436.4027, + "eval_samples_per_second": 24.246, + "eval_steps_per_second": 6.063, + "eval_wer": 115.08722300801509, + "step": 19000 + }, + { + "epoch": 20.952643171806166, + "grad_norm": 0.04411695525050163, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0049, + "step": 19025 + }, + { + "epoch": 20.980176211453745, + "grad_norm": 0.041007477790117264, + "learning_rate": 9.743589743589745e-07, + "loss": 0.0049, + "step": 19050 + }, + { + "epoch": 21.00770925110132, + "grad_norm": 0.03803296014666557, + "learning_rate": 9.487179487179487e-07, + "loss": 0.0048, + "step": 19075 + }, + { + "epoch": 21.035242290748897, + "grad_norm": 0.03167716786265373, + "learning_rate": 9.230769230769232e-07, + "loss": 0.0043, + "step": 19100 + }, + { + "epoch": 21.062775330396477, + "grad_norm": 0.04057995602488518, + "learning_rate": 8.974358974358975e-07, + "loss": 0.0044, + "step": 19125 + }, + { + "epoch": 21.090308370044053, + "grad_norm": 0.042665161192417145, + "learning_rate": 8.717948717948718e-07, + "loss": 0.0044, + "step": 19150 + }, + { + "epoch": 21.11784140969163, + "grad_norm": 0.031034432351589203, + "learning_rate": 8.461538461538463e-07, + "loss": 0.0042, + "step": 19175 + }, + { + "epoch": 21.145374449339208, + "grad_norm": 0.03610997274518013, + "learning_rate": 8.205128205128206e-07, + "loss": 0.0043, + "step": 19200 + }, + { + "epoch": 21.172907488986784, + "grad_norm": 0.03419085592031479, + "learning_rate": 7.948717948717949e-07, + "loss": 0.0044, + "step": 19225 + }, + { + "epoch": 21.20044052863436, + "grad_norm": 0.03195258602499962, + "learning_rate": 7.692307692307694e-07, + "loss": 0.0043, + "step": 19250 + }, + { + "epoch": 21.22797356828194, + "grad_norm": 0.033934228122234344, + "learning_rate": 7.435897435897436e-07, + "loss": 0.0044, + "step": 19275 + }, + { + "epoch": 21.255506607929515, + "grad_norm": 0.038121603429317474, + "learning_rate": 7.179487179487179e-07, + "loss": 0.0042, + "step": 19300 + }, + { + "epoch": 21.28303964757709, + "grad_norm": 0.042483534663915634, + "learning_rate": 6.923076923076924e-07, + "loss": 0.0043, + "step": 19325 + }, + { + "epoch": 21.31057268722467, + "grad_norm": 0.03081641159951687, + "learning_rate": 6.666666666666667e-07, + "loss": 0.0045, + "step": 19350 + }, + { + "epoch": 21.338105726872246, + "grad_norm": 0.038055986166000366, + "learning_rate": 6.41025641025641e-07, + "loss": 0.0045, + "step": 19375 + }, + { + "epoch": 21.365638766519822, + "grad_norm": 0.047620829194784164, + "learning_rate": 6.153846153846155e-07, + "loss": 0.0045, + "step": 19400 + }, + { + "epoch": 21.393171806167402, + "grad_norm": 0.04037508741021156, + "learning_rate": 5.897435897435898e-07, + "loss": 0.0046, + "step": 19425 + }, + { + "epoch": 21.420704845814978, + "grad_norm": 0.031555745750665665, + "learning_rate": 5.641025641025642e-07, + "loss": 0.0044, + "step": 19450 + }, + { + "epoch": 21.448237885462554, + "grad_norm": 0.040886688977479935, + "learning_rate": 5.384615384615386e-07, + "loss": 0.0045, + "step": 19475 + }, + { + "epoch": 21.475770925110133, + "grad_norm": 0.034373532980680466, + "learning_rate": 5.128205128205128e-07, + "loss": 0.0045, + "step": 19500 + }, + { + "epoch": 21.50330396475771, + "grad_norm": 0.03271722421050072, + "learning_rate": 4.871794871794872e-07, + "loss": 0.0044, + "step": 19525 + }, + { + "epoch": 21.530837004405285, + "grad_norm": 0.03823432698845863, + "learning_rate": 4.615384615384616e-07, + "loss": 0.0043, + "step": 19550 + }, + { + "epoch": 21.558370044052865, + "grad_norm": 0.03353292867541313, + "learning_rate": 4.358974358974359e-07, + "loss": 0.0041, + "step": 19575 + }, + { + "epoch": 21.58590308370044, + "grad_norm": 0.04605744779109955, + "learning_rate": 4.102564102564103e-07, + "loss": 0.0046, + "step": 19600 + }, + { + "epoch": 21.613436123348016, + "grad_norm": 0.0359153151512146, + "learning_rate": 3.846153846153847e-07, + "loss": 0.0047, + "step": 19625 + }, + { + "epoch": 21.640969162995596, + "grad_norm": 0.03662218898534775, + "learning_rate": 3.5897435897435896e-07, + "loss": 0.0044, + "step": 19650 + }, + { + "epoch": 21.66850220264317, + "grad_norm": 0.03287964314222336, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.0043, + "step": 19675 + }, + { + "epoch": 21.696035242290748, + "grad_norm": 0.031979408115148544, + "learning_rate": 3.0769230769230774e-07, + "loss": 0.0043, + "step": 19700 + }, + { + "epoch": 21.723568281938327, + "grad_norm": 0.03703833371400833, + "learning_rate": 2.820512820512821e-07, + "loss": 0.005, + "step": 19725 + }, + { + "epoch": 21.751101321585903, + "grad_norm": 0.037790607661008835, + "learning_rate": 2.564102564102564e-07, + "loss": 0.0044, + "step": 19750 + }, + { + "epoch": 21.77863436123348, + "grad_norm": 0.04017505794763565, + "learning_rate": 2.307692307692308e-07, + "loss": 0.0044, + "step": 19775 + }, + { + "epoch": 21.80616740088106, + "grad_norm": 0.045976828783750534, + "learning_rate": 2.0512820512820514e-07, + "loss": 0.0043, + "step": 19800 + }, + { + "epoch": 21.833700440528634, + "grad_norm": 0.035648688673973083, + "learning_rate": 1.7948717948717948e-07, + "loss": 0.0043, + "step": 19825 + }, + { + "epoch": 21.86123348017621, + "grad_norm": 0.041394732892513275, + "learning_rate": 1.5384615384615387e-07, + "loss": 0.0044, + "step": 19850 + }, + { + "epoch": 21.88876651982379, + "grad_norm": 0.03750582039356232, + "learning_rate": 1.282051282051282e-07, + "loss": 0.0043, + "step": 19875 + }, + { + "epoch": 21.916299559471366, + "grad_norm": 0.037499021738767624, + "learning_rate": 1.0256410256410257e-07, + "loss": 0.0042, + "step": 19900 + }, + { + "epoch": 21.94383259911894, + "grad_norm": 0.04036805406212807, + "learning_rate": 7.692307692307694e-08, + "loss": 0.0044, + "step": 19925 + }, + { + "epoch": 21.97136563876652, + "grad_norm": 0.03988456726074219, + "learning_rate": 5.1282051282051286e-08, + "loss": 0.0043, + "step": 19950 + }, + { + "epoch": 21.998898678414097, + "grad_norm": 0.03900681063532829, + "learning_rate": 2.5641025641025643e-08, + "loss": 0.0047, + "step": 19975 + }, + { + "epoch": 22.026431718061673, + "grad_norm": 0.03660197928547859, + "learning_rate": 0.0, + "loss": 0.0043, + "step": 20000 + }, + { + "epoch": 22.026431718061673, + "eval_cer": 50.12380635768574, + "eval_loss": 0.932576060295105, + "eval_runtime": 448.5885, + "eval_samples_per_second": 23.587, + "eval_steps_per_second": 5.899, + "eval_wer": 115.25695426685525, + "step": 20000 + }, + { + "epoch": 22.026431718061673, + "step": 20000, + "total_flos": 1.5756085807389082e+19, + "train_loss": 0.1688191306591034, + "train_runtime": 17105.7932, + "train_samples_per_second": 37.414, + "train_steps_per_second": 1.169 + } + ], + "logging_steps": 25, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 23, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.5756085807389082e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}