| { |
| "best_metric": 83.48892032060348, |
| "best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-12000", |
| "epoch": 14.317180616740089, |
| "eval_steps": 1000, |
| "global_step": 13000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02753303964757709, |
| "grad_norm": 4.478171348571777, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 11.1909, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05506607929515418, |
| "grad_norm": 2.329022169113159, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 10.3969, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08259911894273128, |
| "grad_norm": 2.1983585357666016, |
| "learning_rate": 3e-06, |
| "loss": 8.5836, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11013215859030837, |
| "grad_norm": 1.6278852224349976, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 6.6181, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13766519823788545, |
| "grad_norm": 2.1669721603393555, |
| "learning_rate": 5e-06, |
| "loss": 5.5522, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.16519823788546256, |
| "grad_norm": 1.5040589570999146, |
| "learning_rate": 6e-06, |
| "loss": 4.6043, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19273127753303965, |
| "grad_norm": 1.5719494819641113, |
| "learning_rate": 7e-06, |
| "loss": 4.2552, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.22026431718061673, |
| "grad_norm": 1.7963982820510864, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 4.1074, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.24779735682819384, |
| "grad_norm": 1.4686423540115356, |
| "learning_rate": 9e-06, |
| "loss": 3.9698, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2753303964757709, |
| "grad_norm": 1.724307894706726, |
| "learning_rate": 1e-05, |
| "loss": 3.8433, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.30286343612334804, |
| "grad_norm": 1.8229764699935913, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 3.7012, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3303964757709251, |
| "grad_norm": 1.511186957359314, |
| "learning_rate": 1.2e-05, |
| "loss": 3.5982, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3579295154185022, |
| "grad_norm": 1.5931724309921265, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 3.4969, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3854625550660793, |
| "grad_norm": 1.491697907447815, |
| "learning_rate": 1.4e-05, |
| "loss": 3.4174, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4129955947136564, |
| "grad_norm": 1.6408978700637817, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 3.3055, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.44052863436123346, |
| "grad_norm": 1.9127188920974731, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 3.1426, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46806167400881055, |
| "grad_norm": 1.61039137840271, |
| "learning_rate": 1.7e-05, |
| "loss": 3.0495, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4955947136563877, |
| "grad_norm": 1.7420951128005981, |
| "learning_rate": 1.8e-05, |
| "loss": 2.9459, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5231277533039648, |
| "grad_norm": 1.6728287935256958, |
| "learning_rate": 1.9e-05, |
| "loss": 2.8005, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5506607929515418, |
| "grad_norm": 1.5474518537521362, |
| "learning_rate": 2e-05, |
| "loss": 2.5985, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5781938325991189, |
| "grad_norm": 2.028834342956543, |
| "learning_rate": 1.9974358974358975e-05, |
| "loss": 2.482, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6057268722466961, |
| "grad_norm": 1.9661887884140015, |
| "learning_rate": 1.994871794871795e-05, |
| "loss": 2.318, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6332599118942731, |
| "grad_norm": 1.7340378761291504, |
| "learning_rate": 1.9923076923076926e-05, |
| "loss": 2.1969, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6607929515418502, |
| "grad_norm": 1.4609960317611694, |
| "learning_rate": 1.98974358974359e-05, |
| "loss": 2.0461, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6883259911894273, |
| "grad_norm": 1.7009024620056152, |
| "learning_rate": 1.9871794871794873e-05, |
| "loss": 1.9371, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7158590308370044, |
| "grad_norm": 1.9823498725891113, |
| "learning_rate": 1.9846153846153847e-05, |
| "loss": 1.8849, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7433920704845814, |
| "grad_norm": 1.3274204730987549, |
| "learning_rate": 1.9820512820512824e-05, |
| "loss": 1.7659, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7709251101321586, |
| "grad_norm": 1.5015735626220703, |
| "learning_rate": 1.9794871794871798e-05, |
| "loss": 1.685, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7984581497797357, |
| "grad_norm": 1.4172351360321045, |
| "learning_rate": 1.976923076923077e-05, |
| "loss": 1.6873, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8259911894273128, |
| "grad_norm": 1.688879132270813, |
| "learning_rate": 1.9743589743589745e-05, |
| "loss": 1.6332, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8535242290748899, |
| "grad_norm": 1.6498134136199951, |
| "learning_rate": 1.9717948717948722e-05, |
| "loss": 1.5008, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8810572687224669, |
| "grad_norm": 1.9401237964630127, |
| "learning_rate": 1.9692307692307696e-05, |
| "loss": 1.526, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9085903083700441, |
| "grad_norm": 1.6347532272338867, |
| "learning_rate": 1.9666666666666666e-05, |
| "loss": 1.3635, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9361233480176211, |
| "grad_norm": 1.622261643409729, |
| "learning_rate": 1.9641025641025643e-05, |
| "loss": 1.3529, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9636563876651982, |
| "grad_norm": 1.1809067726135254, |
| "learning_rate": 1.9615384615384617e-05, |
| "loss": 1.369, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9911894273127754, |
| "grad_norm": 1.718253493309021, |
| "learning_rate": 1.958974358974359e-05, |
| "loss": 1.3382, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0187224669603525, |
| "grad_norm": 1.380740761756897, |
| "learning_rate": 1.9564102564102564e-05, |
| "loss": 1.2302, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0462555066079295, |
| "grad_norm": 1.4687336683273315, |
| "learning_rate": 1.953846153846154e-05, |
| "loss": 1.1042, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0737885462555066, |
| "grad_norm": 1.4777907133102417, |
| "learning_rate": 1.9512820512820515e-05, |
| "loss": 1.1084, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.1013215859030836, |
| "grad_norm": 1.3653723001480103, |
| "learning_rate": 1.9487179487179488e-05, |
| "loss": 1.0832, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1013215859030836, |
| "eval_cer": 49.70538672291449, |
| "eval_loss": 1.242371678352356, |
| "eval_runtime": 1446.0207, |
| "eval_samples_per_second": 7.317, |
| "eval_steps_per_second": 1.83, |
| "eval_wer": 97.0957095709571, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1288546255506609, |
| "grad_norm": 1.432028889656067, |
| "learning_rate": 1.9461538461538462e-05, |
| "loss": 1.0547, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1563876651982379, |
| "grad_norm": 1.4628076553344727, |
| "learning_rate": 1.943589743589744e-05, |
| "loss": 1.0737, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.183920704845815, |
| "grad_norm": 1.3926039934158325, |
| "learning_rate": 1.9410256410256413e-05, |
| "loss": 1.0366, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.2114537444933922, |
| "grad_norm": 2.2188920974731445, |
| "learning_rate": 1.9384615384615386e-05, |
| "loss": 1.0043, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2389867841409692, |
| "grad_norm": 2.169039726257324, |
| "learning_rate": 1.935897435897436e-05, |
| "loss": 1.0657, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2665198237885462, |
| "grad_norm": 1.8362762928009033, |
| "learning_rate": 1.9333333333333333e-05, |
| "loss": 0.9749, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2940528634361232, |
| "grad_norm": 1.239357352256775, |
| "learning_rate": 1.930769230769231e-05, |
| "loss": 0.9658, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.3215859030837005, |
| "grad_norm": 1.2631865739822388, |
| "learning_rate": 1.9282051282051284e-05, |
| "loss": 0.9698, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3491189427312775, |
| "grad_norm": 1.3246465921401978, |
| "learning_rate": 1.9256410256410258e-05, |
| "loss": 0.9609, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3766519823788546, |
| "grad_norm": 1.2062760591506958, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 0.9763, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.4041850220264318, |
| "grad_norm": 2.449249744415283, |
| "learning_rate": 1.920512820512821e-05, |
| "loss": 0.9281, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.4317180616740088, |
| "grad_norm": 1.5830440521240234, |
| "learning_rate": 1.9179487179487182e-05, |
| "loss": 0.9049, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4592511013215859, |
| "grad_norm": 1.3640918731689453, |
| "learning_rate": 1.9153846153846156e-05, |
| "loss": 0.9787, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4867841409691631, |
| "grad_norm": 1.2333142757415771, |
| "learning_rate": 1.912820512820513e-05, |
| "loss": 0.9326, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.51431718061674, |
| "grad_norm": 1.9163013696670532, |
| "learning_rate": 1.9102564102564106e-05, |
| "loss": 0.9313, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5418502202643172, |
| "grad_norm": 1.2806075811386108, |
| "learning_rate": 1.907692307692308e-05, |
| "loss": 0.9362, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5693832599118944, |
| "grad_norm": 1.2817801237106323, |
| "learning_rate": 1.905128205128205e-05, |
| "loss": 0.9065, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.5969162995594712, |
| "grad_norm": 1.1079732179641724, |
| "learning_rate": 1.9025641025641027e-05, |
| "loss": 0.9041, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6244493392070485, |
| "grad_norm": 1.439504861831665, |
| "learning_rate": 1.9e-05, |
| "loss": 0.8902, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6519823788546255, |
| "grad_norm": 1.3412843942642212, |
| "learning_rate": 1.8974358974358975e-05, |
| "loss": 0.833, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6795154185022025, |
| "grad_norm": 1.084276556968689, |
| "learning_rate": 1.894871794871795e-05, |
| "loss": 0.8751, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.7070484581497798, |
| "grad_norm": 1.2395702600479126, |
| "learning_rate": 1.8923076923076925e-05, |
| "loss": 0.8539, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7345814977973568, |
| "grad_norm": 1.8642207384109497, |
| "learning_rate": 1.88974358974359e-05, |
| "loss": 0.8383, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.7621145374449338, |
| "grad_norm": 0.9966503977775574, |
| "learning_rate": 1.8871794871794873e-05, |
| "loss": 0.8346, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.789647577092511, |
| "grad_norm": 1.464998722076416, |
| "learning_rate": 1.8846153846153846e-05, |
| "loss": 0.8721, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.8171806167400881, |
| "grad_norm": 1.3232594728469849, |
| "learning_rate": 1.8820512820512823e-05, |
| "loss": 0.8706, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8447136563876652, |
| "grad_norm": 1.2619857788085938, |
| "learning_rate": 1.8794871794871797e-05, |
| "loss": 0.8774, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8722466960352424, |
| "grad_norm": 1.0801773071289062, |
| "learning_rate": 1.876923076923077e-05, |
| "loss": 0.8175, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8997797356828194, |
| "grad_norm": 1.763427734375, |
| "learning_rate": 1.8743589743589744e-05, |
| "loss": 0.8096, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.9273127753303965, |
| "grad_norm": 1.1805825233459473, |
| "learning_rate": 1.8717948717948718e-05, |
| "loss": 0.8209, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9548458149779737, |
| "grad_norm": 1.342016577720642, |
| "learning_rate": 1.8692307692307695e-05, |
| "loss": 0.8195, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9823788546255505, |
| "grad_norm": 1.4250379800796509, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 0.7972, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0099118942731278, |
| "grad_norm": 1.0316681861877441, |
| "learning_rate": 1.8641025641025642e-05, |
| "loss": 0.7532, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.037444933920705, |
| "grad_norm": 0.9474232196807861, |
| "learning_rate": 1.8615384615384616e-05, |
| "loss": 0.5916, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.064977973568282, |
| "grad_norm": 1.3454560041427612, |
| "learning_rate": 1.8589743589743593e-05, |
| "loss": 0.5828, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.092511013215859, |
| "grad_norm": 0.8348492980003357, |
| "learning_rate": 1.8564102564102567e-05, |
| "loss": 0.5755, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.1200440528634363, |
| "grad_norm": 0.9976056814193726, |
| "learning_rate": 1.853846153846154e-05, |
| "loss": 0.6259, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.147577092511013, |
| "grad_norm": 0.8830908536911011, |
| "learning_rate": 1.8512820512820514e-05, |
| "loss": 0.5476, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1751101321585904, |
| "grad_norm": 1.018017292022705, |
| "learning_rate": 1.848717948717949e-05, |
| "loss": 0.5775, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.202643171806167, |
| "grad_norm": 1.001952052116394, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.5945, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.202643171806167, |
| "eval_cer": 39.26037164834409, |
| "eval_loss": 0.8650864958763123, |
| "eval_runtime": 1455.0852, |
| "eval_samples_per_second": 7.272, |
| "eval_steps_per_second": 1.818, |
| "eval_wer": 91.02310231023102, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.2301762114537445, |
| "grad_norm": 1.1084239482879639, |
| "learning_rate": 1.8435897435897435e-05, |
| "loss": 0.5743, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.2577092511013217, |
| "grad_norm": 1.2013578414916992, |
| "learning_rate": 1.8410256410256412e-05, |
| "loss": 0.5737, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.2852422907488985, |
| "grad_norm": 0.7342941761016846, |
| "learning_rate": 1.8384615384615386e-05, |
| "loss": 0.5621, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.3127753303964758, |
| "grad_norm": 2.2131869792938232, |
| "learning_rate": 1.835897435897436e-05, |
| "loss": 0.5863, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.340308370044053, |
| "grad_norm": 1.2190771102905273, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.5353, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.36784140969163, |
| "grad_norm": 1.2508844137191772, |
| "learning_rate": 1.830769230769231e-05, |
| "loss": 0.546, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.395374449339207, |
| "grad_norm": 1.3966801166534424, |
| "learning_rate": 1.8282051282051284e-05, |
| "loss": 0.5745, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.4229074889867843, |
| "grad_norm": 1.0170118808746338, |
| "learning_rate": 1.8256410256410257e-05, |
| "loss": 0.534, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.450440528634361, |
| "grad_norm": 0.9630802869796753, |
| "learning_rate": 1.823076923076923e-05, |
| "loss": 0.5307, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.4779735682819384, |
| "grad_norm": 1.943258285522461, |
| "learning_rate": 1.8205128205128208e-05, |
| "loss": 0.558, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.505506607929515, |
| "grad_norm": 1.0180917978286743, |
| "learning_rate": 1.817948717948718e-05, |
| "loss": 0.5996, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.5330396475770924, |
| "grad_norm": 0.8535317778587341, |
| "learning_rate": 1.8153846153846155e-05, |
| "loss": 0.5679, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.5605726872246697, |
| "grad_norm": 1.0330489873886108, |
| "learning_rate": 1.812820512820513e-05, |
| "loss": 0.549, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.5881057268722465, |
| "grad_norm": 1.1753156185150146, |
| "learning_rate": 1.8102564102564102e-05, |
| "loss": 0.5496, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6156387665198237, |
| "grad_norm": 1.1847915649414062, |
| "learning_rate": 1.807692307692308e-05, |
| "loss": 0.574, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.643171806167401, |
| "grad_norm": 0.9923620223999023, |
| "learning_rate": 1.8051282051282053e-05, |
| "loss": 0.5257, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.670704845814978, |
| "grad_norm": 1.3621445894241333, |
| "learning_rate": 1.8025641025641027e-05, |
| "loss": 0.5196, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.698237885462555, |
| "grad_norm": 0.9695484638214111, |
| "learning_rate": 1.8e-05, |
| "loss": 0.5813, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.7257709251101323, |
| "grad_norm": 1.0030736923217773, |
| "learning_rate": 1.7974358974358977e-05, |
| "loss": 0.5143, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.753303964757709, |
| "grad_norm": 1.0027751922607422, |
| "learning_rate": 1.794871794871795e-05, |
| "loss": 0.5893, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.7808370044052864, |
| "grad_norm": 1.7140127420425415, |
| "learning_rate": 1.7923076923076925e-05, |
| "loss": 0.5449, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.8083700440528636, |
| "grad_norm": 0.9435563683509827, |
| "learning_rate": 1.78974358974359e-05, |
| "loss": 0.4986, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.8359030837004404, |
| "grad_norm": 0.9085638523101807, |
| "learning_rate": 1.7871794871794875e-05, |
| "loss": 0.5328, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.8634361233480177, |
| "grad_norm": 0.9814099073410034, |
| "learning_rate": 1.784615384615385e-05, |
| "loss": 0.5536, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.890969162995595, |
| "grad_norm": 1.0650631189346313, |
| "learning_rate": 1.7820512820512823e-05, |
| "loss": 0.5093, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.9185022026431717, |
| "grad_norm": 0.7566800713539124, |
| "learning_rate": 1.7794871794871796e-05, |
| "loss": 0.5247, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.946035242290749, |
| "grad_norm": 0.8712677359580994, |
| "learning_rate": 1.776923076923077e-05, |
| "loss": 0.5367, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.9735682819383262, |
| "grad_norm": 1.1241258382797241, |
| "learning_rate": 1.7743589743589744e-05, |
| "loss": 0.5001, |
| "step": 2700 |
| }, |
| { |
| "epoch": 3.001101321585903, |
| "grad_norm": 0.8537120223045349, |
| "learning_rate": 1.7717948717948717e-05, |
| "loss": 0.557, |
| "step": 2725 |
| }, |
| { |
| "epoch": 3.0286343612334803, |
| "grad_norm": 0.7500240802764893, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 0.3894, |
| "step": 2750 |
| }, |
| { |
| "epoch": 3.056167400881057, |
| "grad_norm": 0.7330994009971619, |
| "learning_rate": 1.7666666666666668e-05, |
| "loss": 0.3448, |
| "step": 2775 |
| }, |
| { |
| "epoch": 3.0837004405286343, |
| "grad_norm": 0.7797436118125916, |
| "learning_rate": 1.7641025641025642e-05, |
| "loss": 0.3588, |
| "step": 2800 |
| }, |
| { |
| "epoch": 3.1112334801762116, |
| "grad_norm": 0.6182436943054199, |
| "learning_rate": 1.7615384615384615e-05, |
| "loss": 0.3464, |
| "step": 2825 |
| }, |
| { |
| "epoch": 3.1387665198237884, |
| "grad_norm": 0.8365157246589661, |
| "learning_rate": 1.7589743589743592e-05, |
| "loss": 0.3481, |
| "step": 2850 |
| }, |
| { |
| "epoch": 3.1662995594713657, |
| "grad_norm": 0.941464900970459, |
| "learning_rate": 1.7564102564102566e-05, |
| "loss": 0.3727, |
| "step": 2875 |
| }, |
| { |
| "epoch": 3.193832599118943, |
| "grad_norm": 1.6682766675949097, |
| "learning_rate": 1.753846153846154e-05, |
| "loss": 0.3702, |
| "step": 2900 |
| }, |
| { |
| "epoch": 3.2213656387665197, |
| "grad_norm": 0.7901954650878906, |
| "learning_rate": 1.7512820512820513e-05, |
| "loss": 0.3722, |
| "step": 2925 |
| }, |
| { |
| "epoch": 3.248898678414097, |
| "grad_norm": 0.8976119756698608, |
| "learning_rate": 1.7487179487179487e-05, |
| "loss": 0.372, |
| "step": 2950 |
| }, |
| { |
| "epoch": 3.2764317180616738, |
| "grad_norm": 0.7055606842041016, |
| "learning_rate": 1.7461538461538464e-05, |
| "loss": 0.3566, |
| "step": 2975 |
| }, |
| { |
| "epoch": 3.303964757709251, |
| "grad_norm": 0.9473551511764526, |
| "learning_rate": 1.7435897435897438e-05, |
| "loss": 0.3381, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.303964757709251, |
| "eval_cer": 30.075774076324326, |
| "eval_loss": 0.7787998914718628, |
| "eval_runtime": 1321.0503, |
| "eval_samples_per_second": 8.01, |
| "eval_steps_per_second": 2.003, |
| "eval_wer": 87.96793965110797, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.3314977973568283, |
| "grad_norm": 1.1943602561950684, |
| "learning_rate": 1.741025641025641e-05, |
| "loss": 0.3608, |
| "step": 3025 |
| }, |
| { |
| "epoch": 3.359030837004405, |
| "grad_norm": 1.0215487480163574, |
| "learning_rate": 1.7384615384615385e-05, |
| "loss": 0.3988, |
| "step": 3050 |
| }, |
| { |
| "epoch": 3.3865638766519823, |
| "grad_norm": 2.3731114864349365, |
| "learning_rate": 1.7358974358974362e-05, |
| "loss": 0.3593, |
| "step": 3075 |
| }, |
| { |
| "epoch": 3.4140969162995596, |
| "grad_norm": 0.7270675897598267, |
| "learning_rate": 1.7333333333333336e-05, |
| "loss": 0.4127, |
| "step": 3100 |
| }, |
| { |
| "epoch": 3.4416299559471364, |
| "grad_norm": 0.8125990033149719, |
| "learning_rate": 1.730769230769231e-05, |
| "loss": 0.3776, |
| "step": 3125 |
| }, |
| { |
| "epoch": 3.4691629955947136, |
| "grad_norm": 0.7871472239494324, |
| "learning_rate": 1.7282051282051283e-05, |
| "loss": 0.354, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.496696035242291, |
| "grad_norm": 0.9689500331878662, |
| "learning_rate": 1.725641025641026e-05, |
| "loss": 0.3755, |
| "step": 3175 |
| }, |
| { |
| "epoch": 3.5242290748898677, |
| "grad_norm": 0.8293836712837219, |
| "learning_rate": 1.7230769230769234e-05, |
| "loss": 0.3786, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.551762114537445, |
| "grad_norm": 0.787414014339447, |
| "learning_rate": 1.7205128205128207e-05, |
| "loss": 0.3491, |
| "step": 3225 |
| }, |
| { |
| "epoch": 3.579295154185022, |
| "grad_norm": 0.8381183743476868, |
| "learning_rate": 1.717948717948718e-05, |
| "loss": 0.3497, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.606828193832599, |
| "grad_norm": 0.6920949816703796, |
| "learning_rate": 1.7153846153846155e-05, |
| "loss": 0.3841, |
| "step": 3275 |
| }, |
| { |
| "epoch": 3.6343612334801763, |
| "grad_norm": 1.0415260791778564, |
| "learning_rate": 1.7128205128205128e-05, |
| "loss": 0.383, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.6618942731277535, |
| "grad_norm": 0.848689079284668, |
| "learning_rate": 1.7102564102564102e-05, |
| "loss": 0.3278, |
| "step": 3325 |
| }, |
| { |
| "epoch": 3.6894273127753303, |
| "grad_norm": 0.850678026676178, |
| "learning_rate": 1.707692307692308e-05, |
| "loss": 0.3625, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.7169603524229076, |
| "grad_norm": 0.7438422441482544, |
| "learning_rate": 1.7051282051282053e-05, |
| "loss": 0.3177, |
| "step": 3375 |
| }, |
| { |
| "epoch": 3.744493392070485, |
| "grad_norm": 0.9074159264564514, |
| "learning_rate": 1.7025641025641026e-05, |
| "loss": 0.3646, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.7720264317180616, |
| "grad_norm": 0.7645012736320496, |
| "learning_rate": 1.7e-05, |
| "loss": 0.3503, |
| "step": 3425 |
| }, |
| { |
| "epoch": 3.799559471365639, |
| "grad_norm": 0.7805877327919006, |
| "learning_rate": 1.6974358974358977e-05, |
| "loss": 0.3463, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.827092511013216, |
| "grad_norm": 0.7890722751617432, |
| "learning_rate": 1.694871794871795e-05, |
| "loss": 0.342, |
| "step": 3475 |
| }, |
| { |
| "epoch": 3.854625550660793, |
| "grad_norm": 0.8052636384963989, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 0.409, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.88215859030837, |
| "grad_norm": 0.930600643157959, |
| "learning_rate": 1.6897435897435898e-05, |
| "loss": 0.3418, |
| "step": 3525 |
| }, |
| { |
| "epoch": 3.909691629955947, |
| "grad_norm": 0.9123186469078064, |
| "learning_rate": 1.687179487179487e-05, |
| "loss": 0.3632, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.9372246696035242, |
| "grad_norm": 0.7236383557319641, |
| "learning_rate": 1.684615384615385e-05, |
| "loss": 0.3868, |
| "step": 3575 |
| }, |
| { |
| "epoch": 3.964757709251101, |
| "grad_norm": 0.7813886404037476, |
| "learning_rate": 1.6820512820512822e-05, |
| "loss": 0.3713, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.9922907488986783, |
| "grad_norm": 1.0662919282913208, |
| "learning_rate": 1.6794871794871796e-05, |
| "loss": 0.3781, |
| "step": 3625 |
| }, |
| { |
| "epoch": 4.0198237885462555, |
| "grad_norm": 0.6088477373123169, |
| "learning_rate": 1.676923076923077e-05, |
| "loss": 0.2498, |
| "step": 3650 |
| }, |
| { |
| "epoch": 4.047356828193832, |
| "grad_norm": 0.7222326397895813, |
| "learning_rate": 1.6743589743589747e-05, |
| "loss": 0.2371, |
| "step": 3675 |
| }, |
| { |
| "epoch": 4.07488986784141, |
| "grad_norm": 0.6792022585868835, |
| "learning_rate": 1.671794871794872e-05, |
| "loss": 0.2171, |
| "step": 3700 |
| }, |
| { |
| "epoch": 4.102422907488987, |
| "grad_norm": 0.5797551274299622, |
| "learning_rate": 1.6692307692307694e-05, |
| "loss": 0.2476, |
| "step": 3725 |
| }, |
| { |
| "epoch": 4.129955947136564, |
| "grad_norm": 0.7084588408470154, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2364, |
| "step": 3750 |
| }, |
| { |
| "epoch": 4.157488986784141, |
| "grad_norm": 0.6838762164115906, |
| "learning_rate": 1.6641025641025645e-05, |
| "loss": 0.2214, |
| "step": 3775 |
| }, |
| { |
| "epoch": 4.185022026431718, |
| "grad_norm": 1.180957317352295, |
| "learning_rate": 1.6615384615384618e-05, |
| "loss": 0.1928, |
| "step": 3800 |
| }, |
| { |
| "epoch": 4.212555066079295, |
| "grad_norm": 0.7658706903457642, |
| "learning_rate": 1.6589743589743592e-05, |
| "loss": 0.2226, |
| "step": 3825 |
| }, |
| { |
| "epoch": 4.240088105726873, |
| "grad_norm": 0.6503696441650391, |
| "learning_rate": 1.6564102564102565e-05, |
| "loss": 0.2154, |
| "step": 3850 |
| }, |
| { |
| "epoch": 4.2676211453744495, |
| "grad_norm": 0.6417099237442017, |
| "learning_rate": 1.653846153846154e-05, |
| "loss": 0.256, |
| "step": 3875 |
| }, |
| { |
| "epoch": 4.295154185022026, |
| "grad_norm": 0.9082858562469482, |
| "learning_rate": 1.6512820512820513e-05, |
| "loss": 0.2386, |
| "step": 3900 |
| }, |
| { |
| "epoch": 4.322687224669604, |
| "grad_norm": 0.6636332273483276, |
| "learning_rate": 1.6487179487179486e-05, |
| "loss": 0.2556, |
| "step": 3925 |
| }, |
| { |
| "epoch": 4.350220264317181, |
| "grad_norm": 0.7827299237251282, |
| "learning_rate": 1.6461538461538463e-05, |
| "loss": 0.2346, |
| "step": 3950 |
| }, |
| { |
| "epoch": 4.377753303964758, |
| "grad_norm": 0.5680423378944397, |
| "learning_rate": 1.6435897435897437e-05, |
| "loss": 0.2628, |
| "step": 3975 |
| }, |
| { |
| "epoch": 4.405286343612334, |
| "grad_norm": 0.7307997345924377, |
| "learning_rate": 1.641025641025641e-05, |
| "loss": 0.2463, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.405286343612334, |
| "eval_cer": 27.274655233684502, |
| "eval_loss": 0.7554384469985962, |
| "eval_runtime": 1304.6023, |
| "eval_samples_per_second": 8.111, |
| "eval_steps_per_second": 2.028, |
| "eval_wer": 86.4875058934465, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.432819383259912, |
| "grad_norm": 0.6854081153869629, |
| "learning_rate": 1.6384615384615384e-05, |
| "loss": 0.2609, |
| "step": 4025 |
| }, |
| { |
| "epoch": 4.460352422907489, |
| "grad_norm": 0.8021137714385986, |
| "learning_rate": 1.635897435897436e-05, |
| "loss": 0.2604, |
| "step": 4050 |
| }, |
| { |
| "epoch": 4.487885462555066, |
| "grad_norm": 0.9230350255966187, |
| "learning_rate": 1.6333333333333335e-05, |
| "loss": 0.2594, |
| "step": 4075 |
| }, |
| { |
| "epoch": 4.515418502202643, |
| "grad_norm": 0.8251164555549622, |
| "learning_rate": 1.630769230769231e-05, |
| "loss": 0.2191, |
| "step": 4100 |
| }, |
| { |
| "epoch": 4.54295154185022, |
| "grad_norm": 0.7396982312202454, |
| "learning_rate": 1.6282051282051282e-05, |
| "loss": 0.2508, |
| "step": 4125 |
| }, |
| { |
| "epoch": 4.570484581497797, |
| "grad_norm": 0.7925761938095093, |
| "learning_rate": 1.625641025641026e-05, |
| "loss": 0.2603, |
| "step": 4150 |
| }, |
| { |
| "epoch": 4.598017621145375, |
| "grad_norm": 0.6864134669303894, |
| "learning_rate": 1.6230769230769233e-05, |
| "loss": 0.249, |
| "step": 4175 |
| }, |
| { |
| "epoch": 4.6255506607929515, |
| "grad_norm": 0.5276267528533936, |
| "learning_rate": 1.6205128205128207e-05, |
| "loss": 0.2445, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.653083700440528, |
| "grad_norm": 1.1504285335540771, |
| "learning_rate": 1.617948717948718e-05, |
| "loss": 0.2383, |
| "step": 4225 |
| }, |
| { |
| "epoch": 4.680616740088106, |
| "grad_norm": 0.7452952861785889, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 0.2319, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.708149779735683, |
| "grad_norm": 0.5664868950843811, |
| "learning_rate": 1.612820512820513e-05, |
| "loss": 0.2386, |
| "step": 4275 |
| }, |
| { |
| "epoch": 4.73568281938326, |
| "grad_norm": 0.7903388738632202, |
| "learning_rate": 1.6102564102564105e-05, |
| "loss": 0.2476, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.763215859030837, |
| "grad_norm": 0.6549268364906311, |
| "learning_rate": 1.607692307692308e-05, |
| "loss": 0.2392, |
| "step": 4325 |
| }, |
| { |
| "epoch": 4.790748898678414, |
| "grad_norm": 0.6780884265899658, |
| "learning_rate": 1.6051282051282052e-05, |
| "loss": 0.2354, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.818281938325991, |
| "grad_norm": 0.9416743516921997, |
| "learning_rate": 1.602564102564103e-05, |
| "loss": 0.2514, |
| "step": 4375 |
| }, |
| { |
| "epoch": 4.845814977973569, |
| "grad_norm": 0.5058385133743286, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.2173, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.8733480176211454, |
| "grad_norm": 0.6523875594139099, |
| "learning_rate": 1.5974358974358976e-05, |
| "loss": 0.2323, |
| "step": 4425 |
| }, |
| { |
| "epoch": 4.900881057268722, |
| "grad_norm": 0.6458995342254639, |
| "learning_rate": 1.594871794871795e-05, |
| "loss": 0.242, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.9284140969163, |
| "grad_norm": 0.5865331292152405, |
| "learning_rate": 1.5923076923076924e-05, |
| "loss": 0.2181, |
| "step": 4475 |
| }, |
| { |
| "epoch": 4.955947136563877, |
| "grad_norm": 0.5348775386810303, |
| "learning_rate": 1.5897435897435897e-05, |
| "loss": 0.2379, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.983480176211454, |
| "grad_norm": 0.6599372029304504, |
| "learning_rate": 1.587179487179487e-05, |
| "loss": 0.2482, |
| "step": 4525 |
| }, |
| { |
| "epoch": 5.011013215859031, |
| "grad_norm": 0.625035285949707, |
| "learning_rate": 1.5846153846153848e-05, |
| "loss": 0.1922, |
| "step": 4550 |
| }, |
| { |
| "epoch": 5.038546255506608, |
| "grad_norm": 0.6018031239509583, |
| "learning_rate": 1.582051282051282e-05, |
| "loss": 0.1687, |
| "step": 4575 |
| }, |
| { |
| "epoch": 5.066079295154185, |
| "grad_norm": 0.6142588257789612, |
| "learning_rate": 1.5794871794871795e-05, |
| "loss": 0.1399, |
| "step": 4600 |
| }, |
| { |
| "epoch": 5.093612334801762, |
| "grad_norm": 0.445803701877594, |
| "learning_rate": 1.576923076923077e-05, |
| "loss": 0.1796, |
| "step": 4625 |
| }, |
| { |
| "epoch": 5.121145374449339, |
| "grad_norm": 0.4978330135345459, |
| "learning_rate": 1.5743589743589746e-05, |
| "loss": 0.1716, |
| "step": 4650 |
| }, |
| { |
| "epoch": 5.148678414096916, |
| "grad_norm": 0.43810775876045227, |
| "learning_rate": 1.571794871794872e-05, |
| "loss": 0.176, |
| "step": 4675 |
| }, |
| { |
| "epoch": 5.176211453744493, |
| "grad_norm": 0.5028232336044312, |
| "learning_rate": 1.5692307692307693e-05, |
| "loss": 0.1435, |
| "step": 4700 |
| }, |
| { |
| "epoch": 5.203744493392071, |
| "grad_norm": 0.3771626949310303, |
| "learning_rate": 1.5666666666666667e-05, |
| "loss": 0.1577, |
| "step": 4725 |
| }, |
| { |
| "epoch": 5.2312775330396475, |
| "grad_norm": 0.823710024356842, |
| "learning_rate": 1.5641025641025644e-05, |
| "loss": 0.1346, |
| "step": 4750 |
| }, |
| { |
| "epoch": 5.258810572687224, |
| "grad_norm": 0.4752519428730011, |
| "learning_rate": 1.5615384615384618e-05, |
| "loss": 0.1996, |
| "step": 4775 |
| }, |
| { |
| "epoch": 5.286343612334802, |
| "grad_norm": 0.3718922436237335, |
| "learning_rate": 1.558974358974359e-05, |
| "loss": 0.1477, |
| "step": 4800 |
| }, |
| { |
| "epoch": 5.313876651982379, |
| "grad_norm": 0.37068554759025574, |
| "learning_rate": 1.5564102564102565e-05, |
| "loss": 0.1384, |
| "step": 4825 |
| }, |
| { |
| "epoch": 5.341409691629956, |
| "grad_norm": 0.4219229221343994, |
| "learning_rate": 1.553846153846154e-05, |
| "loss": 0.1534, |
| "step": 4850 |
| }, |
| { |
| "epoch": 5.368942731277533, |
| "grad_norm": 0.6927037835121155, |
| "learning_rate": 1.5512820512820516e-05, |
| "loss": 0.1623, |
| "step": 4875 |
| }, |
| { |
| "epoch": 5.39647577092511, |
| "grad_norm": 0.6902387142181396, |
| "learning_rate": 1.548717948717949e-05, |
| "loss": 0.1451, |
| "step": 4900 |
| }, |
| { |
| "epoch": 5.424008810572687, |
| "grad_norm": 0.5900410413742065, |
| "learning_rate": 1.5461538461538463e-05, |
| "loss": 0.1971, |
| "step": 4925 |
| }, |
| { |
| "epoch": 5.451541850220265, |
| "grad_norm": 0.5337275266647339, |
| "learning_rate": 1.5435897435897436e-05, |
| "loss": 0.1688, |
| "step": 4950 |
| }, |
| { |
| "epoch": 5.479074889867841, |
| "grad_norm": 0.5641874074935913, |
| "learning_rate": 1.5410256410256414e-05, |
| "loss": 0.1567, |
| "step": 4975 |
| }, |
| { |
| "epoch": 5.506607929515418, |
| "grad_norm": 0.5372440218925476, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.1609, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.506607929515418, |
| "eval_cer": 28.45941329542719, |
| "eval_loss": 0.7670999765396118, |
| "eval_runtime": 1323.4879, |
| "eval_samples_per_second": 7.995, |
| "eval_steps_per_second": 1.999, |
| "eval_wer": 86.11975483262611, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.534140969162996, |
| "grad_norm": 0.6520294547080994, |
| "learning_rate": 1.535897435897436e-05, |
| "loss": 0.1639, |
| "step": 5025 |
| }, |
| { |
| "epoch": 5.561674008810573, |
| "grad_norm": 0.6221819519996643, |
| "learning_rate": 1.5333333333333334e-05, |
| "loss": 0.1851, |
| "step": 5050 |
| }, |
| { |
| "epoch": 5.5892070484581495, |
| "grad_norm": 0.5925490856170654, |
| "learning_rate": 1.5307692307692308e-05, |
| "loss": 0.1733, |
| "step": 5075 |
| }, |
| { |
| "epoch": 5.616740088105727, |
| "grad_norm": 1.0411094427108765, |
| "learning_rate": 1.5282051282051282e-05, |
| "loss": 0.1502, |
| "step": 5100 |
| }, |
| { |
| "epoch": 5.644273127753304, |
| "grad_norm": 0.8545799255371094, |
| "learning_rate": 1.5256410256410257e-05, |
| "loss": 0.1681, |
| "step": 5125 |
| }, |
| { |
| "epoch": 5.671806167400881, |
| "grad_norm": 0.5849359631538391, |
| "learning_rate": 1.523076923076923e-05, |
| "loss": 0.1778, |
| "step": 5150 |
| }, |
| { |
| "epoch": 5.6993392070484585, |
| "grad_norm": 0.5703755617141724, |
| "learning_rate": 1.5205128205128206e-05, |
| "loss": 0.1328, |
| "step": 5175 |
| }, |
| { |
| "epoch": 5.726872246696035, |
| "grad_norm": 0.7638615965843201, |
| "learning_rate": 1.517948717948718e-05, |
| "loss": 0.1929, |
| "step": 5200 |
| }, |
| { |
| "epoch": 5.754405286343612, |
| "grad_norm": 0.5087124109268188, |
| "learning_rate": 1.5153846153846155e-05, |
| "loss": 0.1545, |
| "step": 5225 |
| }, |
| { |
| "epoch": 5.78193832599119, |
| "grad_norm": 0.8324174284934998, |
| "learning_rate": 1.5128205128205129e-05, |
| "loss": 0.1975, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.809471365638767, |
| "grad_norm": 0.4413852095603943, |
| "learning_rate": 1.5102564102564104e-05, |
| "loss": 0.1663, |
| "step": 5275 |
| }, |
| { |
| "epoch": 5.8370044052863435, |
| "grad_norm": 0.708247184753418, |
| "learning_rate": 1.5076923076923078e-05, |
| "loss": 0.1543, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.864537444933921, |
| "grad_norm": 0.689794659614563, |
| "learning_rate": 1.5051282051282053e-05, |
| "loss": 0.1621, |
| "step": 5325 |
| }, |
| { |
| "epoch": 5.892070484581498, |
| "grad_norm": 0.5020695328712463, |
| "learning_rate": 1.5025641025641027e-05, |
| "loss": 0.1774, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.919603524229075, |
| "grad_norm": 0.4771401882171631, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.1442, |
| "step": 5375 |
| }, |
| { |
| "epoch": 5.9471365638766525, |
| "grad_norm": 1.0237714052200317, |
| "learning_rate": 1.4974358974358976e-05, |
| "loss": 0.1629, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.974669603524229, |
| "grad_norm": 0.7134143114089966, |
| "learning_rate": 1.494871794871795e-05, |
| "loss": 0.144, |
| "step": 5425 |
| }, |
| { |
| "epoch": 6.002202643171806, |
| "grad_norm": 0.30046069622039795, |
| "learning_rate": 1.4923076923076925e-05, |
| "loss": 0.1373, |
| "step": 5450 |
| }, |
| { |
| "epoch": 6.029735682819383, |
| "grad_norm": 0.4057641923427582, |
| "learning_rate": 1.4897435897435898e-05, |
| "loss": 0.1082, |
| "step": 5475 |
| }, |
| { |
| "epoch": 6.057268722466961, |
| "grad_norm": 0.486996591091156, |
| "learning_rate": 1.4871794871794874e-05, |
| "loss": 0.0848, |
| "step": 5500 |
| }, |
| { |
| "epoch": 6.084801762114537, |
| "grad_norm": 0.46409764885902405, |
| "learning_rate": 1.4846153846153847e-05, |
| "loss": 0.1075, |
| "step": 5525 |
| }, |
| { |
| "epoch": 6.112334801762114, |
| "grad_norm": 0.4308403432369232, |
| "learning_rate": 1.4820512820512823e-05, |
| "loss": 0.1193, |
| "step": 5550 |
| }, |
| { |
| "epoch": 6.139867841409692, |
| "grad_norm": 0.34751376509666443, |
| "learning_rate": 1.4794871794871796e-05, |
| "loss": 0.1139, |
| "step": 5575 |
| }, |
| { |
| "epoch": 6.167400881057269, |
| "grad_norm": 0.8365034461021423, |
| "learning_rate": 1.4769230769230772e-05, |
| "loss": 0.1273, |
| "step": 5600 |
| }, |
| { |
| "epoch": 6.1949339207048455, |
| "grad_norm": 0.34338051080703735, |
| "learning_rate": 1.4743589743589745e-05, |
| "loss": 0.0895, |
| "step": 5625 |
| }, |
| { |
| "epoch": 6.222466960352423, |
| "grad_norm": 0.6777989864349365, |
| "learning_rate": 1.471794871794872e-05, |
| "loss": 0.121, |
| "step": 5650 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.5982616543769836, |
| "learning_rate": 1.4692307692307694e-05, |
| "loss": 0.1214, |
| "step": 5675 |
| }, |
| { |
| "epoch": 6.277533039647577, |
| "grad_norm": 0.5918659567832947, |
| "learning_rate": 1.4666666666666666e-05, |
| "loss": 0.0863, |
| "step": 5700 |
| }, |
| { |
| "epoch": 6.3050660792951545, |
| "grad_norm": 0.35085636377334595, |
| "learning_rate": 1.4641025641025642e-05, |
| "loss": 0.11, |
| "step": 5725 |
| }, |
| { |
| "epoch": 6.332599118942731, |
| "grad_norm": 0.43525975942611694, |
| "learning_rate": 1.4615384615384615e-05, |
| "loss": 0.0945, |
| "step": 5750 |
| }, |
| { |
| "epoch": 6.360132158590308, |
| "grad_norm": 0.3799566328525543, |
| "learning_rate": 1.458974358974359e-05, |
| "loss": 0.0984, |
| "step": 5775 |
| }, |
| { |
| "epoch": 6.387665198237886, |
| "grad_norm": 0.7915482521057129, |
| "learning_rate": 1.4564102564102564e-05, |
| "loss": 0.1154, |
| "step": 5800 |
| }, |
| { |
| "epoch": 6.415198237885463, |
| "grad_norm": 0.47404220700263977, |
| "learning_rate": 1.453846153846154e-05, |
| "loss": 0.0984, |
| "step": 5825 |
| }, |
| { |
| "epoch": 6.442731277533039, |
| "grad_norm": 0.3866804838180542, |
| "learning_rate": 1.4512820512820513e-05, |
| "loss": 0.1273, |
| "step": 5850 |
| }, |
| { |
| "epoch": 6.470264317180617, |
| "grad_norm": 0.4380825459957123, |
| "learning_rate": 1.4487179487179489e-05, |
| "loss": 0.1332, |
| "step": 5875 |
| }, |
| { |
| "epoch": 6.497797356828194, |
| "grad_norm": 0.4749581515789032, |
| "learning_rate": 1.4461538461538462e-05, |
| "loss": 0.1243, |
| "step": 5900 |
| }, |
| { |
| "epoch": 6.525330396475771, |
| "grad_norm": 0.820015549659729, |
| "learning_rate": 1.4435897435897438e-05, |
| "loss": 0.1305, |
| "step": 5925 |
| }, |
| { |
| "epoch": 6.5528634361233475, |
| "grad_norm": 0.5644270181655884, |
| "learning_rate": 1.4410256410256411e-05, |
| "loss": 0.1167, |
| "step": 5950 |
| }, |
| { |
| "epoch": 6.580396475770925, |
| "grad_norm": 0.4454534649848938, |
| "learning_rate": 1.4384615384615387e-05, |
| "loss": 0.1101, |
| "step": 5975 |
| }, |
| { |
| "epoch": 6.607929515418502, |
| "grad_norm": 0.5850095748901367, |
| "learning_rate": 1.435897435897436e-05, |
| "loss": 0.104, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.607929515418502, |
| "eval_cer": 24.67816078777527, |
| "eval_loss": 0.7848840355873108, |
| "eval_runtime": 1300.262, |
| "eval_samples_per_second": 8.138, |
| "eval_steps_per_second": 2.035, |
| "eval_wer": 84.6016030174446, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.635462555066079, |
| "grad_norm": 0.6678868532180786, |
| "learning_rate": 1.4333333333333334e-05, |
| "loss": 0.0875, |
| "step": 6025 |
| }, |
| { |
| "epoch": 6.6629955947136565, |
| "grad_norm": 0.279801607131958, |
| "learning_rate": 1.430769230769231e-05, |
| "loss": 0.1333, |
| "step": 6050 |
| }, |
| { |
| "epoch": 6.690528634361233, |
| "grad_norm": 0.8138979077339172, |
| "learning_rate": 1.4282051282051283e-05, |
| "loss": 0.1458, |
| "step": 6075 |
| }, |
| { |
| "epoch": 6.71806167400881, |
| "grad_norm": 0.6547926068305969, |
| "learning_rate": 1.4256410256410258e-05, |
| "loss": 0.1421, |
| "step": 6100 |
| }, |
| { |
| "epoch": 6.745594713656388, |
| "grad_norm": 0.6097781658172607, |
| "learning_rate": 1.4230769230769232e-05, |
| "loss": 0.1285, |
| "step": 6125 |
| }, |
| { |
| "epoch": 6.773127753303965, |
| "grad_norm": 0.4184475839138031, |
| "learning_rate": 1.4205128205128207e-05, |
| "loss": 0.1139, |
| "step": 6150 |
| }, |
| { |
| "epoch": 6.8006607929515415, |
| "grad_norm": 0.4548538029193878, |
| "learning_rate": 1.4179487179487181e-05, |
| "loss": 0.1214, |
| "step": 6175 |
| }, |
| { |
| "epoch": 6.828193832599119, |
| "grad_norm": 0.3974076509475708, |
| "learning_rate": 1.4153846153846156e-05, |
| "loss": 0.1051, |
| "step": 6200 |
| }, |
| { |
| "epoch": 6.855726872246696, |
| "grad_norm": 0.7746002078056335, |
| "learning_rate": 1.412820512820513e-05, |
| "loss": 0.1349, |
| "step": 6225 |
| }, |
| { |
| "epoch": 6.883259911894273, |
| "grad_norm": 0.3809688687324524, |
| "learning_rate": 1.4102564102564105e-05, |
| "loss": 0.11, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.9107929515418505, |
| "grad_norm": 0.39594364166259766, |
| "learning_rate": 1.4076923076923079e-05, |
| "loss": 0.1006, |
| "step": 6275 |
| }, |
| { |
| "epoch": 6.938325991189427, |
| "grad_norm": 0.5483039617538452, |
| "learning_rate": 1.405128205128205e-05, |
| "loss": 0.1122, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.965859030837004, |
| "grad_norm": 0.6642032861709595, |
| "learning_rate": 1.4025641025641026e-05, |
| "loss": 0.1232, |
| "step": 6325 |
| }, |
| { |
| "epoch": 6.993392070484582, |
| "grad_norm": 0.42328912019729614, |
| "learning_rate": 1.4e-05, |
| "loss": 0.1125, |
| "step": 6350 |
| }, |
| { |
| "epoch": 7.020925110132159, |
| "grad_norm": 0.6559634804725647, |
| "learning_rate": 1.3974358974358975e-05, |
| "loss": 0.0831, |
| "step": 6375 |
| }, |
| { |
| "epoch": 7.048458149779735, |
| "grad_norm": 0.5906537175178528, |
| "learning_rate": 1.3948717948717949e-05, |
| "loss": 0.0752, |
| "step": 6400 |
| }, |
| { |
| "epoch": 7.075991189427313, |
| "grad_norm": 0.5048888921737671, |
| "learning_rate": 1.3923076923076924e-05, |
| "loss": 0.0702, |
| "step": 6425 |
| }, |
| { |
| "epoch": 7.10352422907489, |
| "grad_norm": 0.5171650648117065, |
| "learning_rate": 1.3897435897435898e-05, |
| "loss": 0.0622, |
| "step": 6450 |
| }, |
| { |
| "epoch": 7.131057268722467, |
| "grad_norm": 0.608253538608551, |
| "learning_rate": 1.3871794871794873e-05, |
| "loss": 0.0795, |
| "step": 6475 |
| }, |
| { |
| "epoch": 7.158590308370044, |
| "grad_norm": 0.3569038212299347, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 0.0862, |
| "step": 6500 |
| }, |
| { |
| "epoch": 7.186123348017621, |
| "grad_norm": 0.4823140501976013, |
| "learning_rate": 1.3820512820512822e-05, |
| "loss": 0.0749, |
| "step": 6525 |
| }, |
| { |
| "epoch": 7.213656387665198, |
| "grad_norm": 0.6069055199623108, |
| "learning_rate": 1.3794871794871796e-05, |
| "loss": 0.0732, |
| "step": 6550 |
| }, |
| { |
| "epoch": 7.241189427312776, |
| "grad_norm": 0.3300100564956665, |
| "learning_rate": 1.3769230769230771e-05, |
| "loss": 0.0831, |
| "step": 6575 |
| }, |
| { |
| "epoch": 7.2687224669603525, |
| "grad_norm": 0.5404714941978455, |
| "learning_rate": 1.3743589743589745e-05, |
| "loss": 0.0783, |
| "step": 6600 |
| }, |
| { |
| "epoch": 7.296255506607929, |
| "grad_norm": 0.6272768974304199, |
| "learning_rate": 1.3717948717948718e-05, |
| "loss": 0.0878, |
| "step": 6625 |
| }, |
| { |
| "epoch": 7.323788546255507, |
| "grad_norm": 0.4452053904533386, |
| "learning_rate": 1.3692307692307694e-05, |
| "loss": 0.0808, |
| "step": 6650 |
| }, |
| { |
| "epoch": 7.351321585903084, |
| "grad_norm": 0.3930460810661316, |
| "learning_rate": 1.3666666666666667e-05, |
| "loss": 0.0861, |
| "step": 6675 |
| }, |
| { |
| "epoch": 7.378854625550661, |
| "grad_norm": 0.31089282035827637, |
| "learning_rate": 1.3641025641025643e-05, |
| "loss": 0.0642, |
| "step": 6700 |
| }, |
| { |
| "epoch": 7.406387665198238, |
| "grad_norm": 0.219461590051651, |
| "learning_rate": 1.3615384615384616e-05, |
| "loss": 0.0589, |
| "step": 6725 |
| }, |
| { |
| "epoch": 7.433920704845815, |
| "grad_norm": 0.435345321893692, |
| "learning_rate": 1.3589743589743592e-05, |
| "loss": 0.076, |
| "step": 6750 |
| }, |
| { |
| "epoch": 7.461453744493392, |
| "grad_norm": 0.5823142528533936, |
| "learning_rate": 1.3564102564102565e-05, |
| "loss": 0.0884, |
| "step": 6775 |
| }, |
| { |
| "epoch": 7.48898678414097, |
| "grad_norm": 0.6687197685241699, |
| "learning_rate": 1.353846153846154e-05, |
| "loss": 0.0831, |
| "step": 6800 |
| }, |
| { |
| "epoch": 7.516519823788546, |
| "grad_norm": 0.25778886675834656, |
| "learning_rate": 1.3512820512820514e-05, |
| "loss": 0.0794, |
| "step": 6825 |
| }, |
| { |
| "epoch": 7.544052863436123, |
| "grad_norm": 0.5225228071212769, |
| "learning_rate": 1.348717948717949e-05, |
| "loss": 0.0772, |
| "step": 6850 |
| }, |
| { |
| "epoch": 7.5715859030837, |
| "grad_norm": 0.4801703095436096, |
| "learning_rate": 1.3461538461538463e-05, |
| "loss": 0.0717, |
| "step": 6875 |
| }, |
| { |
| "epoch": 7.599118942731278, |
| "grad_norm": 0.48749440908432007, |
| "learning_rate": 1.3435897435897435e-05, |
| "loss": 0.0952, |
| "step": 6900 |
| }, |
| { |
| "epoch": 7.6266519823788546, |
| "grad_norm": 0.34919875860214233, |
| "learning_rate": 1.341025641025641e-05, |
| "loss": 0.0755, |
| "step": 6925 |
| }, |
| { |
| "epoch": 7.654185022026431, |
| "grad_norm": 0.6202211976051331, |
| "learning_rate": 1.3384615384615384e-05, |
| "loss": 0.095, |
| "step": 6950 |
| }, |
| { |
| "epoch": 7.681718061674009, |
| "grad_norm": 0.3826860189437866, |
| "learning_rate": 1.335897435897436e-05, |
| "loss": 0.095, |
| "step": 6975 |
| }, |
| { |
| "epoch": 7.709251101321586, |
| "grad_norm": 0.6930757761001587, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0641, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.709251101321586, |
| "eval_cer": 38.884367154631846, |
| "eval_loss": 0.8057100772857666, |
| "eval_runtime": 1352.106, |
| "eval_samples_per_second": 7.826, |
| "eval_steps_per_second": 1.957, |
| "eval_wer": 84.13012729844414, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.736784140969163, |
| "grad_norm": 0.5741052627563477, |
| "learning_rate": 1.3307692307692309e-05, |
| "loss": 0.0909, |
| "step": 7025 |
| }, |
| { |
| "epoch": 7.76431718061674, |
| "grad_norm": 0.7617988586425781, |
| "learning_rate": 1.3282051282051282e-05, |
| "loss": 0.0753, |
| "step": 7050 |
| }, |
| { |
| "epoch": 7.791850220264317, |
| "grad_norm": 0.5416741967201233, |
| "learning_rate": 1.3256410256410258e-05, |
| "loss": 0.0731, |
| "step": 7075 |
| }, |
| { |
| "epoch": 7.819383259911894, |
| "grad_norm": 0.6349952816963196, |
| "learning_rate": 1.3230769230769231e-05, |
| "loss": 0.0703, |
| "step": 7100 |
| }, |
| { |
| "epoch": 7.846916299559472, |
| "grad_norm": 0.33196786046028137, |
| "learning_rate": 1.3205128205128207e-05, |
| "loss": 0.0656, |
| "step": 7125 |
| }, |
| { |
| "epoch": 7.8744493392070485, |
| "grad_norm": 0.379213809967041, |
| "learning_rate": 1.317948717948718e-05, |
| "loss": 0.0917, |
| "step": 7150 |
| }, |
| { |
| "epoch": 7.901982378854625, |
| "grad_norm": 0.34401291608810425, |
| "learning_rate": 1.3153846153846156e-05, |
| "loss": 0.0911, |
| "step": 7175 |
| }, |
| { |
| "epoch": 7.929515418502203, |
| "grad_norm": 0.4732189178466797, |
| "learning_rate": 1.312820512820513e-05, |
| "loss": 0.101, |
| "step": 7200 |
| }, |
| { |
| "epoch": 7.95704845814978, |
| "grad_norm": 0.5580617785453796, |
| "learning_rate": 1.3102564102564103e-05, |
| "loss": 0.0898, |
| "step": 7225 |
| }, |
| { |
| "epoch": 7.984581497797357, |
| "grad_norm": 0.42180871963500977, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 0.086, |
| "step": 7250 |
| }, |
| { |
| "epoch": 8.012114537444933, |
| "grad_norm": 0.2615683376789093, |
| "learning_rate": 1.3051282051282052e-05, |
| "loss": 0.0898, |
| "step": 7275 |
| }, |
| { |
| "epoch": 8.039647577092511, |
| "grad_norm": 0.44722801446914673, |
| "learning_rate": 1.3025641025641027e-05, |
| "loss": 0.0602, |
| "step": 7300 |
| }, |
| { |
| "epoch": 8.067180616740089, |
| "grad_norm": 0.3499121367931366, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0549, |
| "step": 7325 |
| }, |
| { |
| "epoch": 8.094713656387665, |
| "grad_norm": 0.37767261266708374, |
| "learning_rate": 1.2974358974358976e-05, |
| "loss": 0.0573, |
| "step": 7350 |
| }, |
| { |
| "epoch": 8.122246696035242, |
| "grad_norm": 0.4645783007144928, |
| "learning_rate": 1.294871794871795e-05, |
| "loss": 0.0566, |
| "step": 7375 |
| }, |
| { |
| "epoch": 8.14977973568282, |
| "grad_norm": 0.6134966015815735, |
| "learning_rate": 1.2923076923076925e-05, |
| "loss": 0.0507, |
| "step": 7400 |
| }, |
| { |
| "epoch": 8.177312775330396, |
| "grad_norm": 0.28678062558174133, |
| "learning_rate": 1.2897435897435899e-05, |
| "loss": 0.051, |
| "step": 7425 |
| }, |
| { |
| "epoch": 8.204845814977974, |
| "grad_norm": 0.299078106880188, |
| "learning_rate": 1.2871794871794874e-05, |
| "loss": 0.045, |
| "step": 7450 |
| }, |
| { |
| "epoch": 8.232378854625551, |
| "grad_norm": 0.6386272311210632, |
| "learning_rate": 1.2846153846153848e-05, |
| "loss": 0.0665, |
| "step": 7475 |
| }, |
| { |
| "epoch": 8.259911894273127, |
| "grad_norm": 0.1515616923570633, |
| "learning_rate": 1.2820512820512823e-05, |
| "loss": 0.052, |
| "step": 7500 |
| }, |
| { |
| "epoch": 8.287444933920705, |
| "grad_norm": 0.61899733543396, |
| "learning_rate": 1.2794871794871795e-05, |
| "loss": 0.0462, |
| "step": 7525 |
| }, |
| { |
| "epoch": 8.314977973568283, |
| "grad_norm": 0.6535860300064087, |
| "learning_rate": 1.2769230769230769e-05, |
| "loss": 0.0518, |
| "step": 7550 |
| }, |
| { |
| "epoch": 8.342511013215859, |
| "grad_norm": 0.4084964692592621, |
| "learning_rate": 1.2743589743589744e-05, |
| "loss": 0.0574, |
| "step": 7575 |
| }, |
| { |
| "epoch": 8.370044052863436, |
| "grad_norm": 0.4185622036457062, |
| "learning_rate": 1.2717948717948718e-05, |
| "loss": 0.0466, |
| "step": 7600 |
| }, |
| { |
| "epoch": 8.397577092511014, |
| "grad_norm": 0.5417298078536987, |
| "learning_rate": 1.2692307692307693e-05, |
| "loss": 0.0595, |
| "step": 7625 |
| }, |
| { |
| "epoch": 8.42511013215859, |
| "grad_norm": 0.0882943645119667, |
| "learning_rate": 1.2666666666666667e-05, |
| "loss": 0.0441, |
| "step": 7650 |
| }, |
| { |
| "epoch": 8.452643171806168, |
| "grad_norm": 0.5028131008148193, |
| "learning_rate": 1.2641025641025642e-05, |
| "loss": 0.0584, |
| "step": 7675 |
| }, |
| { |
| "epoch": 8.480176211453745, |
| "grad_norm": 0.32492244243621826, |
| "learning_rate": 1.2615384615384616e-05, |
| "loss": 0.0519, |
| "step": 7700 |
| }, |
| { |
| "epoch": 8.507709251101321, |
| "grad_norm": 0.199100524187088, |
| "learning_rate": 1.2589743589743591e-05, |
| "loss": 0.0519, |
| "step": 7725 |
| }, |
| { |
| "epoch": 8.535242290748899, |
| "grad_norm": 0.560196578502655, |
| "learning_rate": 1.2564102564102565e-05, |
| "loss": 0.0601, |
| "step": 7750 |
| }, |
| { |
| "epoch": 8.562775330396477, |
| "grad_norm": 0.3848872780799866, |
| "learning_rate": 1.253846153846154e-05, |
| "loss": 0.0561, |
| "step": 7775 |
| }, |
| { |
| "epoch": 8.590308370044053, |
| "grad_norm": 0.6430539488792419, |
| "learning_rate": 1.2512820512820514e-05, |
| "loss": 0.0573, |
| "step": 7800 |
| }, |
| { |
| "epoch": 8.61784140969163, |
| "grad_norm": 0.1402147263288498, |
| "learning_rate": 1.2487179487179487e-05, |
| "loss": 0.0613, |
| "step": 7825 |
| }, |
| { |
| "epoch": 8.645374449339208, |
| "grad_norm": 0.3411605656147003, |
| "learning_rate": 1.2461538461538463e-05, |
| "loss": 0.0401, |
| "step": 7850 |
| }, |
| { |
| "epoch": 8.672907488986784, |
| "grad_norm": 0.4999459981918335, |
| "learning_rate": 1.2435897435897436e-05, |
| "loss": 0.0393, |
| "step": 7875 |
| }, |
| { |
| "epoch": 8.700440528634362, |
| "grad_norm": 0.6794586777687073, |
| "learning_rate": 1.2410256410256412e-05, |
| "loss": 0.061, |
| "step": 7900 |
| }, |
| { |
| "epoch": 8.72797356828194, |
| "grad_norm": 0.30914783477783203, |
| "learning_rate": 1.2384615384615385e-05, |
| "loss": 0.0552, |
| "step": 7925 |
| }, |
| { |
| "epoch": 8.755506607929515, |
| "grad_norm": 0.311613529920578, |
| "learning_rate": 1.235897435897436e-05, |
| "loss": 0.0556, |
| "step": 7950 |
| }, |
| { |
| "epoch": 8.783039647577093, |
| "grad_norm": 0.48470553755760193, |
| "learning_rate": 1.2333333333333334e-05, |
| "loss": 0.0553, |
| "step": 7975 |
| }, |
| { |
| "epoch": 8.810572687224669, |
| "grad_norm": 0.5605005621910095, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 0.0673, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.810572687224669, |
| "eval_cer": 25.173386218518225, |
| "eval_loss": 0.8125333189964294, |
| "eval_runtime": 1304.9459, |
| "eval_samples_per_second": 8.108, |
| "eval_steps_per_second": 2.028, |
| "eval_wer": 85.53512494106553, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.838105726872246, |
| "grad_norm": 0.33088424801826477, |
| "learning_rate": 1.2282051282051283e-05, |
| "loss": 0.049, |
| "step": 8025 |
| }, |
| { |
| "epoch": 8.865638766519824, |
| "grad_norm": 0.3602592945098877, |
| "learning_rate": 1.2256410256410259e-05, |
| "loss": 0.0572, |
| "step": 8050 |
| }, |
| { |
| "epoch": 8.8931718061674, |
| "grad_norm": 0.35909441113471985, |
| "learning_rate": 1.2230769230769232e-05, |
| "loss": 0.051, |
| "step": 8075 |
| }, |
| { |
| "epoch": 8.920704845814978, |
| "grad_norm": 0.45318055152893066, |
| "learning_rate": 1.2205128205128208e-05, |
| "loss": 0.0642, |
| "step": 8100 |
| }, |
| { |
| "epoch": 8.948237885462555, |
| "grad_norm": 1.001381754875183, |
| "learning_rate": 1.217948717948718e-05, |
| "loss": 0.0522, |
| "step": 8125 |
| }, |
| { |
| "epoch": 8.975770925110131, |
| "grad_norm": 0.5000578761100769, |
| "learning_rate": 1.2153846153846153e-05, |
| "loss": 0.0423, |
| "step": 8150 |
| }, |
| { |
| "epoch": 9.003303964757709, |
| "grad_norm": 0.29771438241004944, |
| "learning_rate": 1.2128205128205129e-05, |
| "loss": 0.063, |
| "step": 8175 |
| }, |
| { |
| "epoch": 9.030837004405287, |
| "grad_norm": 0.43198081851005554, |
| "learning_rate": 1.2102564102564102e-05, |
| "loss": 0.0416, |
| "step": 8200 |
| }, |
| { |
| "epoch": 9.058370044052863, |
| "grad_norm": 0.31377923488616943, |
| "learning_rate": 1.2076923076923078e-05, |
| "loss": 0.0358, |
| "step": 8225 |
| }, |
| { |
| "epoch": 9.08590308370044, |
| "grad_norm": 1.0352481603622437, |
| "learning_rate": 1.2051282051282051e-05, |
| "loss": 0.0348, |
| "step": 8250 |
| }, |
| { |
| "epoch": 9.113436123348018, |
| "grad_norm": 0.421749472618103, |
| "learning_rate": 1.2025641025641027e-05, |
| "loss": 0.0403, |
| "step": 8275 |
| }, |
| { |
| "epoch": 9.140969162995594, |
| "grad_norm": 0.39680853486061096, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0348, |
| "step": 8300 |
| }, |
| { |
| "epoch": 9.168502202643172, |
| "grad_norm": 0.25886261463165283, |
| "learning_rate": 1.1974358974358976e-05, |
| "loss": 0.0255, |
| "step": 8325 |
| }, |
| { |
| "epoch": 9.19603524229075, |
| "grad_norm": 0.29378727078437805, |
| "learning_rate": 1.194871794871795e-05, |
| "loss": 0.0421, |
| "step": 8350 |
| }, |
| { |
| "epoch": 9.223568281938325, |
| "grad_norm": 0.14189021289348602, |
| "learning_rate": 1.1923076923076925e-05, |
| "loss": 0.0346, |
| "step": 8375 |
| }, |
| { |
| "epoch": 9.251101321585903, |
| "grad_norm": 0.3648456335067749, |
| "learning_rate": 1.1897435897435898e-05, |
| "loss": 0.0358, |
| "step": 8400 |
| }, |
| { |
| "epoch": 9.27863436123348, |
| "grad_norm": 0.22953101992607117, |
| "learning_rate": 1.1871794871794872e-05, |
| "loss": 0.0377, |
| "step": 8425 |
| }, |
| { |
| "epoch": 9.306167400881057, |
| "grad_norm": 0.13100098073482513, |
| "learning_rate": 1.1846153846153847e-05, |
| "loss": 0.0345, |
| "step": 8450 |
| }, |
| { |
| "epoch": 9.333700440528634, |
| "grad_norm": 0.41983163356781006, |
| "learning_rate": 1.1820512820512821e-05, |
| "loss": 0.0358, |
| "step": 8475 |
| }, |
| { |
| "epoch": 9.361233480176212, |
| "grad_norm": 0.28245824575424194, |
| "learning_rate": 1.1794871794871796e-05, |
| "loss": 0.0467, |
| "step": 8500 |
| }, |
| { |
| "epoch": 9.388766519823788, |
| "grad_norm": 0.46235162019729614, |
| "learning_rate": 1.176923076923077e-05, |
| "loss": 0.0407, |
| "step": 8525 |
| }, |
| { |
| "epoch": 9.416299559471366, |
| "grad_norm": 0.4608246684074402, |
| "learning_rate": 1.1743589743589745e-05, |
| "loss": 0.0407, |
| "step": 8550 |
| }, |
| { |
| "epoch": 9.443832599118943, |
| "grad_norm": 0.32346612215042114, |
| "learning_rate": 1.1717948717948719e-05, |
| "loss": 0.0347, |
| "step": 8575 |
| }, |
| { |
| "epoch": 9.47136563876652, |
| "grad_norm": 0.3498935401439667, |
| "learning_rate": 1.1692307692307694e-05, |
| "loss": 0.0413, |
| "step": 8600 |
| }, |
| { |
| "epoch": 9.498898678414097, |
| "grad_norm": 0.48518890142440796, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 0.0503, |
| "step": 8625 |
| }, |
| { |
| "epoch": 9.526431718061675, |
| "grad_norm": 0.20332852005958557, |
| "learning_rate": 1.1641025641025643e-05, |
| "loss": 0.0472, |
| "step": 8650 |
| }, |
| { |
| "epoch": 9.55396475770925, |
| "grad_norm": 0.3680901825428009, |
| "learning_rate": 1.1615384615384617e-05, |
| "loss": 0.039, |
| "step": 8675 |
| }, |
| { |
| "epoch": 9.581497797356828, |
| "grad_norm": 0.4770890176296234, |
| "learning_rate": 1.1589743589743592e-05, |
| "loss": 0.0325, |
| "step": 8700 |
| }, |
| { |
| "epoch": 9.609030837004406, |
| "grad_norm": 0.3051774501800537, |
| "learning_rate": 1.1564102564102566e-05, |
| "loss": 0.0406, |
| "step": 8725 |
| }, |
| { |
| "epoch": 9.636563876651982, |
| "grad_norm": 0.4181124269962311, |
| "learning_rate": 1.1538461538461538e-05, |
| "loss": 0.0425, |
| "step": 8750 |
| }, |
| { |
| "epoch": 9.66409691629956, |
| "grad_norm": 0.3570977449417114, |
| "learning_rate": 1.1512820512820513e-05, |
| "loss": 0.0349, |
| "step": 8775 |
| }, |
| { |
| "epoch": 9.691629955947137, |
| "grad_norm": 0.40113458037376404, |
| "learning_rate": 1.1487179487179487e-05, |
| "loss": 0.0332, |
| "step": 8800 |
| }, |
| { |
| "epoch": 9.719162995594713, |
| "grad_norm": 0.5174753665924072, |
| "learning_rate": 1.1461538461538462e-05, |
| "loss": 0.0365, |
| "step": 8825 |
| }, |
| { |
| "epoch": 9.746696035242291, |
| "grad_norm": 0.6039919853210449, |
| "learning_rate": 1.1435897435897436e-05, |
| "loss": 0.0423, |
| "step": 8850 |
| }, |
| { |
| "epoch": 9.774229074889869, |
| "grad_norm": 0.33104389905929565, |
| "learning_rate": 1.1410256410256411e-05, |
| "loss": 0.0492, |
| "step": 8875 |
| }, |
| { |
| "epoch": 9.801762114537445, |
| "grad_norm": 0.5245941877365112, |
| "learning_rate": 1.1384615384615385e-05, |
| "loss": 0.0403, |
| "step": 8900 |
| }, |
| { |
| "epoch": 9.829295154185022, |
| "grad_norm": 0.4713122546672821, |
| "learning_rate": 1.135897435897436e-05, |
| "loss": 0.0539, |
| "step": 8925 |
| }, |
| { |
| "epoch": 9.8568281938326, |
| "grad_norm": 0.3693457841873169, |
| "learning_rate": 1.1333333333333334e-05, |
| "loss": 0.0419, |
| "step": 8950 |
| }, |
| { |
| "epoch": 9.884361233480176, |
| "grad_norm": 0.4538707733154297, |
| "learning_rate": 1.1307692307692309e-05, |
| "loss": 0.0345, |
| "step": 8975 |
| }, |
| { |
| "epoch": 9.911894273127754, |
| "grad_norm": 0.25317588448524475, |
| "learning_rate": 1.1282051282051283e-05, |
| "loss": 0.0329, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.911894273127754, |
| "eval_cer": 24.903992755035365, |
| "eval_loss": 0.8292160034179688, |
| "eval_runtime": 1301.6266, |
| "eval_samples_per_second": 8.129, |
| "eval_steps_per_second": 2.033, |
| "eval_wer": 84.72418670438473, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.939427312775331, |
| "grad_norm": 0.9093891382217407, |
| "learning_rate": 1.1256410256410258e-05, |
| "loss": 0.0354, |
| "step": 9025 |
| }, |
| { |
| "epoch": 9.966960352422907, |
| "grad_norm": 0.526305615901947, |
| "learning_rate": 1.1230769230769232e-05, |
| "loss": 0.04, |
| "step": 9050 |
| }, |
| { |
| "epoch": 9.994493392070485, |
| "grad_norm": 0.4748174846172333, |
| "learning_rate": 1.1205128205128205e-05, |
| "loss": 0.0405, |
| "step": 9075 |
| }, |
| { |
| "epoch": 10.022026431718063, |
| "grad_norm": 0.23602962493896484, |
| "learning_rate": 1.117948717948718e-05, |
| "loss": 0.0245, |
| "step": 9100 |
| }, |
| { |
| "epoch": 10.049559471365638, |
| "grad_norm": 0.2989708185195923, |
| "learning_rate": 1.1153846153846154e-05, |
| "loss": 0.0231, |
| "step": 9125 |
| }, |
| { |
| "epoch": 10.077092511013216, |
| "grad_norm": 0.34653839468955994, |
| "learning_rate": 1.112820512820513e-05, |
| "loss": 0.0306, |
| "step": 9150 |
| }, |
| { |
| "epoch": 10.104625550660794, |
| "grad_norm": 0.4413544535636902, |
| "learning_rate": 1.1102564102564103e-05, |
| "loss": 0.0242, |
| "step": 9175 |
| }, |
| { |
| "epoch": 10.13215859030837, |
| "grad_norm": 0.44882041215896606, |
| "learning_rate": 1.1076923076923079e-05, |
| "loss": 0.036, |
| "step": 9200 |
| }, |
| { |
| "epoch": 10.159691629955947, |
| "grad_norm": 0.049951497465372086, |
| "learning_rate": 1.1051282051282052e-05, |
| "loss": 0.0249, |
| "step": 9225 |
| }, |
| { |
| "epoch": 10.187224669603523, |
| "grad_norm": 0.34928587079048157, |
| "learning_rate": 1.1025641025641028e-05, |
| "loss": 0.0322, |
| "step": 9250 |
| }, |
| { |
| "epoch": 10.214757709251101, |
| "grad_norm": 0.18765118718147278, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.0249, |
| "step": 9275 |
| }, |
| { |
| "epoch": 10.242290748898679, |
| "grad_norm": 0.09570558369159698, |
| "learning_rate": 1.0974358974358977e-05, |
| "loss": 0.0241, |
| "step": 9300 |
| }, |
| { |
| "epoch": 10.269823788546255, |
| "grad_norm": 0.36708030104637146, |
| "learning_rate": 1.094871794871795e-05, |
| "loss": 0.0267, |
| "step": 9325 |
| }, |
| { |
| "epoch": 10.297356828193832, |
| "grad_norm": 0.6306156516075134, |
| "learning_rate": 1.0923076923076922e-05, |
| "loss": 0.028, |
| "step": 9350 |
| }, |
| { |
| "epoch": 10.32488986784141, |
| "grad_norm": 0.47958239912986755, |
| "learning_rate": 1.0897435897435898e-05, |
| "loss": 0.0374, |
| "step": 9375 |
| }, |
| { |
| "epoch": 10.352422907488986, |
| "grad_norm": 0.5049773454666138, |
| "learning_rate": 1.0871794871794871e-05, |
| "loss": 0.0252, |
| "step": 9400 |
| }, |
| { |
| "epoch": 10.379955947136564, |
| "grad_norm": 0.18035492300987244, |
| "learning_rate": 1.0846153846153847e-05, |
| "loss": 0.032, |
| "step": 9425 |
| }, |
| { |
| "epoch": 10.407488986784141, |
| "grad_norm": 0.40862882137298584, |
| "learning_rate": 1.082051282051282e-05, |
| "loss": 0.0317, |
| "step": 9450 |
| }, |
| { |
| "epoch": 10.435022026431717, |
| "grad_norm": 0.4345795512199402, |
| "learning_rate": 1.0794871794871796e-05, |
| "loss": 0.0227, |
| "step": 9475 |
| }, |
| { |
| "epoch": 10.462555066079295, |
| "grad_norm": 0.32652077078819275, |
| "learning_rate": 1.076923076923077e-05, |
| "loss": 0.0274, |
| "step": 9500 |
| }, |
| { |
| "epoch": 10.490088105726873, |
| "grad_norm": 0.49059435725212097, |
| "learning_rate": 1.0743589743589745e-05, |
| "loss": 0.0336, |
| "step": 9525 |
| }, |
| { |
| "epoch": 10.517621145374449, |
| "grad_norm": 0.14571261405944824, |
| "learning_rate": 1.0717948717948718e-05, |
| "loss": 0.0244, |
| "step": 9550 |
| }, |
| { |
| "epoch": 10.545154185022026, |
| "grad_norm": 0.2149128019809723, |
| "learning_rate": 1.0692307692307694e-05, |
| "loss": 0.0252, |
| "step": 9575 |
| }, |
| { |
| "epoch": 10.572687224669604, |
| "grad_norm": 0.20995257794857025, |
| "learning_rate": 1.0666666666666667e-05, |
| "loss": 0.0311, |
| "step": 9600 |
| }, |
| { |
| "epoch": 10.60022026431718, |
| "grad_norm": 0.4227479100227356, |
| "learning_rate": 1.0641025641025643e-05, |
| "loss": 0.0261, |
| "step": 9625 |
| }, |
| { |
| "epoch": 10.627753303964758, |
| "grad_norm": 0.1345728039741516, |
| "learning_rate": 1.0615384615384616e-05, |
| "loss": 0.026, |
| "step": 9650 |
| }, |
| { |
| "epoch": 10.655286343612335, |
| "grad_norm": 0.5568249821662903, |
| "learning_rate": 1.058974358974359e-05, |
| "loss": 0.0275, |
| "step": 9675 |
| }, |
| { |
| "epoch": 10.682819383259911, |
| "grad_norm": 0.5649207234382629, |
| "learning_rate": 1.0564102564102565e-05, |
| "loss": 0.03, |
| "step": 9700 |
| }, |
| { |
| "epoch": 10.710352422907489, |
| "grad_norm": 0.23224163055419922, |
| "learning_rate": 1.0538461538461539e-05, |
| "loss": 0.0292, |
| "step": 9725 |
| }, |
| { |
| "epoch": 10.737885462555067, |
| "grad_norm": 0.2227552831172943, |
| "learning_rate": 1.0512820512820514e-05, |
| "loss": 0.028, |
| "step": 9750 |
| }, |
| { |
| "epoch": 10.765418502202643, |
| "grad_norm": 0.07342702895402908, |
| "learning_rate": 1.0487179487179488e-05, |
| "loss": 0.0227, |
| "step": 9775 |
| }, |
| { |
| "epoch": 10.79295154185022, |
| "grad_norm": 0.3385262191295624, |
| "learning_rate": 1.0461538461538463e-05, |
| "loss": 0.0325, |
| "step": 9800 |
| }, |
| { |
| "epoch": 10.820484581497798, |
| "grad_norm": 0.2666647434234619, |
| "learning_rate": 1.0435897435897437e-05, |
| "loss": 0.0264, |
| "step": 9825 |
| }, |
| { |
| "epoch": 10.848017621145374, |
| "grad_norm": 0.13147205114364624, |
| "learning_rate": 1.0410256410256412e-05, |
| "loss": 0.0184, |
| "step": 9850 |
| }, |
| { |
| "epoch": 10.875550660792952, |
| "grad_norm": 0.24823608994483948, |
| "learning_rate": 1.0384615384615386e-05, |
| "loss": 0.0249, |
| "step": 9875 |
| }, |
| { |
| "epoch": 10.90308370044053, |
| "grad_norm": 0.265788197517395, |
| "learning_rate": 1.0358974358974361e-05, |
| "loss": 0.0217, |
| "step": 9900 |
| }, |
| { |
| "epoch": 10.930616740088105, |
| "grad_norm": 0.2914508879184723, |
| "learning_rate": 1.0333333333333335e-05, |
| "loss": 0.0199, |
| "step": 9925 |
| }, |
| { |
| "epoch": 10.958149779735683, |
| "grad_norm": 0.19100092351436615, |
| "learning_rate": 1.0307692307692307e-05, |
| "loss": 0.0232, |
| "step": 9950 |
| }, |
| { |
| "epoch": 10.98568281938326, |
| "grad_norm": 0.2141091227531433, |
| "learning_rate": 1.0282051282051282e-05, |
| "loss": 0.0276, |
| "step": 9975 |
| }, |
| { |
| "epoch": 11.013215859030836, |
| "grad_norm": 0.09335622936487198, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.0186, |
| "step": 10000 |
| }, |
| { |
| "epoch": 11.013215859030836, |
| "eval_cer": 25.171093508190705, |
| "eval_loss": 0.8366118669509888, |
| "eval_runtime": 1307.8053, |
| "eval_samples_per_second": 8.091, |
| "eval_steps_per_second": 2.023, |
| "eval_wer": 84.47901933050449, |
| "step": 10000 |
| }, |
| { |
| "epoch": 11.040748898678414, |
| "grad_norm": 0.29987862706184387, |
| "learning_rate": 1.0230769230769231e-05, |
| "loss": 0.0117, |
| "step": 10025 |
| }, |
| { |
| "epoch": 11.068281938325992, |
| "grad_norm": 0.22261077165603638, |
| "learning_rate": 1.0205128205128205e-05, |
| "loss": 0.0199, |
| "step": 10050 |
| }, |
| { |
| "epoch": 11.095814977973568, |
| "grad_norm": 0.7212164402008057, |
| "learning_rate": 1.017948717948718e-05, |
| "loss": 0.0194, |
| "step": 10075 |
| }, |
| { |
| "epoch": 11.123348017621145, |
| "grad_norm": 0.18654099106788635, |
| "learning_rate": 1.0153846153846154e-05, |
| "loss": 0.0191, |
| "step": 10100 |
| }, |
| { |
| "epoch": 11.150881057268723, |
| "grad_norm": 0.1351199895143509, |
| "learning_rate": 1.012820512820513e-05, |
| "loss": 0.0151, |
| "step": 10125 |
| }, |
| { |
| "epoch": 11.178414096916299, |
| "grad_norm": 0.24383758008480072, |
| "learning_rate": 1.0102564102564103e-05, |
| "loss": 0.0142, |
| "step": 10150 |
| }, |
| { |
| "epoch": 11.205947136563877, |
| "grad_norm": 0.1962803304195404, |
| "learning_rate": 1.0076923076923078e-05, |
| "loss": 0.0159, |
| "step": 10175 |
| }, |
| { |
| "epoch": 11.233480176211454, |
| "grad_norm": 0.1277613639831543, |
| "learning_rate": 1.0051282051282052e-05, |
| "loss": 0.018, |
| "step": 10200 |
| }, |
| { |
| "epoch": 11.26101321585903, |
| "grad_norm": 0.17365778982639313, |
| "learning_rate": 1.0025641025641027e-05, |
| "loss": 0.0198, |
| "step": 10225 |
| }, |
| { |
| "epoch": 11.288546255506608, |
| "grad_norm": 0.5494518876075745, |
| "learning_rate": 1e-05, |
| "loss": 0.0157, |
| "step": 10250 |
| }, |
| { |
| "epoch": 11.316079295154186, |
| "grad_norm": 0.11686886101961136, |
| "learning_rate": 9.974358974358974e-06, |
| "loss": 0.024, |
| "step": 10275 |
| }, |
| { |
| "epoch": 11.343612334801762, |
| "grad_norm": 0.15467554330825806, |
| "learning_rate": 9.94871794871795e-06, |
| "loss": 0.0174, |
| "step": 10300 |
| }, |
| { |
| "epoch": 11.37114537444934, |
| "grad_norm": 0.10721301287412643, |
| "learning_rate": 9.923076923076923e-06, |
| "loss": 0.0169, |
| "step": 10325 |
| }, |
| { |
| "epoch": 11.398678414096917, |
| "grad_norm": 0.1287498027086258, |
| "learning_rate": 9.897435897435899e-06, |
| "loss": 0.0202, |
| "step": 10350 |
| }, |
| { |
| "epoch": 11.426211453744493, |
| "grad_norm": 0.4366730749607086, |
| "learning_rate": 9.871794871794872e-06, |
| "loss": 0.0166, |
| "step": 10375 |
| }, |
| { |
| "epoch": 11.45374449339207, |
| "grad_norm": 0.12972579896450043, |
| "learning_rate": 9.846153846153848e-06, |
| "loss": 0.0177, |
| "step": 10400 |
| }, |
| { |
| "epoch": 11.481277533039648, |
| "grad_norm": 0.810859203338623, |
| "learning_rate": 9.820512820512821e-06, |
| "loss": 0.0173, |
| "step": 10425 |
| }, |
| { |
| "epoch": 11.508810572687224, |
| "grad_norm": 0.1165216714143753, |
| "learning_rate": 9.794871794871795e-06, |
| "loss": 0.0194, |
| "step": 10450 |
| }, |
| { |
| "epoch": 11.536343612334802, |
| "grad_norm": 0.16423256695270538, |
| "learning_rate": 9.76923076923077e-06, |
| "loss": 0.017, |
| "step": 10475 |
| }, |
| { |
| "epoch": 11.56387665198238, |
| "grad_norm": 0.6200090050697327, |
| "learning_rate": 9.743589743589744e-06, |
| "loss": 0.0233, |
| "step": 10500 |
| }, |
| { |
| "epoch": 11.591409691629956, |
| "grad_norm": 0.3650573790073395, |
| "learning_rate": 9.71794871794872e-06, |
| "loss": 0.0188, |
| "step": 10525 |
| }, |
| { |
| "epoch": 11.618942731277533, |
| "grad_norm": 0.23086689412593842, |
| "learning_rate": 9.692307692307693e-06, |
| "loss": 0.0166, |
| "step": 10550 |
| }, |
| { |
| "epoch": 11.646475770925111, |
| "grad_norm": 0.28406432271003723, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 0.0199, |
| "step": 10575 |
| }, |
| { |
| "epoch": 11.674008810572687, |
| "grad_norm": 0.13203246891498566, |
| "learning_rate": 9.641025641025642e-06, |
| "loss": 0.0169, |
| "step": 10600 |
| }, |
| { |
| "epoch": 11.701541850220265, |
| "grad_norm": 0.3809435963630676, |
| "learning_rate": 9.615384615384616e-06, |
| "loss": 0.0167, |
| "step": 10625 |
| }, |
| { |
| "epoch": 11.729074889867842, |
| "grad_norm": 0.2622781991958618, |
| "learning_rate": 9.589743589743591e-06, |
| "loss": 0.023, |
| "step": 10650 |
| }, |
| { |
| "epoch": 11.756607929515418, |
| "grad_norm": 0.3118574321269989, |
| "learning_rate": 9.564102564102565e-06, |
| "loss": 0.0162, |
| "step": 10675 |
| }, |
| { |
| "epoch": 11.784140969162996, |
| "grad_norm": 0.29195636510849, |
| "learning_rate": 9.53846153846154e-06, |
| "loss": 0.0166, |
| "step": 10700 |
| }, |
| { |
| "epoch": 11.811674008810574, |
| "grad_norm": 0.16257286071777344, |
| "learning_rate": 9.512820512820514e-06, |
| "loss": 0.0186, |
| "step": 10725 |
| }, |
| { |
| "epoch": 11.83920704845815, |
| "grad_norm": 0.2690454125404358, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 0.0184, |
| "step": 10750 |
| }, |
| { |
| "epoch": 11.866740088105727, |
| "grad_norm": 0.07074102014303207, |
| "learning_rate": 9.461538461538463e-06, |
| "loss": 0.0147, |
| "step": 10775 |
| }, |
| { |
| "epoch": 11.894273127753303, |
| "grad_norm": 0.0660664364695549, |
| "learning_rate": 9.435897435897436e-06, |
| "loss": 0.017, |
| "step": 10800 |
| }, |
| { |
| "epoch": 11.92180616740088, |
| "grad_norm": 0.42482617497444153, |
| "learning_rate": 9.410256410256412e-06, |
| "loss": 0.0164, |
| "step": 10825 |
| }, |
| { |
| "epoch": 11.949339207048459, |
| "grad_norm": 0.16394160687923431, |
| "learning_rate": 9.384615384615385e-06, |
| "loss": 0.0154, |
| "step": 10850 |
| }, |
| { |
| "epoch": 11.976872246696034, |
| "grad_norm": 0.39682498574256897, |
| "learning_rate": 9.358974358974359e-06, |
| "loss": 0.0198, |
| "step": 10875 |
| }, |
| { |
| "epoch": 12.004405286343612, |
| "grad_norm": 0.1381184458732605, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.0193, |
| "step": 10900 |
| }, |
| { |
| "epoch": 12.03193832599119, |
| "grad_norm": 0.15030303597450256, |
| "learning_rate": 9.307692307692308e-06, |
| "loss": 0.0199, |
| "step": 10925 |
| }, |
| { |
| "epoch": 12.059471365638766, |
| "grad_norm": 0.5344926714897156, |
| "learning_rate": 9.282051282051283e-06, |
| "loss": 0.0197, |
| "step": 10950 |
| }, |
| { |
| "epoch": 12.087004405286343, |
| "grad_norm": 0.18761467933654785, |
| "learning_rate": 9.256410256410257e-06, |
| "loss": 0.0166, |
| "step": 10975 |
| }, |
| { |
| "epoch": 12.114537444933921, |
| "grad_norm": 0.22124651074409485, |
| "learning_rate": 9.230769230769232e-06, |
| "loss": 0.0123, |
| "step": 11000 |
| }, |
| { |
| "epoch": 12.114537444933921, |
| "eval_cer": 24.39787695023672, |
| "eval_loss": 0.8476730585098267, |
| "eval_runtime": 1307.0774, |
| "eval_samples_per_second": 8.095, |
| "eval_steps_per_second": 2.024, |
| "eval_wer": 83.86610089580387, |
| "step": 11000 |
| }, |
| { |
| "epoch": 12.142070484581497, |
| "grad_norm": 0.24581079185009003, |
| "learning_rate": 9.205128205128206e-06, |
| "loss": 0.0142, |
| "step": 11025 |
| }, |
| { |
| "epoch": 12.169603524229075, |
| "grad_norm": 0.1644822657108307, |
| "learning_rate": 9.17948717948718e-06, |
| "loss": 0.0115, |
| "step": 11050 |
| }, |
| { |
| "epoch": 12.197136563876652, |
| "grad_norm": 0.0707065761089325, |
| "learning_rate": 9.153846153846155e-06, |
| "loss": 0.0146, |
| "step": 11075 |
| }, |
| { |
| "epoch": 12.224669603524228, |
| "grad_norm": 0.20994262397289276, |
| "learning_rate": 9.128205128205129e-06, |
| "loss": 0.0118, |
| "step": 11100 |
| }, |
| { |
| "epoch": 12.252202643171806, |
| "grad_norm": 0.04189275577664375, |
| "learning_rate": 9.102564102564104e-06, |
| "loss": 0.0098, |
| "step": 11125 |
| }, |
| { |
| "epoch": 12.279735682819384, |
| "grad_norm": 0.2779877781867981, |
| "learning_rate": 9.076923076923078e-06, |
| "loss": 0.0132, |
| "step": 11150 |
| }, |
| { |
| "epoch": 12.30726872246696, |
| "grad_norm": 0.04877633601427078, |
| "learning_rate": 9.051282051282051e-06, |
| "loss": 0.0125, |
| "step": 11175 |
| }, |
| { |
| "epoch": 12.334801762114537, |
| "grad_norm": 0.15319667756557465, |
| "learning_rate": 9.025641025641027e-06, |
| "loss": 0.0111, |
| "step": 11200 |
| }, |
| { |
| "epoch": 12.362334801762115, |
| "grad_norm": 0.5198453664779663, |
| "learning_rate": 9e-06, |
| "loss": 0.0119, |
| "step": 11225 |
| }, |
| { |
| "epoch": 12.389867841409691, |
| "grad_norm": 0.1989358365535736, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 0.0112, |
| "step": 11250 |
| }, |
| { |
| "epoch": 12.417400881057269, |
| "grad_norm": 0.5313608646392822, |
| "learning_rate": 8.94871794871795e-06, |
| "loss": 0.0094, |
| "step": 11275 |
| }, |
| { |
| "epoch": 12.444933920704846, |
| "grad_norm": 0.17706328630447388, |
| "learning_rate": 8.923076923076925e-06, |
| "loss": 0.0128, |
| "step": 11300 |
| }, |
| { |
| "epoch": 12.472466960352422, |
| "grad_norm": 0.45485684275627136, |
| "learning_rate": 8.897435897435898e-06, |
| "loss": 0.0136, |
| "step": 11325 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.14598797261714935, |
| "learning_rate": 8.871794871794872e-06, |
| "loss": 0.0129, |
| "step": 11350 |
| }, |
| { |
| "epoch": 12.527533039647578, |
| "grad_norm": 0.20234304666519165, |
| "learning_rate": 8.846153846153847e-06, |
| "loss": 0.0133, |
| "step": 11375 |
| }, |
| { |
| "epoch": 12.555066079295154, |
| "grad_norm": 0.15167205035686493, |
| "learning_rate": 8.820512820512821e-06, |
| "loss": 0.013, |
| "step": 11400 |
| }, |
| { |
| "epoch": 12.582599118942731, |
| "grad_norm": 0.10332977026700974, |
| "learning_rate": 8.794871794871796e-06, |
| "loss": 0.012, |
| "step": 11425 |
| }, |
| { |
| "epoch": 12.610132158590309, |
| "grad_norm": 0.06746497750282288, |
| "learning_rate": 8.76923076923077e-06, |
| "loss": 0.0102, |
| "step": 11450 |
| }, |
| { |
| "epoch": 12.637665198237885, |
| "grad_norm": 0.21585692465305328, |
| "learning_rate": 8.743589743589743e-06, |
| "loss": 0.0131, |
| "step": 11475 |
| }, |
| { |
| "epoch": 12.665198237885463, |
| "grad_norm": 0.11972617357969284, |
| "learning_rate": 8.717948717948719e-06, |
| "loss": 0.0124, |
| "step": 11500 |
| }, |
| { |
| "epoch": 12.69273127753304, |
| "grad_norm": 0.16483718156814575, |
| "learning_rate": 8.692307692307692e-06, |
| "loss": 0.0136, |
| "step": 11525 |
| }, |
| { |
| "epoch": 12.720264317180616, |
| "grad_norm": 0.04187220335006714, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.0122, |
| "step": 11550 |
| }, |
| { |
| "epoch": 12.747797356828194, |
| "grad_norm": 0.37281060218811035, |
| "learning_rate": 8.641025641025641e-06, |
| "loss": 0.0117, |
| "step": 11575 |
| }, |
| { |
| "epoch": 12.775330396475772, |
| "grad_norm": 0.42664897441864014, |
| "learning_rate": 8.615384615384617e-06, |
| "loss": 0.0132, |
| "step": 11600 |
| }, |
| { |
| "epoch": 12.802863436123348, |
| "grad_norm": 0.08013510704040527, |
| "learning_rate": 8.58974358974359e-06, |
| "loss": 0.0129, |
| "step": 11625 |
| }, |
| { |
| "epoch": 12.830396475770925, |
| "grad_norm": 0.24972140789031982, |
| "learning_rate": 8.564102564102564e-06, |
| "loss": 0.0132, |
| "step": 11650 |
| }, |
| { |
| "epoch": 12.857929515418503, |
| "grad_norm": 1.2854641675949097, |
| "learning_rate": 8.53846153846154e-06, |
| "loss": 0.0129, |
| "step": 11675 |
| }, |
| { |
| "epoch": 12.885462555066079, |
| "grad_norm": 0.5620148777961731, |
| "learning_rate": 8.512820512820513e-06, |
| "loss": 0.0165, |
| "step": 11700 |
| }, |
| { |
| "epoch": 12.912995594713657, |
| "grad_norm": 0.1447388082742691, |
| "learning_rate": 8.487179487179488e-06, |
| "loss": 0.0152, |
| "step": 11725 |
| }, |
| { |
| "epoch": 12.940528634361234, |
| "grad_norm": 0.17876878380775452, |
| "learning_rate": 8.461538461538462e-06, |
| "loss": 0.015, |
| "step": 11750 |
| }, |
| { |
| "epoch": 12.96806167400881, |
| "grad_norm": 0.29610171914100647, |
| "learning_rate": 8.435897435897436e-06, |
| "loss": 0.0153, |
| "step": 11775 |
| }, |
| { |
| "epoch": 12.995594713656388, |
| "grad_norm": 0.1498490869998932, |
| "learning_rate": 8.410256410256411e-06, |
| "loss": 0.0144, |
| "step": 11800 |
| }, |
| { |
| "epoch": 13.023127753303966, |
| "grad_norm": 0.15733398497104645, |
| "learning_rate": 8.384615384615385e-06, |
| "loss": 0.0099, |
| "step": 11825 |
| }, |
| { |
| "epoch": 13.050660792951541, |
| "grad_norm": 0.13376560807228088, |
| "learning_rate": 8.35897435897436e-06, |
| "loss": 0.0093, |
| "step": 11850 |
| }, |
| { |
| "epoch": 13.07819383259912, |
| "grad_norm": 0.30212774872779846, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.0117, |
| "step": 11875 |
| }, |
| { |
| "epoch": 13.105726872246697, |
| "grad_norm": 0.6191049814224243, |
| "learning_rate": 8.307692307692309e-06, |
| "loss": 0.0088, |
| "step": 11900 |
| }, |
| { |
| "epoch": 13.133259911894273, |
| "grad_norm": 0.12833338975906372, |
| "learning_rate": 8.282051282051283e-06, |
| "loss": 0.0081, |
| "step": 11925 |
| }, |
| { |
| "epoch": 13.16079295154185, |
| "grad_norm": 0.02582469768822193, |
| "learning_rate": 8.256410256410256e-06, |
| "loss": 0.0075, |
| "step": 11950 |
| }, |
| { |
| "epoch": 13.188325991189428, |
| "grad_norm": 0.40551483631134033, |
| "learning_rate": 8.230769230769232e-06, |
| "loss": 0.0106, |
| "step": 11975 |
| }, |
| { |
| "epoch": 13.215859030837004, |
| "grad_norm": 0.178267240524292, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 0.0105, |
| "step": 12000 |
| }, |
| { |
| "epoch": 13.215859030837004, |
| "eval_cer": 23.92385909002327, |
| "eval_loss": 0.8548922538757324, |
| "eval_runtime": 1301.0663, |
| "eval_samples_per_second": 8.133, |
| "eval_steps_per_second": 2.034, |
| "eval_wer": 83.48892032060348, |
| "step": 12000 |
| }, |
| { |
| "epoch": 13.243392070484582, |
| "grad_norm": 0.25860708951950073, |
| "learning_rate": 8.17948717948718e-06, |
| "loss": 0.0107, |
| "step": 12025 |
| }, |
| { |
| "epoch": 13.270925110132158, |
| "grad_norm": 0.0770430937409401, |
| "learning_rate": 8.153846153846154e-06, |
| "loss": 0.011, |
| "step": 12050 |
| }, |
| { |
| "epoch": 13.298458149779735, |
| "grad_norm": 0.2660870850086212, |
| "learning_rate": 8.12820512820513e-06, |
| "loss": 0.0105, |
| "step": 12075 |
| }, |
| { |
| "epoch": 13.325991189427313, |
| "grad_norm": 0.0884290263056755, |
| "learning_rate": 8.102564102564103e-06, |
| "loss": 0.0116, |
| "step": 12100 |
| }, |
| { |
| "epoch": 13.353524229074889, |
| "grad_norm": 0.10695531964302063, |
| "learning_rate": 8.076923076923077e-06, |
| "loss": 0.0107, |
| "step": 12125 |
| }, |
| { |
| "epoch": 13.381057268722467, |
| "grad_norm": 0.03739326447248459, |
| "learning_rate": 8.051282051282052e-06, |
| "loss": 0.0157, |
| "step": 12150 |
| }, |
| { |
| "epoch": 13.408590308370044, |
| "grad_norm": 0.4565247595310211, |
| "learning_rate": 8.025641025641026e-06, |
| "loss": 0.0126, |
| "step": 12175 |
| }, |
| { |
| "epoch": 13.43612334801762, |
| "grad_norm": 0.06541293859481812, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0131, |
| "step": 12200 |
| }, |
| { |
| "epoch": 13.463656387665198, |
| "grad_norm": 0.10597793757915497, |
| "learning_rate": 7.974358974358975e-06, |
| "loss": 0.0107, |
| "step": 12225 |
| }, |
| { |
| "epoch": 13.491189427312776, |
| "grad_norm": 0.2990114390850067, |
| "learning_rate": 7.948717948717949e-06, |
| "loss": 0.0107, |
| "step": 12250 |
| }, |
| { |
| "epoch": 13.518722466960352, |
| "grad_norm": 0.1924736499786377, |
| "learning_rate": 7.923076923076924e-06, |
| "loss": 0.0126, |
| "step": 12275 |
| }, |
| { |
| "epoch": 13.54625550660793, |
| "grad_norm": 0.13384470343589783, |
| "learning_rate": 7.897435897435898e-06, |
| "loss": 0.0144, |
| "step": 12300 |
| }, |
| { |
| "epoch": 13.573788546255507, |
| "grad_norm": 0.195987269282341, |
| "learning_rate": 7.871794871794873e-06, |
| "loss": 0.011, |
| "step": 12325 |
| }, |
| { |
| "epoch": 13.601321585903083, |
| "grad_norm": 0.6262577772140503, |
| "learning_rate": 7.846153846153847e-06, |
| "loss": 0.0102, |
| "step": 12350 |
| }, |
| { |
| "epoch": 13.62885462555066, |
| "grad_norm": 0.09502260386943817, |
| "learning_rate": 7.820512820512822e-06, |
| "loss": 0.0129, |
| "step": 12375 |
| }, |
| { |
| "epoch": 13.656387665198238, |
| "grad_norm": 0.14882908761501312, |
| "learning_rate": 7.794871794871796e-06, |
| "loss": 0.0103, |
| "step": 12400 |
| }, |
| { |
| "epoch": 13.683920704845814, |
| "grad_norm": 0.421539306640625, |
| "learning_rate": 7.76923076923077e-06, |
| "loss": 0.0092, |
| "step": 12425 |
| }, |
| { |
| "epoch": 13.711453744493392, |
| "grad_norm": 0.04390239343047142, |
| "learning_rate": 7.743589743589745e-06, |
| "loss": 0.0141, |
| "step": 12450 |
| }, |
| { |
| "epoch": 13.73898678414097, |
| "grad_norm": 0.14362525939941406, |
| "learning_rate": 7.717948717948718e-06, |
| "loss": 0.0108, |
| "step": 12475 |
| }, |
| { |
| "epoch": 13.766519823788546, |
| "grad_norm": 0.25303810834884644, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.0151, |
| "step": 12500 |
| }, |
| { |
| "epoch": 13.794052863436123, |
| "grad_norm": 0.37157806754112244, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 0.0136, |
| "step": 12525 |
| }, |
| { |
| "epoch": 13.821585903083701, |
| "grad_norm": 0.36168116331100464, |
| "learning_rate": 7.641025641025641e-06, |
| "loss": 0.0139, |
| "step": 12550 |
| }, |
| { |
| "epoch": 13.849118942731277, |
| "grad_norm": 0.32778996229171753, |
| "learning_rate": 7.615384615384615e-06, |
| "loss": 0.0111, |
| "step": 12575 |
| }, |
| { |
| "epoch": 13.876651982378855, |
| "grad_norm": 0.16989374160766602, |
| "learning_rate": 7.58974358974359e-06, |
| "loss": 0.0111, |
| "step": 12600 |
| }, |
| { |
| "epoch": 13.904185022026432, |
| "grad_norm": 0.05724957212805748, |
| "learning_rate": 7.564102564102564e-06, |
| "loss": 0.0125, |
| "step": 12625 |
| }, |
| { |
| "epoch": 13.931718061674008, |
| "grad_norm": 0.35101988911628723, |
| "learning_rate": 7.538461538461539e-06, |
| "loss": 0.0147, |
| "step": 12650 |
| }, |
| { |
| "epoch": 13.959251101321586, |
| "grad_norm": 0.0790194496512413, |
| "learning_rate": 7.512820512820513e-06, |
| "loss": 0.013, |
| "step": 12675 |
| }, |
| { |
| "epoch": 13.986784140969164, |
| "grad_norm": 0.2358320951461792, |
| "learning_rate": 7.487179487179488e-06, |
| "loss": 0.0147, |
| "step": 12700 |
| }, |
| { |
| "epoch": 14.01431718061674, |
| "grad_norm": 0.06501065939664841, |
| "learning_rate": 7.461538461538462e-06, |
| "loss": 0.0094, |
| "step": 12725 |
| }, |
| { |
| "epoch": 14.041850220264317, |
| "grad_norm": 0.028322290629148483, |
| "learning_rate": 7.435897435897437e-06, |
| "loss": 0.01, |
| "step": 12750 |
| }, |
| { |
| "epoch": 14.069383259911895, |
| "grad_norm": 0.04854853078722954, |
| "learning_rate": 7.410256410256411e-06, |
| "loss": 0.0109, |
| "step": 12775 |
| }, |
| { |
| "epoch": 14.09691629955947, |
| "grad_norm": 0.042353082448244095, |
| "learning_rate": 7.384615384615386e-06, |
| "loss": 0.0069, |
| "step": 12800 |
| }, |
| { |
| "epoch": 14.124449339207048, |
| "grad_norm": 0.24730762839317322, |
| "learning_rate": 7.35897435897436e-06, |
| "loss": 0.009, |
| "step": 12825 |
| }, |
| { |
| "epoch": 14.151982378854626, |
| "grad_norm": 0.13546280562877655, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 0.0082, |
| "step": 12850 |
| }, |
| { |
| "epoch": 14.179515418502202, |
| "grad_norm": 0.1111743226647377, |
| "learning_rate": 7.307692307692308e-06, |
| "loss": 0.0083, |
| "step": 12875 |
| }, |
| { |
| "epoch": 14.20704845814978, |
| "grad_norm": 0.0262732096016407, |
| "learning_rate": 7.282051282051282e-06, |
| "loss": 0.007, |
| "step": 12900 |
| }, |
| { |
| "epoch": 14.234581497797357, |
| "grad_norm": 0.3866499960422516, |
| "learning_rate": 7.256410256410257e-06, |
| "loss": 0.008, |
| "step": 12925 |
| }, |
| { |
| "epoch": 14.262114537444933, |
| "grad_norm": 0.07846901565790176, |
| "learning_rate": 7.230769230769231e-06, |
| "loss": 0.0069, |
| "step": 12950 |
| }, |
| { |
| "epoch": 14.289647577092511, |
| "grad_norm": 0.044436316937208176, |
| "learning_rate": 7.205128205128206e-06, |
| "loss": 0.0081, |
| "step": 12975 |
| }, |
| { |
| "epoch": 14.317180616740089, |
| "grad_norm": 0.18789108097553253, |
| "learning_rate": 7.17948717948718e-06, |
| "loss": 0.0074, |
| "step": 13000 |
| }, |
| { |
| "epoch": 14.317180616740089, |
| "eval_cer": 24.13134937466326, |
| "eval_loss": 0.8608656525611877, |
| "eval_runtime": 1301.5926, |
| "eval_samples_per_second": 8.129, |
| "eval_steps_per_second": 2.033, |
| "eval_wer": 83.63979255068365, |
| "step": 13000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 23, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.233586142437127e+20, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |