{ "best_global_step": 13000, "best_metric": 13.793103448275861, "best_model_checkpoint": "./whisper-large-v3-atc-mrezzat/checkpoint-13000", "epoch": 27.956989247311828, "eval_steps": 500, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.053763440860215055, "grad_norm": 7.093221187591553, "learning_rate": 4.800000000000001e-07, "loss": 1.2803, "step": 25 }, { "epoch": 0.10752688172043011, "grad_norm": 3.561824321746826, "learning_rate": 9.800000000000001e-07, "loss": 1.024, "step": 50 }, { "epoch": 0.16129032258064516, "grad_norm": 3.972370147705078, "learning_rate": 1.48e-06, "loss": 0.8323, "step": 75 }, { "epoch": 0.21505376344086022, "grad_norm": 5.043467044830322, "learning_rate": 1.98e-06, "loss": 0.7706, "step": 100 }, { "epoch": 0.26881720430107525, "grad_norm": 3.704352855682373, "learning_rate": 2.4800000000000004e-06, "loss": 0.6839, "step": 125 }, { "epoch": 0.3225806451612903, "grad_norm": 2.454521894454956, "learning_rate": 2.9800000000000003e-06, "loss": 0.6756, "step": 150 }, { "epoch": 0.3763440860215054, "grad_norm": 4.109330654144287, "learning_rate": 3.48e-06, "loss": 0.6282, "step": 175 }, { "epoch": 0.43010752688172044, "grad_norm": 3.429811477661133, "learning_rate": 3.980000000000001e-06, "loss": 0.5735, "step": 200 }, { "epoch": 0.4838709677419355, "grad_norm": 3.641101121902466, "learning_rate": 4.48e-06, "loss": 0.535, "step": 225 }, { "epoch": 0.5376344086021505, "grad_norm": 3.169020652770996, "learning_rate": 4.980000000000001e-06, "loss": 0.5174, "step": 250 }, { "epoch": 0.5913978494623656, "grad_norm": 2.9315528869628906, "learning_rate": 5.480000000000001e-06, "loss": 0.446, "step": 275 }, { "epoch": 0.6451612903225806, "grad_norm": 3.473388671875, "learning_rate": 5.98e-06, "loss": 0.5147, "step": 300 }, { "epoch": 0.6989247311827957, "grad_norm": 3.428112506866455, "learning_rate": 6.480000000000001e-06, "loss": 0.466, "step": 325 }, { "epoch": 0.7526881720430108, "grad_norm": 2.7643871307373047, "learning_rate": 6.98e-06, "loss": 0.4712, "step": 350 }, { "epoch": 0.8064516129032258, "grad_norm": 2.77138352394104, "learning_rate": 7.48e-06, "loss": 0.4456, "step": 375 }, { "epoch": 0.8602150537634409, "grad_norm": 3.0843310356140137, "learning_rate": 7.980000000000002e-06, "loss": 0.508, "step": 400 }, { "epoch": 0.9139784946236559, "grad_norm": 2.7599642276763916, "learning_rate": 8.48e-06, "loss": 0.4453, "step": 425 }, { "epoch": 0.967741935483871, "grad_norm": 3.672558546066284, "learning_rate": 8.98e-06, "loss": 0.4443, "step": 450 }, { "epoch": 1.021505376344086, "grad_norm": 3.0827476978302, "learning_rate": 9.48e-06, "loss": 0.3988, "step": 475 }, { "epoch": 1.075268817204301, "grad_norm": 2.4267773628234863, "learning_rate": 9.980000000000001e-06, "loss": 0.3737, "step": 500 }, { "epoch": 1.075268817204301, "eval_loss": 0.27068084478378296, "eval_runtime": 204.8229, "eval_samples_per_second": 4.619, "eval_steps_per_second": 0.581, "eval_wer": 16.612739206804108, "step": 500 }, { "epoch": 1.129032258064516, "grad_norm": 1.7745027542114258, "learning_rate": 9.982156133828997e-06, "loss": 0.3749, "step": 525 }, { "epoch": 1.1827956989247312, "grad_norm": 2.5673892498016357, "learning_rate": 9.963568773234202e-06, "loss": 0.3547, "step": 550 }, { "epoch": 1.2365591397849462, "grad_norm": 3.1428632736206055, "learning_rate": 9.944981412639407e-06, "loss": 0.3335, "step": 575 }, { "epoch": 1.2903225806451613, "grad_norm": 3.2400989532470703, "learning_rate": 9.92639405204461e-06, "loss": 0.3382, "step": 600 }, { "epoch": 1.3440860215053765, "grad_norm": 3.075026273727417, "learning_rate": 9.907806691449815e-06, "loss": 0.3641, "step": 625 }, { "epoch": 1.3978494623655915, "grad_norm": 2.6721091270446777, "learning_rate": 9.88921933085502e-06, "loss": 0.358, "step": 650 }, { "epoch": 1.4516129032258065, "grad_norm": 1.545538306236267, "learning_rate": 9.870631970260223e-06, "loss": 0.334, "step": 675 }, { "epoch": 1.5053763440860215, "grad_norm": 2.7524378299713135, "learning_rate": 9.852044609665428e-06, "loss": 0.3401, "step": 700 }, { "epoch": 1.5591397849462365, "grad_norm": 2.7014448642730713, "learning_rate": 9.833457249070633e-06, "loss": 0.3305, "step": 725 }, { "epoch": 1.6129032258064515, "grad_norm": 2.5529074668884277, "learning_rate": 9.814869888475837e-06, "loss": 0.316, "step": 750 }, { "epoch": 1.6666666666666665, "grad_norm": 2.5125572681427, "learning_rate": 9.796282527881042e-06, "loss": 0.3324, "step": 775 }, { "epoch": 1.7204301075268817, "grad_norm": 3.541673183441162, "learning_rate": 9.777695167286247e-06, "loss": 0.3025, "step": 800 }, { "epoch": 1.7741935483870968, "grad_norm": 2.809391498565674, "learning_rate": 9.75910780669145e-06, "loss": 0.3263, "step": 825 }, { "epoch": 1.827956989247312, "grad_norm": 3.627777576446533, "learning_rate": 9.740520446096655e-06, "loss": 0.3209, "step": 850 }, { "epoch": 1.881720430107527, "grad_norm": 2.672043561935425, "learning_rate": 9.721933085501858e-06, "loss": 0.2879, "step": 875 }, { "epoch": 1.935483870967742, "grad_norm": 2.783828020095825, "learning_rate": 9.703345724907063e-06, "loss": 0.2813, "step": 900 }, { "epoch": 1.989247311827957, "grad_norm": 3.2919387817382812, "learning_rate": 9.684758364312268e-06, "loss": 0.3037, "step": 925 }, { "epoch": 2.043010752688172, "grad_norm": 2.6845808029174805, "learning_rate": 9.666171003717473e-06, "loss": 0.2344, "step": 950 }, { "epoch": 2.096774193548387, "grad_norm": 2.9660229682922363, "learning_rate": 9.647583643122678e-06, "loss": 0.1819, "step": 975 }, { "epoch": 2.150537634408602, "grad_norm": 1.8242266178131104, "learning_rate": 9.628996282527881e-06, "loss": 0.2016, "step": 1000 }, { "epoch": 2.150537634408602, "eval_loss": 0.27269652485847473, "eval_runtime": 202.1375, "eval_samples_per_second": 4.68, "eval_steps_per_second": 0.589, "eval_wer": 14.246094111121382, "step": 1000 }, { "epoch": 2.204301075268817, "grad_norm": 2.224641799926758, "learning_rate": 9.610408921933086e-06, "loss": 0.2209, "step": 1025 }, { "epoch": 2.258064516129032, "grad_norm": 2.039360761642456, "learning_rate": 9.59182156133829e-06, "loss": 0.2072, "step": 1050 }, { "epoch": 2.3118279569892475, "grad_norm": 2.753037929534912, "learning_rate": 9.573234200743495e-06, "loss": 0.1707, "step": 1075 }, { "epoch": 2.3655913978494625, "grad_norm": 2.8433048725128174, "learning_rate": 9.5546468401487e-06, "loss": 0.2005, "step": 1100 }, { "epoch": 2.4193548387096775, "grad_norm": 2.2583348751068115, "learning_rate": 9.536059479553905e-06, "loss": 0.1792, "step": 1125 }, { "epoch": 2.4731182795698925, "grad_norm": 2.151129722595215, "learning_rate": 9.51747211895911e-06, "loss": 0.2076, "step": 1150 }, { "epoch": 2.5268817204301075, "grad_norm": 2.289693593978882, "learning_rate": 9.498884758364313e-06, "loss": 0.1901, "step": 1175 }, { "epoch": 2.5806451612903225, "grad_norm": 2.0624475479125977, "learning_rate": 9.480297397769518e-06, "loss": 0.1938, "step": 1200 }, { "epoch": 2.6344086021505375, "grad_norm": 2.455775737762451, "learning_rate": 9.461710037174721e-06, "loss": 0.2116, "step": 1225 }, { "epoch": 2.688172043010753, "grad_norm": 1.853768229484558, "learning_rate": 9.443122676579926e-06, "loss": 0.2008, "step": 1250 }, { "epoch": 2.741935483870968, "grad_norm": 2.4691860675811768, "learning_rate": 9.424535315985131e-06, "loss": 0.2167, "step": 1275 }, { "epoch": 2.795698924731183, "grad_norm": 1.8937417268753052, "learning_rate": 9.405947955390336e-06, "loss": 0.1864, "step": 1300 }, { "epoch": 2.849462365591398, "grad_norm": 2.786158323287964, "learning_rate": 9.38736059479554e-06, "loss": 0.1916, "step": 1325 }, { "epoch": 2.903225806451613, "grad_norm": 2.7571518421173096, "learning_rate": 9.368773234200744e-06, "loss": 0.1982, "step": 1350 }, { "epoch": 2.956989247311828, "grad_norm": 2.334691286087036, "learning_rate": 9.35018587360595e-06, "loss": 0.1864, "step": 1375 }, { "epoch": 3.010752688172043, "grad_norm": 1.4865392446517944, "learning_rate": 9.331598513011153e-06, "loss": 0.1593, "step": 1400 }, { "epoch": 3.064516129032258, "grad_norm": 1.797865629196167, "learning_rate": 9.313011152416358e-06, "loss": 0.1123, "step": 1425 }, { "epoch": 3.118279569892473, "grad_norm": 1.5422674417495728, "learning_rate": 9.294423791821563e-06, "loss": 0.1132, "step": 1450 }, { "epoch": 3.172043010752688, "grad_norm": 1.649880290031433, "learning_rate": 9.275836431226768e-06, "loss": 0.1075, "step": 1475 }, { "epoch": 3.225806451612903, "grad_norm": 2.0276639461517334, "learning_rate": 9.25724907063197e-06, "loss": 0.1164, "step": 1500 }, { "epoch": 3.225806451612903, "eval_loss": 0.28087103366851807, "eval_runtime": 202.3199, "eval_samples_per_second": 4.676, "eval_steps_per_second": 0.588, "eval_wer": 15.050383655357308, "step": 1500 }, { "epoch": 3.279569892473118, "grad_norm": 2.4856255054473877, "learning_rate": 9.238661710037176e-06, "loss": 0.1112, "step": 1525 }, { "epoch": 3.3333333333333335, "grad_norm": 2.857877492904663, "learning_rate": 9.220074349442379e-06, "loss": 0.1131, "step": 1550 }, { "epoch": 3.3870967741935485, "grad_norm": 1.7496925592422485, "learning_rate": 9.201486988847584e-06, "loss": 0.1306, "step": 1575 }, { "epoch": 3.4408602150537635, "grad_norm": 1.9851291179656982, "learning_rate": 9.182899628252789e-06, "loss": 0.114, "step": 1600 }, { "epoch": 3.4946236559139785, "grad_norm": 2.6501877307891846, "learning_rate": 9.164312267657994e-06, "loss": 0.1208, "step": 1625 }, { "epoch": 3.5483870967741935, "grad_norm": 1.6141562461853027, "learning_rate": 9.145724907063197e-06, "loss": 0.1194, "step": 1650 }, { "epoch": 3.6021505376344085, "grad_norm": 2.246312141418457, "learning_rate": 9.127137546468402e-06, "loss": 0.121, "step": 1675 }, { "epoch": 3.6559139784946235, "grad_norm": 1.7408199310302734, "learning_rate": 9.108550185873607e-06, "loss": 0.1178, "step": 1700 }, { "epoch": 3.709677419354839, "grad_norm": 2.1992955207824707, "learning_rate": 9.08996282527881e-06, "loss": 0.0901, "step": 1725 }, { "epoch": 3.763440860215054, "grad_norm": 2.057574987411499, "learning_rate": 9.071375464684016e-06, "loss": 0.0999, "step": 1750 }, { "epoch": 3.817204301075269, "grad_norm": 2.032602548599243, "learning_rate": 9.052788104089219e-06, "loss": 0.1057, "step": 1775 }, { "epoch": 3.870967741935484, "grad_norm": 1.700415849685669, "learning_rate": 9.034200743494424e-06, "loss": 0.1127, "step": 1800 }, { "epoch": 3.924731182795699, "grad_norm": 2.944364070892334, "learning_rate": 9.015613382899629e-06, "loss": 0.1304, "step": 1825 }, { "epoch": 3.978494623655914, "grad_norm": 2.807861804962158, "learning_rate": 8.997026022304834e-06, "loss": 0.1215, "step": 1850 }, { "epoch": 4.032258064516129, "grad_norm": 2.064152717590332, "learning_rate": 8.978438661710039e-06, "loss": 0.0942, "step": 1875 }, { "epoch": 4.086021505376344, "grad_norm": 2.315067768096924, "learning_rate": 8.959851301115242e-06, "loss": 0.0636, "step": 1900 }, { "epoch": 4.139784946236559, "grad_norm": 1.4923697710037231, "learning_rate": 8.941263940520447e-06, "loss": 0.0553, "step": 1925 }, { "epoch": 4.193548387096774, "grad_norm": 1.0652992725372314, "learning_rate": 8.92267657992565e-06, "loss": 0.0545, "step": 1950 }, { "epoch": 4.247311827956989, "grad_norm": 1.9834535121917725, "learning_rate": 8.904089219330855e-06, "loss": 0.049, "step": 1975 }, { "epoch": 4.301075268817204, "grad_norm": 1.4393575191497803, "learning_rate": 8.88550185873606e-06, "loss": 0.0551, "step": 2000 }, { "epoch": 4.301075268817204, "eval_loss": 0.3065280318260193, "eval_runtime": 202.95, "eval_samples_per_second": 4.661, "eval_steps_per_second": 0.586, "eval_wer": 15.087362484977351, "step": 2000 }, { "epoch": 4.354838709677419, "grad_norm": 1.7886149883270264, "learning_rate": 8.866914498141265e-06, "loss": 0.0629, "step": 2025 }, { "epoch": 4.408602150537634, "grad_norm": 1.470372200012207, "learning_rate": 8.84832713754647e-06, "loss": 0.0569, "step": 2050 }, { "epoch": 4.462365591397849, "grad_norm": 2.3206701278686523, "learning_rate": 8.829739776951673e-06, "loss": 0.0612, "step": 2075 }, { "epoch": 4.516129032258064, "grad_norm": 1.5979121923446655, "learning_rate": 8.811152416356878e-06, "loss": 0.0703, "step": 2100 }, { "epoch": 4.56989247311828, "grad_norm": 2.818779945373535, "learning_rate": 8.792565055762082e-06, "loss": 0.0637, "step": 2125 }, { "epoch": 4.623655913978495, "grad_norm": 2.848932981491089, "learning_rate": 8.773977695167287e-06, "loss": 0.0636, "step": 2150 }, { "epoch": 4.67741935483871, "grad_norm": 1.2150336503982544, "learning_rate": 8.755390334572492e-06, "loss": 0.0898, "step": 2175 }, { "epoch": 4.731182795698925, "grad_norm": 2.5077340602874756, "learning_rate": 8.736802973977697e-06, "loss": 0.0509, "step": 2200 }, { "epoch": 4.78494623655914, "grad_norm": 2.6455860137939453, "learning_rate": 8.7182156133829e-06, "loss": 0.0662, "step": 2225 }, { "epoch": 4.838709677419355, "grad_norm": 2.1598827838897705, "learning_rate": 8.699628252788105e-06, "loss": 0.0618, "step": 2250 }, { "epoch": 4.89247311827957, "grad_norm": 1.961423397064209, "learning_rate": 8.68104089219331e-06, "loss": 0.0687, "step": 2275 }, { "epoch": 4.946236559139785, "grad_norm": 1.5583302974700928, "learning_rate": 8.662453531598513e-06, "loss": 0.0587, "step": 2300 }, { "epoch": 5.0, "grad_norm": 1.7334260940551758, "learning_rate": 8.643866171003718e-06, "loss": 0.0639, "step": 2325 }, { "epoch": 5.053763440860215, "grad_norm": 1.385697364807129, "learning_rate": 8.625278810408923e-06, "loss": 0.0295, "step": 2350 }, { "epoch": 5.10752688172043, "grad_norm": 1.8390223979949951, "learning_rate": 8.606691449814128e-06, "loss": 0.0336, "step": 2375 }, { "epoch": 5.161290322580645, "grad_norm": 1.6100000143051147, "learning_rate": 8.588104089219331e-06, "loss": 0.0346, "step": 2400 }, { "epoch": 5.21505376344086, "grad_norm": 1.8762363195419312, "learning_rate": 8.569516728624536e-06, "loss": 0.0355, "step": 2425 }, { "epoch": 5.268817204301075, "grad_norm": 0.8988639116287231, "learning_rate": 8.55092936802974e-06, "loss": 0.038, "step": 2450 }, { "epoch": 5.32258064516129, "grad_norm": 1.372160792350769, "learning_rate": 8.532342007434945e-06, "loss": 0.0443, "step": 2475 }, { "epoch": 5.376344086021505, "grad_norm": 1.3820526599884033, "learning_rate": 8.51375464684015e-06, "loss": 0.0345, "step": 2500 }, { "epoch": 5.376344086021505, "eval_loss": 0.31700512766838074, "eval_runtime": 202.9264, "eval_samples_per_second": 4.662, "eval_steps_per_second": 0.586, "eval_wer": 14.902468336877137, "step": 2500 }, { "epoch": 5.43010752688172, "grad_norm": 1.8369241952896118, "learning_rate": 8.495167286245355e-06, "loss": 0.0362, "step": 2525 }, { "epoch": 5.483870967741936, "grad_norm": 1.735297441482544, "learning_rate": 8.476579925650558e-06, "loss": 0.0369, "step": 2550 }, { "epoch": 5.53763440860215, "grad_norm": 2.1869583129882812, "learning_rate": 8.457992565055763e-06, "loss": 0.0359, "step": 2575 }, { "epoch": 5.591397849462366, "grad_norm": 0.9142827987670898, "learning_rate": 8.439405204460968e-06, "loss": 0.0369, "step": 2600 }, { "epoch": 5.645161290322581, "grad_norm": 1.5425326824188232, "learning_rate": 8.420817843866171e-06, "loss": 0.0413, "step": 2625 }, { "epoch": 5.698924731182796, "grad_norm": 1.532554268836975, "learning_rate": 8.402230483271376e-06, "loss": 0.0455, "step": 2650 }, { "epoch": 5.752688172043011, "grad_norm": 1.7818132638931274, "learning_rate": 8.38364312267658e-06, "loss": 0.0351, "step": 2675 }, { "epoch": 5.806451612903226, "grad_norm": 0.8005560040473938, "learning_rate": 8.365055762081784e-06, "loss": 0.0446, "step": 2700 }, { "epoch": 5.860215053763441, "grad_norm": 1.37205171585083, "learning_rate": 8.34646840148699e-06, "loss": 0.0447, "step": 2725 }, { "epoch": 5.913978494623656, "grad_norm": 0.9380530714988708, "learning_rate": 8.327881040892194e-06, "loss": 0.0335, "step": 2750 }, { "epoch": 5.967741935483871, "grad_norm": 2.020190954208374, "learning_rate": 8.3092936802974e-06, "loss": 0.0354, "step": 2775 }, { "epoch": 6.021505376344086, "grad_norm": 0.7758223414421082, "learning_rate": 8.290706319702603e-06, "loss": 0.0245, "step": 2800 }, { "epoch": 6.075268817204301, "grad_norm": 1.127894639968872, "learning_rate": 8.272118959107808e-06, "loss": 0.0233, "step": 2825 }, { "epoch": 6.129032258064516, "grad_norm": 0.7980286478996277, "learning_rate": 8.253531598513011e-06, "loss": 0.0234, "step": 2850 }, { "epoch": 6.182795698924731, "grad_norm": 1.1685783863067627, "learning_rate": 8.234944237918216e-06, "loss": 0.0311, "step": 2875 }, { "epoch": 6.236559139784946, "grad_norm": 1.3557145595550537, "learning_rate": 8.216356877323421e-06, "loss": 0.0219, "step": 2900 }, { "epoch": 6.290322580645161, "grad_norm": 0.7474266290664673, "learning_rate": 8.197769516728626e-06, "loss": 0.0244, "step": 2925 }, { "epoch": 6.344086021505376, "grad_norm": 0.9074203968048096, "learning_rate": 8.179182156133829e-06, "loss": 0.0195, "step": 2950 }, { "epoch": 6.397849462365591, "grad_norm": 1.0333547592163086, "learning_rate": 8.160594795539034e-06, "loss": 0.0284, "step": 2975 }, { "epoch": 6.451612903225806, "grad_norm": 1.9100396633148193, "learning_rate": 8.142007434944239e-06, "loss": 0.0186, "step": 3000 }, { "epoch": 6.451612903225806, "eval_loss": 0.3364327549934387, "eval_runtime": 203.4115, "eval_samples_per_second": 4.651, "eval_steps_per_second": 0.585, "eval_wer": 14.77304243320699, "step": 3000 }, { "epoch": 6.505376344086022, "grad_norm": 0.6991943120956421, "learning_rate": 8.123420074349442e-06, "loss": 0.0201, "step": 3025 }, { "epoch": 6.559139784946236, "grad_norm": 2.668675422668457, "learning_rate": 8.104832713754647e-06, "loss": 0.0257, "step": 3050 }, { "epoch": 6.612903225806452, "grad_norm": 1.0620919466018677, "learning_rate": 8.086245353159852e-06, "loss": 0.0275, "step": 3075 }, { "epoch": 6.666666666666667, "grad_norm": 1.9009549617767334, "learning_rate": 8.067657992565057e-06, "loss": 0.0215, "step": 3100 }, { "epoch": 6.720430107526882, "grad_norm": 0.8860704302787781, "learning_rate": 8.04907063197026e-06, "loss": 0.0199, "step": 3125 }, { "epoch": 6.774193548387097, "grad_norm": 1.2130790948867798, "learning_rate": 8.030483271375466e-06, "loss": 0.0237, "step": 3150 }, { "epoch": 6.827956989247312, "grad_norm": 1.5909550189971924, "learning_rate": 8.011895910780669e-06, "loss": 0.0269, "step": 3175 }, { "epoch": 6.881720430107527, "grad_norm": 1.6755486726760864, "learning_rate": 7.993308550185874e-06, "loss": 0.0339, "step": 3200 }, { "epoch": 6.935483870967742, "grad_norm": 1.2641445398330688, "learning_rate": 7.974721189591079e-06, "loss": 0.0204, "step": 3225 }, { "epoch": 6.989247311827957, "grad_norm": 1.7877347469329834, "learning_rate": 7.956133828996284e-06, "loss": 0.0266, "step": 3250 }, { "epoch": 7.043010752688172, "grad_norm": 0.9837028384208679, "learning_rate": 7.937546468401489e-06, "loss": 0.02, "step": 3275 }, { "epoch": 7.096774193548387, "grad_norm": 1.2097680568695068, "learning_rate": 7.918959107806692e-06, "loss": 0.0126, "step": 3300 }, { "epoch": 7.150537634408602, "grad_norm": 1.447039246559143, "learning_rate": 7.900371747211897e-06, "loss": 0.018, "step": 3325 }, { "epoch": 7.204301075268817, "grad_norm": 0.8316716551780701, "learning_rate": 7.8817843866171e-06, "loss": 0.0178, "step": 3350 }, { "epoch": 7.258064516129032, "grad_norm": 0.9670646786689758, "learning_rate": 7.863197026022305e-06, "loss": 0.0122, "step": 3375 }, { "epoch": 7.311827956989247, "grad_norm": 1.4154245853424072, "learning_rate": 7.84460966542751e-06, "loss": 0.0171, "step": 3400 }, { "epoch": 7.365591397849462, "grad_norm": 1.3647488355636597, "learning_rate": 7.826022304832714e-06, "loss": 0.0151, "step": 3425 }, { "epoch": 7.419354838709677, "grad_norm": 1.548120141029358, "learning_rate": 7.807434944237919e-06, "loss": 0.0149, "step": 3450 }, { "epoch": 7.473118279569892, "grad_norm": 1.6091225147247314, "learning_rate": 7.788847583643124e-06, "loss": 0.0168, "step": 3475 }, { "epoch": 7.526881720430108, "grad_norm": 1.1116617918014526, "learning_rate": 7.770260223048329e-06, "loss": 0.0161, "step": 3500 }, { "epoch": 7.526881720430108, "eval_loss": 0.34663301706314087, "eval_runtime": 202.0433, "eval_samples_per_second": 4.682, "eval_steps_per_second": 0.589, "eval_wer": 14.551169455486734, "step": 3500 }, { "epoch": 7.580645161290323, "grad_norm": 1.468459129333496, "learning_rate": 7.751672862453532e-06, "loss": 0.0193, "step": 3525 }, { "epoch": 7.634408602150538, "grad_norm": 1.2769989967346191, "learning_rate": 7.733085501858737e-06, "loss": 0.0184, "step": 3550 }, { "epoch": 7.688172043010753, "grad_norm": 1.0488286018371582, "learning_rate": 7.71449814126394e-06, "loss": 0.019, "step": 3575 }, { "epoch": 7.741935483870968, "grad_norm": 0.8325207829475403, "learning_rate": 7.695910780669145e-06, "loss": 0.0252, "step": 3600 }, { "epoch": 7.795698924731183, "grad_norm": 0.7656351923942566, "learning_rate": 7.67732342007435e-06, "loss": 0.0155, "step": 3625 }, { "epoch": 7.849462365591398, "grad_norm": 0.9184199571609497, "learning_rate": 7.658736059479555e-06, "loss": 0.028, "step": 3650 }, { "epoch": 7.903225806451613, "grad_norm": 1.2135573625564575, "learning_rate": 7.64014869888476e-06, "loss": 0.0178, "step": 3675 }, { "epoch": 7.956989247311828, "grad_norm": 0.8172153830528259, "learning_rate": 7.621561338289963e-06, "loss": 0.0254, "step": 3700 }, { "epoch": 8.010752688172044, "grad_norm": 0.431659072637558, "learning_rate": 7.602973977695168e-06, "loss": 0.0145, "step": 3725 }, { "epoch": 8.064516129032258, "grad_norm": 0.9533307552337646, "learning_rate": 7.584386617100372e-06, "loss": 0.0165, "step": 3750 }, { "epoch": 8.118279569892474, "grad_norm": 0.7198922038078308, "learning_rate": 7.565799256505577e-06, "loss": 0.0107, "step": 3775 }, { "epoch": 8.172043010752688, "grad_norm": 0.8582783937454224, "learning_rate": 7.547211895910781e-06, "loss": 0.0123, "step": 3800 }, { "epoch": 8.225806451612904, "grad_norm": 1.0113513469696045, "learning_rate": 7.528624535315986e-06, "loss": 0.014, "step": 3825 }, { "epoch": 8.279569892473118, "grad_norm": 0.7275539040565491, "learning_rate": 7.51003717472119e-06, "loss": 0.0115, "step": 3850 }, { "epoch": 8.333333333333334, "grad_norm": 0.4298296570777893, "learning_rate": 7.491449814126395e-06, "loss": 0.0104, "step": 3875 }, { "epoch": 8.387096774193548, "grad_norm": 0.7536816596984863, "learning_rate": 7.4728624535316e-06, "loss": 0.0132, "step": 3900 }, { "epoch": 8.440860215053764, "grad_norm": 1.0941580533981323, "learning_rate": 7.454275092936804e-06, "loss": 0.012, "step": 3925 }, { "epoch": 8.494623655913978, "grad_norm": 1.0508357286453247, "learning_rate": 7.435687732342009e-06, "loss": 0.0135, "step": 3950 }, { "epoch": 8.548387096774194, "grad_norm": 0.6876735687255859, "learning_rate": 7.417100371747212e-06, "loss": 0.0156, "step": 3975 }, { "epoch": 8.602150537634408, "grad_norm": 0.8525980114936829, "learning_rate": 7.398513011152417e-06, "loss": 0.0106, "step": 4000 }, { "epoch": 8.602150537634408, "eval_loss": 0.3538697063922882, "eval_runtime": 202.0959, "eval_samples_per_second": 4.681, "eval_steps_per_second": 0.589, "eval_wer": 14.338541185171488, "step": 4000 }, { "epoch": 8.655913978494624, "grad_norm": 0.5362399220466614, "learning_rate": 7.379925650557621e-06, "loss": 0.0123, "step": 4025 }, { "epoch": 8.709677419354838, "grad_norm": 0.8804866671562195, "learning_rate": 7.361338289962826e-06, "loss": 0.0171, "step": 4050 }, { "epoch": 8.763440860215054, "grad_norm": 0.8643043041229248, "learning_rate": 7.34275092936803e-06, "loss": 0.0144, "step": 4075 }, { "epoch": 8.817204301075268, "grad_norm": 0.8704060912132263, "learning_rate": 7.3241635687732344e-06, "loss": 0.0121, "step": 4100 }, { "epoch": 8.870967741935484, "grad_norm": 0.6113823056221008, "learning_rate": 7.305576208178439e-06, "loss": 0.0154, "step": 4125 }, { "epoch": 8.924731182795698, "grad_norm": 1.1631172895431519, "learning_rate": 7.2869888475836436e-06, "loss": 0.0109, "step": 4150 }, { "epoch": 8.978494623655914, "grad_norm": 0.8042282462120056, "learning_rate": 7.2684014869888485e-06, "loss": 0.0158, "step": 4175 }, { "epoch": 9.03225806451613, "grad_norm": 0.18347720801830292, "learning_rate": 7.249814126394053e-06, "loss": 0.0132, "step": 4200 }, { "epoch": 9.086021505376344, "grad_norm": 0.7228168845176697, "learning_rate": 7.231226765799258e-06, "loss": 0.0099, "step": 4225 }, { "epoch": 9.13978494623656, "grad_norm": 0.35777589678764343, "learning_rate": 7.212639405204461e-06, "loss": 0.0129, "step": 4250 }, { "epoch": 9.193548387096774, "grad_norm": 0.2701317071914673, "learning_rate": 7.194052044609666e-06, "loss": 0.0095, "step": 4275 }, { "epoch": 9.24731182795699, "grad_norm": 1.6921519041061401, "learning_rate": 7.17546468401487e-06, "loss": 0.0082, "step": 4300 }, { "epoch": 9.301075268817204, "grad_norm": 0.27636006474494934, "learning_rate": 7.156877323420075e-06, "loss": 0.0075, "step": 4325 }, { "epoch": 9.35483870967742, "grad_norm": 0.7335753440856934, "learning_rate": 7.138289962825279e-06, "loss": 0.0102, "step": 4350 }, { "epoch": 9.408602150537634, "grad_norm": 0.9723600149154663, "learning_rate": 7.119702602230484e-06, "loss": 0.0104, "step": 4375 }, { "epoch": 9.46236559139785, "grad_norm": 0.2777242660522461, "learning_rate": 7.101115241635689e-06, "loss": 0.0138, "step": 4400 }, { "epoch": 9.516129032258064, "grad_norm": 0.42475125193595886, "learning_rate": 7.082527881040892e-06, "loss": 0.0074, "step": 4425 }, { "epoch": 9.56989247311828, "grad_norm": 0.6463161110877991, "learning_rate": 7.063940520446097e-06, "loss": 0.0105, "step": 4450 }, { "epoch": 9.623655913978494, "grad_norm": 1.2284172773361206, "learning_rate": 7.0453531598513015e-06, "loss": 0.0086, "step": 4475 }, { "epoch": 9.67741935483871, "grad_norm": 0.6127483248710632, "learning_rate": 7.0267657992565065e-06, "loss": 0.013, "step": 4500 }, { "epoch": 9.67741935483871, "eval_loss": 0.3432445228099823, "eval_runtime": 202.5879, "eval_samples_per_second": 4.67, "eval_steps_per_second": 0.587, "eval_wer": 14.883978922067117, "step": 4500 }, { "epoch": 9.731182795698924, "grad_norm": 1.6641124486923218, "learning_rate": 7.008178438661711e-06, "loss": 0.0117, "step": 4525 }, { "epoch": 9.78494623655914, "grad_norm": 0.49032703042030334, "learning_rate": 6.989591078066915e-06, "loss": 0.0113, "step": 4550 }, { "epoch": 9.838709677419354, "grad_norm": 1.0603209733963013, "learning_rate": 6.971003717472119e-06, "loss": 0.0093, "step": 4575 }, { "epoch": 9.89247311827957, "grad_norm": 1.1902903318405151, "learning_rate": 6.952416356877324e-06, "loss": 0.009, "step": 4600 }, { "epoch": 9.946236559139784, "grad_norm": 0.4575275182723999, "learning_rate": 6.933828996282529e-06, "loss": 0.0103, "step": 4625 }, { "epoch": 10.0, "grad_norm": 2.514280080795288, "learning_rate": 6.915241635687733e-06, "loss": 0.0146, "step": 4650 }, { "epoch": 10.053763440860216, "grad_norm": 0.28566455841064453, "learning_rate": 6.896654275092938e-06, "loss": 0.008, "step": 4675 }, { "epoch": 10.10752688172043, "grad_norm": 0.3524170219898224, "learning_rate": 6.878066914498141e-06, "loss": 0.0057, "step": 4700 }, { "epoch": 10.161290322580646, "grad_norm": 1.7274552583694458, "learning_rate": 6.859479553903346e-06, "loss": 0.0083, "step": 4725 }, { "epoch": 10.21505376344086, "grad_norm": 0.31285515427589417, "learning_rate": 6.84089219330855e-06, "loss": 0.0071, "step": 4750 }, { "epoch": 10.268817204301076, "grad_norm": 0.8492361307144165, "learning_rate": 6.822304832713755e-06, "loss": 0.0086, "step": 4775 }, { "epoch": 10.32258064516129, "grad_norm": 0.39797672629356384, "learning_rate": 6.8037174721189595e-06, "loss": 0.0088, "step": 4800 }, { "epoch": 10.376344086021506, "grad_norm": 0.410177618265152, "learning_rate": 6.7851301115241644e-06, "loss": 0.0085, "step": 4825 }, { "epoch": 10.43010752688172, "grad_norm": 0.45091158151626587, "learning_rate": 6.766542750929369e-06, "loss": 0.0062, "step": 4850 }, { "epoch": 10.483870967741936, "grad_norm": 1.3592181205749512, "learning_rate": 6.747955390334573e-06, "loss": 0.01, "step": 4875 }, { "epoch": 10.53763440860215, "grad_norm": 0.4976150691509247, "learning_rate": 6.729368029739778e-06, "loss": 0.0069, "step": 4900 }, { "epoch": 10.591397849462366, "grad_norm": 0.14256972074508667, "learning_rate": 6.710780669144982e-06, "loss": 0.0064, "step": 4925 }, { "epoch": 10.64516129032258, "grad_norm": 0.7307581901550293, "learning_rate": 6.692193308550187e-06, "loss": 0.0075, "step": 4950 }, { "epoch": 10.698924731182796, "grad_norm": 0.8009108901023865, "learning_rate": 6.673605947955391e-06, "loss": 0.0071, "step": 4975 }, { "epoch": 10.75268817204301, "grad_norm": 0.7494556307792664, "learning_rate": 6.655018587360595e-06, "loss": 0.0092, "step": 5000 }, { "epoch": 10.75268817204301, "eval_loss": 0.3648987114429474, "eval_runtime": 202.4649, "eval_samples_per_second": 4.672, "eval_steps_per_second": 0.588, "eval_wer": 14.218359988906352, "step": 5000 }, { "epoch": 10.806451612903226, "grad_norm": 1.12769615650177, "learning_rate": 6.636431226765799e-06, "loss": 0.0091, "step": 5025 }, { "epoch": 10.86021505376344, "grad_norm": 0.7359474897384644, "learning_rate": 6.617843866171004e-06, "loss": 0.0112, "step": 5050 }, { "epoch": 10.913978494623656, "grad_norm": 0.4451664090156555, "learning_rate": 6.599256505576209e-06, "loss": 0.0098, "step": 5075 }, { "epoch": 10.967741935483872, "grad_norm": 0.364681214094162, "learning_rate": 6.580669144981413e-06, "loss": 0.0087, "step": 5100 }, { "epoch": 11.021505376344086, "grad_norm": 0.416103720664978, "learning_rate": 6.562081784386618e-06, "loss": 0.0072, "step": 5125 }, { "epoch": 11.075268817204302, "grad_norm": 0.2710916996002197, "learning_rate": 6.5434944237918215e-06, "loss": 0.0063, "step": 5150 }, { "epoch": 11.129032258064516, "grad_norm": 1.4234521389007568, "learning_rate": 6.5249070631970265e-06, "loss": 0.0062, "step": 5175 }, { "epoch": 11.182795698924732, "grad_norm": 0.800237238407135, "learning_rate": 6.506319702602231e-06, "loss": 0.0075, "step": 5200 }, { "epoch": 11.236559139784946, "grad_norm": 0.4724205732345581, "learning_rate": 6.487732342007436e-06, "loss": 0.0053, "step": 5225 }, { "epoch": 11.290322580645162, "grad_norm": 0.12521684169769287, "learning_rate": 6.46914498141264e-06, "loss": 0.0079, "step": 5250 }, { "epoch": 11.344086021505376, "grad_norm": 0.2039920538663864, "learning_rate": 6.450557620817845e-06, "loss": 0.0069, "step": 5275 }, { "epoch": 11.397849462365592, "grad_norm": 1.678312063217163, "learning_rate": 6.43197026022305e-06, "loss": 0.008, "step": 5300 }, { "epoch": 11.451612903225806, "grad_norm": 0.8350504636764526, "learning_rate": 6.413382899628253e-06, "loss": 0.0069, "step": 5325 }, { "epoch": 11.505376344086022, "grad_norm": 0.6541998982429504, "learning_rate": 6.394795539033458e-06, "loss": 0.009, "step": 5350 }, { "epoch": 11.559139784946236, "grad_norm": 1.2869340181350708, "learning_rate": 6.376208178438662e-06, "loss": 0.0051, "step": 5375 }, { "epoch": 11.612903225806452, "grad_norm": 0.987830638885498, "learning_rate": 6.357620817843867e-06, "loss": 0.0072, "step": 5400 }, { "epoch": 11.666666666666666, "grad_norm": 0.4543008804321289, "learning_rate": 6.339033457249071e-06, "loss": 0.0065, "step": 5425 }, { "epoch": 11.720430107526882, "grad_norm": 0.866301953792572, "learning_rate": 6.320446096654275e-06, "loss": 0.0071, "step": 5450 }, { "epoch": 11.774193548387096, "grad_norm": 1.1665536165237427, "learning_rate": 6.3018587360594795e-06, "loss": 0.0089, "step": 5475 }, { "epoch": 11.827956989247312, "grad_norm": 0.5745353102684021, "learning_rate": 6.2832713754646845e-06, "loss": 0.0086, "step": 5500 }, { "epoch": 11.827956989247312, "eval_loss": 0.3715842068195343, "eval_runtime": 202.8772, "eval_samples_per_second": 4.663, "eval_steps_per_second": 0.587, "eval_wer": 15.586576684847925, "step": 5500 }, { "epoch": 11.881720430107526, "grad_norm": 0.7137680053710938, "learning_rate": 6.2646840148698895e-06, "loss": 0.0063, "step": 5525 }, { "epoch": 11.935483870967742, "grad_norm": 1.8331615924835205, "learning_rate": 6.246096654275094e-06, "loss": 0.0052, "step": 5550 }, { "epoch": 11.989247311827956, "grad_norm": 1.306740403175354, "learning_rate": 6.2275092936802986e-06, "loss": 0.0084, "step": 5575 }, { "epoch": 12.043010752688172, "grad_norm": 0.4689745008945465, "learning_rate": 6.208921933085502e-06, "loss": 0.0054, "step": 5600 }, { "epoch": 12.096774193548388, "grad_norm": 0.8853312134742737, "learning_rate": 6.190334572490707e-06, "loss": 0.0038, "step": 5625 }, { "epoch": 12.150537634408602, "grad_norm": 0.18394626677036285, "learning_rate": 6.171747211895911e-06, "loss": 0.0058, "step": 5650 }, { "epoch": 12.204301075268818, "grad_norm": 0.35906341671943665, "learning_rate": 6.153159851301116e-06, "loss": 0.0048, "step": 5675 }, { "epoch": 12.258064516129032, "grad_norm": 0.0934007316827774, "learning_rate": 6.13457249070632e-06, "loss": 0.0056, "step": 5700 }, { "epoch": 12.311827956989248, "grad_norm": 0.6383976340293884, "learning_rate": 6.115985130111525e-06, "loss": 0.0049, "step": 5725 }, { "epoch": 12.365591397849462, "grad_norm": 0.3622893989086151, "learning_rate": 6.097397769516728e-06, "loss": 0.0064, "step": 5750 }, { "epoch": 12.419354838709678, "grad_norm": 0.21196268498897552, "learning_rate": 6.078810408921933e-06, "loss": 0.0064, "step": 5775 }, { "epoch": 12.473118279569892, "grad_norm": 0.3381194472312927, "learning_rate": 6.060223048327138e-06, "loss": 0.0054, "step": 5800 }, { "epoch": 12.526881720430108, "grad_norm": 1.9906443357467651, "learning_rate": 6.041635687732342e-06, "loss": 0.0061, "step": 5825 }, { "epoch": 12.580645161290322, "grad_norm": 0.3197634220123291, "learning_rate": 6.023048327137547e-06, "loss": 0.0053, "step": 5850 }, { "epoch": 12.634408602150538, "grad_norm": 0.18474631011486053, "learning_rate": 6.0044609665427515e-06, "loss": 0.0065, "step": 5875 }, { "epoch": 12.688172043010752, "grad_norm": 0.8498281240463257, "learning_rate": 5.985873605947956e-06, "loss": 0.004, "step": 5900 }, { "epoch": 12.741935483870968, "grad_norm": 0.4391692578792572, "learning_rate": 5.96728624535316e-06, "loss": 0.006, "step": 5925 }, { "epoch": 12.795698924731182, "grad_norm": 0.6688899993896484, "learning_rate": 5.948698884758365e-06, "loss": 0.0053, "step": 5950 }, { "epoch": 12.849462365591398, "grad_norm": 0.9713292121887207, "learning_rate": 5.930111524163569e-06, "loss": 0.0072, "step": 5975 }, { "epoch": 12.903225806451612, "grad_norm": 0.8484262228012085, "learning_rate": 5.911524163568774e-06, "loss": 0.0068, "step": 6000 }, { "epoch": 12.903225806451612, "eval_loss": 0.37204521894454956, "eval_runtime": 201.9047, "eval_samples_per_second": 4.685, "eval_steps_per_second": 0.589, "eval_wer": 14.588148285106776, "step": 6000 }, { "epoch": 12.956989247311828, "grad_norm": 1.0728837251663208, "learning_rate": 5.892936802973979e-06, "loss": 0.0084, "step": 6025 }, { "epoch": 13.010752688172044, "grad_norm": 0.4754142761230469, "learning_rate": 5.874349442379182e-06, "loss": 0.0087, "step": 6050 }, { "epoch": 13.064516129032258, "grad_norm": 0.3025985062122345, "learning_rate": 5.855762081784387e-06, "loss": 0.0063, "step": 6075 }, { "epoch": 13.118279569892474, "grad_norm": 0.3236280083656311, "learning_rate": 5.837174721189591e-06, "loss": 0.0055, "step": 6100 }, { "epoch": 13.172043010752688, "grad_norm": 0.508432924747467, "learning_rate": 5.818587360594796e-06, "loss": 0.0053, "step": 6125 }, { "epoch": 13.225806451612904, "grad_norm": 1.6511017084121704, "learning_rate": 5.8e-06, "loss": 0.0046, "step": 6150 }, { "epoch": 13.279569892473118, "grad_norm": 0.142063707113266, "learning_rate": 5.781412639405205e-06, "loss": 0.0051, "step": 6175 }, { "epoch": 13.333333333333334, "grad_norm": 0.11750756949186325, "learning_rate": 5.762825278810409e-06, "loss": 0.0048, "step": 6200 }, { "epoch": 13.387096774193548, "grad_norm": 0.8060685396194458, "learning_rate": 5.744237918215614e-06, "loss": 0.0057, "step": 6225 }, { "epoch": 13.440860215053764, "grad_norm": 0.452999472618103, "learning_rate": 5.725650557620819e-06, "loss": 0.0059, "step": 6250 }, { "epoch": 13.494623655913978, "grad_norm": 1.3556956052780151, "learning_rate": 5.707063197026023e-06, "loss": 0.0049, "step": 6275 }, { "epoch": 13.548387096774194, "grad_norm": 0.1406233310699463, "learning_rate": 5.688475836431228e-06, "loss": 0.0041, "step": 6300 }, { "epoch": 13.602150537634408, "grad_norm": 0.6670034527778625, "learning_rate": 5.669888475836432e-06, "loss": 0.0057, "step": 6325 }, { "epoch": 13.655913978494624, "grad_norm": 1.7057311534881592, "learning_rate": 5.651301115241636e-06, "loss": 0.0056, "step": 6350 }, { "epoch": 13.709677419354838, "grad_norm": 0.7842967510223389, "learning_rate": 5.63271375464684e-06, "loss": 0.0062, "step": 6375 }, { "epoch": 13.763440860215054, "grad_norm": 0.7574280500411987, "learning_rate": 5.614126394052045e-06, "loss": 0.006, "step": 6400 }, { "epoch": 13.817204301075268, "grad_norm": 1.1247819662094116, "learning_rate": 5.595539033457249e-06, "loss": 0.0091, "step": 6425 }, { "epoch": 13.870967741935484, "grad_norm": 0.5980854034423828, "learning_rate": 5.576951672862454e-06, "loss": 0.005, "step": 6450 }, { "epoch": 13.924731182795698, "grad_norm": 0.6640056371688843, "learning_rate": 5.558364312267659e-06, "loss": 0.0061, "step": 6475 }, { "epoch": 13.978494623655914, "grad_norm": 0.6742274165153503, "learning_rate": 5.5397769516728625e-06, "loss": 0.0056, "step": 6500 }, { "epoch": 13.978494623655914, "eval_loss": 0.37743857502937317, "eval_runtime": 202.8285, "eval_samples_per_second": 4.664, "eval_steps_per_second": 0.587, "eval_wer": 14.819265970232042, "step": 6500 }, { "epoch": 14.03225806451613, "grad_norm": 0.1915878802537918, "learning_rate": 5.5211895910780674e-06, "loss": 0.0045, "step": 6525 }, { "epoch": 14.086021505376344, "grad_norm": 0.09815018624067307, "learning_rate": 5.5026022304832716e-06, "loss": 0.0057, "step": 6550 }, { "epoch": 14.13978494623656, "grad_norm": 0.04359288886189461, "learning_rate": 5.4840148698884765e-06, "loss": 0.005, "step": 6575 }, { "epoch": 14.193548387096774, "grad_norm": 0.28134745359420776, "learning_rate": 5.465427509293681e-06, "loss": 0.0029, "step": 6600 }, { "epoch": 14.24731182795699, "grad_norm": 0.6944845914840698, "learning_rate": 5.446840148698886e-06, "loss": 0.0056, "step": 6625 }, { "epoch": 14.301075268817204, "grad_norm": 1.5637778043746948, "learning_rate": 5.428252788104089e-06, "loss": 0.0037, "step": 6650 }, { "epoch": 14.35483870967742, "grad_norm": 0.49470245838165283, "learning_rate": 5.409665427509294e-06, "loss": 0.0064, "step": 6675 }, { "epoch": 14.408602150537634, "grad_norm": 0.055743150413036346, "learning_rate": 5.391078066914499e-06, "loss": 0.0033, "step": 6700 }, { "epoch": 14.46236559139785, "grad_norm": 0.20047767460346222, "learning_rate": 5.372490706319703e-06, "loss": 0.0047, "step": 6725 }, { "epoch": 14.516129032258064, "grad_norm": 0.36383625864982605, "learning_rate": 5.353903345724908e-06, "loss": 0.0037, "step": 6750 }, { "epoch": 14.56989247311828, "grad_norm": 0.07147414237260818, "learning_rate": 5.335315985130112e-06, "loss": 0.0026, "step": 6775 }, { "epoch": 14.623655913978494, "grad_norm": 0.28435853123664856, "learning_rate": 5.316728624535316e-06, "loss": 0.0043, "step": 6800 }, { "epoch": 14.67741935483871, "grad_norm": 0.27296435832977295, "learning_rate": 5.29814126394052e-06, "loss": 0.0068, "step": 6825 }, { "epoch": 14.731182795698924, "grad_norm": 0.9266132116317749, "learning_rate": 5.279553903345725e-06, "loss": 0.0065, "step": 6850 }, { "epoch": 14.78494623655914, "grad_norm": 0.4447098970413208, "learning_rate": 5.2609665427509295e-06, "loss": 0.0051, "step": 6875 }, { "epoch": 14.838709677419354, "grad_norm": 0.6710329055786133, "learning_rate": 5.2423791821561345e-06, "loss": 0.0044, "step": 6900 }, { "epoch": 14.89247311827957, "grad_norm": 0.5553959012031555, "learning_rate": 5.2237918215613395e-06, "loss": 0.0062, "step": 6925 }, { "epoch": 14.946236559139784, "grad_norm": 0.867906928062439, "learning_rate": 5.205204460966543e-06, "loss": 0.0075, "step": 6950 }, { "epoch": 15.0, "grad_norm": 0.5631603002548218, "learning_rate": 5.186617100371748e-06, "loss": 0.0056, "step": 6975 }, { "epoch": 15.053763440860216, "grad_norm": 0.16968116164207458, "learning_rate": 5.168029739776952e-06, "loss": 0.0032, "step": 7000 }, { "epoch": 15.053763440860216, "eval_loss": 0.3897517919540405, "eval_runtime": 202.1138, "eval_samples_per_second": 4.681, "eval_steps_per_second": 0.589, "eval_wer": 14.597392992511788, "step": 7000 }, { "epoch": 15.10752688172043, "grad_norm": 0.43974125385284424, "learning_rate": 5.149442379182157e-06, "loss": 0.005, "step": 7025 }, { "epoch": 15.161290322580646, "grad_norm": 0.1777154952287674, "learning_rate": 5.130855018587361e-06, "loss": 0.0023, "step": 7050 }, { "epoch": 15.21505376344086, "grad_norm": 0.0768185630440712, "learning_rate": 5.112267657992566e-06, "loss": 0.0045, "step": 7075 }, { "epoch": 15.268817204301076, "grad_norm": 0.04717967286705971, "learning_rate": 5.093680297397769e-06, "loss": 0.0043, "step": 7100 }, { "epoch": 15.32258064516129, "grad_norm": 0.25022652745246887, "learning_rate": 5.075092936802974e-06, "loss": 0.003, "step": 7125 }, { "epoch": 15.376344086021506, "grad_norm": 0.07506144791841507, "learning_rate": 5.056505576208179e-06, "loss": 0.0043, "step": 7150 }, { "epoch": 15.43010752688172, "grad_norm": 0.850889265537262, "learning_rate": 5.037918215613383e-06, "loss": 0.0055, "step": 7175 }, { "epoch": 15.483870967741936, "grad_norm": 0.5556985139846802, "learning_rate": 5.019330855018588e-06, "loss": 0.0051, "step": 7200 }, { "epoch": 15.53763440860215, "grad_norm": 0.0634092465043068, "learning_rate": 5.0007434944237924e-06, "loss": 0.003, "step": 7225 }, { "epoch": 15.591397849462366, "grad_norm": 0.2446642518043518, "learning_rate": 4.982156133828997e-06, "loss": 0.0047, "step": 7250 }, { "epoch": 15.64516129032258, "grad_norm": 1.191821575164795, "learning_rate": 4.9635687732342016e-06, "loss": 0.0045, "step": 7275 }, { "epoch": 15.698924731182796, "grad_norm": 0.4117543399333954, "learning_rate": 4.944981412639406e-06, "loss": 0.0062, "step": 7300 }, { "epoch": 15.75268817204301, "grad_norm": 0.8248342275619507, "learning_rate": 4.92639405204461e-06, "loss": 0.0043, "step": 7325 }, { "epoch": 15.806451612903226, "grad_norm": 0.29120975732803345, "learning_rate": 4.907806691449815e-06, "loss": 0.0061, "step": 7350 }, { "epoch": 15.86021505376344, "grad_norm": 0.0745767205953598, "learning_rate": 4.889219330855019e-06, "loss": 0.0048, "step": 7375 }, { "epoch": 15.913978494623656, "grad_norm": 0.10059848427772522, "learning_rate": 4.870631970260223e-06, "loss": 0.004, "step": 7400 }, { "epoch": 15.967741935483872, "grad_norm": 0.11489495635032654, "learning_rate": 4.852044609665428e-06, "loss": 0.004, "step": 7425 }, { "epoch": 16.021505376344088, "grad_norm": 0.04976237937808037, "learning_rate": 4.833457249070632e-06, "loss": 0.0036, "step": 7450 }, { "epoch": 16.0752688172043, "grad_norm": 0.13619866967201233, "learning_rate": 4.814869888475836e-06, "loss": 0.0031, "step": 7475 }, { "epoch": 16.129032258064516, "grad_norm": 0.35101068019866943, "learning_rate": 4.796282527881041e-06, "loss": 0.0037, "step": 7500 }, { "epoch": 16.129032258064516, "eval_loss": 0.390476793050766, "eval_runtime": 202.418, "eval_samples_per_second": 4.673, "eval_steps_per_second": 0.588, "eval_wer": 14.708329481371916, "step": 7500 }, { "epoch": 16.182795698924732, "grad_norm": 0.315719872713089, "learning_rate": 4.777695167286246e-06, "loss": 0.003, "step": 7525 }, { "epoch": 16.236559139784948, "grad_norm": 1.0123934745788574, "learning_rate": 4.75910780669145e-06, "loss": 0.0035, "step": 7550 }, { "epoch": 16.29032258064516, "grad_norm": 0.517242968082428, "learning_rate": 4.7405204460966545e-06, "loss": 0.003, "step": 7575 }, { "epoch": 16.344086021505376, "grad_norm": 0.06284263730049133, "learning_rate": 4.7219330855018595e-06, "loss": 0.0024, "step": 7600 }, { "epoch": 16.397849462365592, "grad_norm": 0.02318274788558483, "learning_rate": 4.703345724907064e-06, "loss": 0.0039, "step": 7625 }, { "epoch": 16.451612903225808, "grad_norm": 0.2524121105670929, "learning_rate": 4.684758364312268e-06, "loss": 0.0041, "step": 7650 }, { "epoch": 16.50537634408602, "grad_norm": 0.047711629420518875, "learning_rate": 4.666171003717473e-06, "loss": 0.0041, "step": 7675 }, { "epoch": 16.559139784946236, "grad_norm": 0.32103028893470764, "learning_rate": 4.647583643122677e-06, "loss": 0.0061, "step": 7700 }, { "epoch": 16.612903225806452, "grad_norm": 1.1334346532821655, "learning_rate": 4.628996282527882e-06, "loss": 0.0043, "step": 7725 }, { "epoch": 16.666666666666668, "grad_norm": 0.11029840260744095, "learning_rate": 4.610408921933086e-06, "loss": 0.0015, "step": 7750 }, { "epoch": 16.72043010752688, "grad_norm": 0.03998972475528717, "learning_rate": 4.59182156133829e-06, "loss": 0.0035, "step": 7775 }, { "epoch": 16.774193548387096, "grad_norm": 0.9175609946250916, "learning_rate": 4.573234200743495e-06, "loss": 0.0037, "step": 7800 }, { "epoch": 16.827956989247312, "grad_norm": 0.055633947253227234, "learning_rate": 4.554646840148699e-06, "loss": 0.0035, "step": 7825 }, { "epoch": 16.881720430107528, "grad_norm": 0.28876572847366333, "learning_rate": 4.536059479553903e-06, "loss": 0.0036, "step": 7850 }, { "epoch": 16.93548387096774, "grad_norm": 0.09186781197786331, "learning_rate": 4.517472118959108e-06, "loss": 0.0034, "step": 7875 }, { "epoch": 16.989247311827956, "grad_norm": 0.024787306785583496, "learning_rate": 4.4988847583643125e-06, "loss": 0.0035, "step": 7900 }, { "epoch": 17.043010752688172, "grad_norm": 0.30542510747909546, "learning_rate": 4.480297397769517e-06, "loss": 0.0027, "step": 7925 }, { "epoch": 17.096774193548388, "grad_norm": 0.20729881525039673, "learning_rate": 4.461710037174722e-06, "loss": 0.0016, "step": 7950 }, { "epoch": 17.150537634408604, "grad_norm": 0.17623752355575562, "learning_rate": 4.4431226765799266e-06, "loss": 0.003, "step": 7975 }, { "epoch": 17.204301075268816, "grad_norm": 1.2774063348770142, "learning_rate": 4.424535315985131e-06, "loss": 0.0041, "step": 8000 }, { "epoch": 17.204301075268816, "eval_loss": 0.38328301906585693, "eval_runtime": 202.8745, "eval_samples_per_second": 4.663, "eval_steps_per_second": 0.587, "eval_wer": 14.440232966626606, "step": 8000 }, { "epoch": 17.258064516129032, "grad_norm": 0.24027810990810394, "learning_rate": 4.405947955390335e-06, "loss": 0.0042, "step": 8025 }, { "epoch": 17.311827956989248, "grad_norm": 0.6575544476509094, "learning_rate": 4.38736059479554e-06, "loss": 0.0033, "step": 8050 }, { "epoch": 17.365591397849464, "grad_norm": 0.7652745842933655, "learning_rate": 4.368773234200744e-06, "loss": 0.0025, "step": 8075 }, { "epoch": 17.419354838709676, "grad_norm": 1.0893921852111816, "learning_rate": 4.350185873605948e-06, "loss": 0.0044, "step": 8100 }, { "epoch": 17.473118279569892, "grad_norm": 0.381245881319046, "learning_rate": 4.331598513011153e-06, "loss": 0.0053, "step": 8125 }, { "epoch": 17.526881720430108, "grad_norm": 0.6958642601966858, "learning_rate": 4.313011152416357e-06, "loss": 0.003, "step": 8150 }, { "epoch": 17.580645161290324, "grad_norm": 0.3542903661727905, "learning_rate": 4.294423791821561e-06, "loss": 0.0031, "step": 8175 }, { "epoch": 17.634408602150536, "grad_norm": 0.12086351215839386, "learning_rate": 4.275836431226766e-06, "loss": 0.0024, "step": 8200 }, { "epoch": 17.688172043010752, "grad_norm": 0.41448554396629333, "learning_rate": 4.2572490706319704e-06, "loss": 0.0035, "step": 8225 }, { "epoch": 17.741935483870968, "grad_norm": 0.06691323965787888, "learning_rate": 4.238661710037175e-06, "loss": 0.0035, "step": 8250 }, { "epoch": 17.795698924731184, "grad_norm": 0.7434226870536804, "learning_rate": 4.2200743494423795e-06, "loss": 0.0053, "step": 8275 }, { "epoch": 17.849462365591396, "grad_norm": 0.36501583456993103, "learning_rate": 4.201486988847584e-06, "loss": 0.004, "step": 8300 }, { "epoch": 17.903225806451612, "grad_norm": 0.2055322229862213, "learning_rate": 4.182899628252789e-06, "loss": 0.0043, "step": 8325 }, { "epoch": 17.956989247311828, "grad_norm": 0.7392027378082275, "learning_rate": 4.164312267657993e-06, "loss": 0.004, "step": 8350 }, { "epoch": 18.010752688172044, "grad_norm": 0.07294179499149323, "learning_rate": 4.145724907063197e-06, "loss": 0.0055, "step": 8375 }, { "epoch": 18.06451612903226, "grad_norm": 0.0656030923128128, "learning_rate": 4.127137546468402e-06, "loss": 0.0024, "step": 8400 }, { "epoch": 18.118279569892472, "grad_norm": 0.05267421901226044, "learning_rate": 4.108550185873607e-06, "loss": 0.0033, "step": 8425 }, { "epoch": 18.172043010752688, "grad_norm": 0.704990565776825, "learning_rate": 4.089962825278811e-06, "loss": 0.0047, "step": 8450 }, { "epoch": 18.225806451612904, "grad_norm": 0.24530240893363953, "learning_rate": 4.071375464684015e-06, "loss": 0.0043, "step": 8475 }, { "epoch": 18.27956989247312, "grad_norm": 0.19997531175613403, "learning_rate": 4.05278810408922e-06, "loss": 0.0035, "step": 8500 }, { "epoch": 18.27956989247312, "eval_loss": 0.3821885585784912, "eval_runtime": 203.3448, "eval_samples_per_second": 4.652, "eval_steps_per_second": 0.585, "eval_wer": 14.412498844411575, "step": 8500 }, { "epoch": 18.333333333333332, "grad_norm": 0.0332474559545517, "learning_rate": 4.034200743494424e-06, "loss": 0.0031, "step": 8525 }, { "epoch": 18.387096774193548, "grad_norm": 1.3561875820159912, "learning_rate": 4.015613382899628e-06, "loss": 0.0017, "step": 8550 }, { "epoch": 18.440860215053764, "grad_norm": 0.029482562094926834, "learning_rate": 3.997026022304833e-06, "loss": 0.0021, "step": 8575 }, { "epoch": 18.49462365591398, "grad_norm": 0.12231668084859848, "learning_rate": 3.9784386617100375e-06, "loss": 0.0043, "step": 8600 }, { "epoch": 18.548387096774192, "grad_norm": 0.044476673007011414, "learning_rate": 3.959851301115242e-06, "loss": 0.0024, "step": 8625 }, { "epoch": 18.602150537634408, "grad_norm": 0.6735191345214844, "learning_rate": 3.941263940520447e-06, "loss": 0.0032, "step": 8650 }, { "epoch": 18.655913978494624, "grad_norm": 1.0479316711425781, "learning_rate": 3.922676579925651e-06, "loss": 0.0024, "step": 8675 }, { "epoch": 18.70967741935484, "grad_norm": 0.023525085300207138, "learning_rate": 3.904089219330856e-06, "loss": 0.0049, "step": 8700 }, { "epoch": 18.763440860215052, "grad_norm": 0.165565624833107, "learning_rate": 3.88550185873606e-06, "loss": 0.0039, "step": 8725 }, { "epoch": 18.817204301075268, "grad_norm": 0.5960690379142761, "learning_rate": 3.866914498141264e-06, "loss": 0.003, "step": 8750 }, { "epoch": 18.870967741935484, "grad_norm": 0.23799718916416168, "learning_rate": 3.848327137546469e-06, "loss": 0.002, "step": 8775 }, { "epoch": 18.9247311827957, "grad_norm": 0.01600775308907032, "learning_rate": 3.829739776951673e-06, "loss": 0.0022, "step": 8800 }, { "epoch": 18.978494623655912, "grad_norm": 0.3210331201553345, "learning_rate": 3.8111524163568776e-06, "loss": 0.0033, "step": 8825 }, { "epoch": 19.032258064516128, "grad_norm": 0.05005327984690666, "learning_rate": 3.7925650557620818e-06, "loss": 0.0033, "step": 8850 }, { "epoch": 19.086021505376344, "grad_norm": 0.4820277690887451, "learning_rate": 3.7739776951672863e-06, "loss": 0.0034, "step": 8875 }, { "epoch": 19.13978494623656, "grad_norm": 0.1907467395067215, "learning_rate": 3.7553903345724913e-06, "loss": 0.0025, "step": 8900 }, { "epoch": 19.193548387096776, "grad_norm": 0.023403950035572052, "learning_rate": 3.7368029739776954e-06, "loss": 0.0024, "step": 8925 }, { "epoch": 19.247311827956988, "grad_norm": 0.02337467670440674, "learning_rate": 3.7182156133829e-06, "loss": 0.0038, "step": 8950 }, { "epoch": 19.301075268817204, "grad_norm": 0.42413467168807983, "learning_rate": 3.6996282527881046e-06, "loss": 0.0048, "step": 8975 }, { "epoch": 19.35483870967742, "grad_norm": 0.0469290092587471, "learning_rate": 3.6810408921933087e-06, "loss": 0.0034, "step": 9000 }, { "epoch": 19.35483870967742, "eval_loss": 0.38839593529701233, "eval_runtime": 202.9911, "eval_samples_per_second": 4.66, "eval_steps_per_second": 0.586, "eval_wer": 14.62512711472682, "step": 9000 }, { "epoch": 19.408602150537636, "grad_norm": 0.2083800882101059, "learning_rate": 3.6624535315985132e-06, "loss": 0.0029, "step": 9025 }, { "epoch": 19.462365591397848, "grad_norm": 0.16142559051513672, "learning_rate": 3.643866171003718e-06, "loss": 0.0028, "step": 9050 }, { "epoch": 19.516129032258064, "grad_norm": 0.02445228025317192, "learning_rate": 3.625278810408922e-06, "loss": 0.0031, "step": 9075 }, { "epoch": 19.56989247311828, "grad_norm": 0.10709693282842636, "learning_rate": 3.6066914498141265e-06, "loss": 0.0028, "step": 9100 }, { "epoch": 19.623655913978496, "grad_norm": 0.48716649413108826, "learning_rate": 3.5881040892193315e-06, "loss": 0.0024, "step": 9125 }, { "epoch": 19.677419354838708, "grad_norm": 0.043807078152894974, "learning_rate": 3.5695167286245356e-06, "loss": 0.0026, "step": 9150 }, { "epoch": 19.731182795698924, "grad_norm": 0.008909267373383045, "learning_rate": 3.55092936802974e-06, "loss": 0.0027, "step": 9175 }, { "epoch": 19.78494623655914, "grad_norm": 0.0496838316321373, "learning_rate": 3.5323420074349447e-06, "loss": 0.0028, "step": 9200 }, { "epoch": 19.838709677419356, "grad_norm": 0.3045809864997864, "learning_rate": 3.513754646840149e-06, "loss": 0.0043, "step": 9225 }, { "epoch": 19.892473118279568, "grad_norm": 0.019404035061597824, "learning_rate": 3.4951672862453534e-06, "loss": 0.0022, "step": 9250 }, { "epoch": 19.946236559139784, "grad_norm": 0.05067993700504303, "learning_rate": 3.476579925650558e-06, "loss": 0.0027, "step": 9275 }, { "epoch": 20.0, "grad_norm": 0.17729219794273376, "learning_rate": 3.457992565055762e-06, "loss": 0.0021, "step": 9300 }, { "epoch": 20.053763440860216, "grad_norm": 0.166994109749794, "learning_rate": 3.4394052044609666e-06, "loss": 0.0025, "step": 9325 }, { "epoch": 20.107526881720432, "grad_norm": 0.026689428836107254, "learning_rate": 3.4208178438661716e-06, "loss": 0.0035, "step": 9350 }, { "epoch": 20.161290322580644, "grad_norm": 0.016895387321710587, "learning_rate": 3.4022304832713757e-06, "loss": 0.0026, "step": 9375 }, { "epoch": 20.21505376344086, "grad_norm": 0.06793255358934402, "learning_rate": 3.3836431226765803e-06, "loss": 0.0015, "step": 9400 }, { "epoch": 20.268817204301076, "grad_norm": 0.034562163054943085, "learning_rate": 3.365055762081785e-06, "loss": 0.0027, "step": 9425 }, { "epoch": 20.322580645161292, "grad_norm": 0.16164565086364746, "learning_rate": 3.346468401486989e-06, "loss": 0.0023, "step": 9450 }, { "epoch": 20.376344086021504, "grad_norm": 0.015665782615542412, "learning_rate": 3.3278810408921935e-06, "loss": 0.002, "step": 9475 }, { "epoch": 20.43010752688172, "grad_norm": 0.009676897898316383, "learning_rate": 3.309293680297398e-06, "loss": 0.0027, "step": 9500 }, { "epoch": 20.43010752688172, "eval_loss": 0.39532387256622314, "eval_runtime": 202.6591, "eval_samples_per_second": 4.668, "eval_steps_per_second": 0.587, "eval_wer": 14.449477674031616, "step": 9500 }, { "epoch": 20.483870967741936, "grad_norm": 0.16924279928207397, "learning_rate": 3.2907063197026022e-06, "loss": 0.0022, "step": 9525 }, { "epoch": 20.537634408602152, "grad_norm": 0.3499106168746948, "learning_rate": 3.272118959107807e-06, "loss": 0.0017, "step": 9550 }, { "epoch": 20.591397849462364, "grad_norm": 0.5156524181365967, "learning_rate": 3.2535315985130113e-06, "loss": 0.0027, "step": 9575 }, { "epoch": 20.64516129032258, "grad_norm": 0.12964314222335815, "learning_rate": 3.234944237918216e-06, "loss": 0.0029, "step": 9600 }, { "epoch": 20.698924731182796, "grad_norm": 0.4109344482421875, "learning_rate": 3.2163568773234205e-06, "loss": 0.002, "step": 9625 }, { "epoch": 20.752688172043012, "grad_norm": 0.17528752982616425, "learning_rate": 3.197769516728625e-06, "loss": 0.0025, "step": 9650 }, { "epoch": 20.806451612903224, "grad_norm": 0.27459415793418884, "learning_rate": 3.179182156133829e-06, "loss": 0.002, "step": 9675 }, { "epoch": 20.86021505376344, "grad_norm": 0.7621147036552429, "learning_rate": 3.1605947955390337e-06, "loss": 0.0019, "step": 9700 }, { "epoch": 20.913978494623656, "grad_norm": 0.03115130215883255, "learning_rate": 3.1420074349442383e-06, "loss": 0.0021, "step": 9725 }, { "epoch": 20.967741935483872, "grad_norm": 0.3843834698200226, "learning_rate": 3.1234200743494424e-06, "loss": 0.0037, "step": 9750 }, { "epoch": 21.021505376344088, "grad_norm": 0.11314116418361664, "learning_rate": 3.104832713754647e-06, "loss": 0.0019, "step": 9775 }, { "epoch": 21.0752688172043, "grad_norm": 0.008430559188127518, "learning_rate": 3.0862453531598515e-06, "loss": 0.0016, "step": 9800 }, { "epoch": 21.129032258064516, "grad_norm": 0.014893501996994019, "learning_rate": 3.067657992565056e-06, "loss": 0.0012, "step": 9825 }, { "epoch": 21.182795698924732, "grad_norm": 0.020304594188928604, "learning_rate": 3.0490706319702606e-06, "loss": 0.0014, "step": 9850 }, { "epoch": 21.236559139784948, "grad_norm": 0.12655578553676605, "learning_rate": 3.030483271375465e-06, "loss": 0.0026, "step": 9875 }, { "epoch": 21.29032258064516, "grad_norm": 0.014000285416841507, "learning_rate": 3.0118959107806693e-06, "loss": 0.0026, "step": 9900 }, { "epoch": 21.344086021505376, "grad_norm": 0.026862381026148796, "learning_rate": 2.993308550185874e-06, "loss": 0.0024, "step": 9925 }, { "epoch": 21.397849462365592, "grad_norm": 0.03989304229617119, "learning_rate": 2.9747211895910784e-06, "loss": 0.0018, "step": 9950 }, { "epoch": 21.451612903225808, "grad_norm": 0.019757866859436035, "learning_rate": 2.9561338289962825e-06, "loss": 0.0031, "step": 9975 }, { "epoch": 21.50537634408602, "grad_norm": 0.02383114956319332, "learning_rate": 2.937546468401487e-06, "loss": 0.0022, "step": 10000 }, { "epoch": 21.50537634408602, "eval_loss": 0.40046602487564087, "eval_runtime": 202.0733, "eval_samples_per_second": 4.681, "eval_steps_per_second": 0.589, "eval_wer": 14.449477674031616, "step": 10000 }, { "epoch": 21.559139784946236, "grad_norm": 0.019417457282543182, "learning_rate": 2.9189591078066916e-06, "loss": 0.0027, "step": 10025 }, { "epoch": 21.612903225806452, "grad_norm": 0.15449251234531403, "learning_rate": 2.900371747211896e-06, "loss": 0.0017, "step": 10050 }, { "epoch": 21.666666666666668, "grad_norm": 0.16010086238384247, "learning_rate": 2.8817843866171008e-06, "loss": 0.0018, "step": 10075 }, { "epoch": 21.72043010752688, "grad_norm": 0.02312368154525757, "learning_rate": 2.8631970260223053e-06, "loss": 0.0036, "step": 10100 }, { "epoch": 21.774193548387096, "grad_norm": 0.040190454572439194, "learning_rate": 2.8446096654275094e-06, "loss": 0.0024, "step": 10125 }, { "epoch": 21.827956989247312, "grad_norm": 0.030338788405060768, "learning_rate": 2.826022304832714e-06, "loss": 0.0024, "step": 10150 }, { "epoch": 21.881720430107528, "grad_norm": 0.18002262711524963, "learning_rate": 2.8074349442379186e-06, "loss": 0.0029, "step": 10175 }, { "epoch": 21.93548387096774, "grad_norm": 0.047431185841560364, "learning_rate": 2.7888475836431227e-06, "loss": 0.0013, "step": 10200 }, { "epoch": 21.989247311827956, "grad_norm": 0.12101946771144867, "learning_rate": 2.7702602230483272e-06, "loss": 0.003, "step": 10225 }, { "epoch": 22.043010752688172, "grad_norm": 0.01733570732176304, "learning_rate": 2.751672862453532e-06, "loss": 0.0013, "step": 10250 }, { "epoch": 22.096774193548388, "grad_norm": 0.02413998357951641, "learning_rate": 2.7330855018587364e-06, "loss": 0.0024, "step": 10275 }, { "epoch": 22.150537634408604, "grad_norm": 0.006610923912376165, "learning_rate": 2.714498141263941e-06, "loss": 0.0015, "step": 10300 }, { "epoch": 22.204301075268816, "grad_norm": 0.11478333920240402, "learning_rate": 2.6959107806691455e-06, "loss": 0.0014, "step": 10325 }, { "epoch": 22.258064516129032, "grad_norm": 0.8776764869689941, "learning_rate": 2.6773234200743496e-06, "loss": 0.0019, "step": 10350 }, { "epoch": 22.311827956989248, "grad_norm": 0.020020902156829834, "learning_rate": 2.658736059479554e-06, "loss": 0.0022, "step": 10375 }, { "epoch": 22.365591397849464, "grad_norm": 0.019508883357048035, "learning_rate": 2.6401486988847587e-06, "loss": 0.0017, "step": 10400 }, { "epoch": 22.419354838709676, "grad_norm": 0.02609153278172016, "learning_rate": 2.621561338289963e-06, "loss": 0.0013, "step": 10425 }, { "epoch": 22.473118279569892, "grad_norm": 0.027138570323586464, "learning_rate": 2.6029739776951674e-06, "loss": 0.0019, "step": 10450 }, { "epoch": 22.526881720430108, "grad_norm": 0.01063444558531046, "learning_rate": 2.584386617100372e-06, "loss": 0.0026, "step": 10475 }, { "epoch": 22.580645161290324, "grad_norm": 0.24903129041194916, "learning_rate": 2.565799256505576e-06, "loss": 0.0027, "step": 10500 }, { "epoch": 22.580645161290324, "eval_loss": 0.40335774421691895, "eval_runtime": 202.0679, "eval_samples_per_second": 4.682, "eval_steps_per_second": 0.589, "eval_wer": 13.93177405935102, "step": 10500 }, { "epoch": 22.634408602150536, "grad_norm": 0.1529041826725006, "learning_rate": 2.547211895910781e-06, "loss": 0.0018, "step": 10525 }, { "epoch": 22.688172043010752, "grad_norm": 0.02129989117383957, "learning_rate": 2.5286245353159856e-06, "loss": 0.0013, "step": 10550 }, { "epoch": 22.741935483870968, "grad_norm": 0.013442150317132473, "learning_rate": 2.5100371747211898e-06, "loss": 0.0028, "step": 10575 }, { "epoch": 22.795698924731184, "grad_norm": 0.024951398372650146, "learning_rate": 2.4914498141263943e-06, "loss": 0.0022, "step": 10600 }, { "epoch": 22.849462365591396, "grad_norm": 0.3933217525482178, "learning_rate": 2.472862453531599e-06, "loss": 0.0033, "step": 10625 }, { "epoch": 22.903225806451612, "grad_norm": 0.030309738591313362, "learning_rate": 2.454275092936803e-06, "loss": 0.0012, "step": 10650 }, { "epoch": 22.956989247311828, "grad_norm": 0.1965196579694748, "learning_rate": 2.4356877323420076e-06, "loss": 0.0019, "step": 10675 }, { "epoch": 23.010752688172044, "grad_norm": 0.2897844612598419, "learning_rate": 2.417100371747212e-06, "loss": 0.0017, "step": 10700 }, { "epoch": 23.06451612903226, "grad_norm": 0.010055635124444962, "learning_rate": 2.3985130111524167e-06, "loss": 0.0017, "step": 10725 }, { "epoch": 23.118279569892472, "grad_norm": 0.02669104002416134, "learning_rate": 2.379925650557621e-06, "loss": 0.0005, "step": 10750 }, { "epoch": 23.172043010752688, "grad_norm": 0.2305319756269455, "learning_rate": 2.3613382899628253e-06, "loss": 0.0015, "step": 10775 }, { "epoch": 23.225806451612904, "grad_norm": 0.009956962428987026, "learning_rate": 2.34275092936803e-06, "loss": 0.0024, "step": 10800 }, { "epoch": 23.27956989247312, "grad_norm": 0.1403415948152542, "learning_rate": 2.3241635687732345e-06, "loss": 0.0014, "step": 10825 }, { "epoch": 23.333333333333332, "grad_norm": 0.21458983421325684, "learning_rate": 2.305576208178439e-06, "loss": 0.0017, "step": 10850 }, { "epoch": 23.387096774193548, "grad_norm": 0.008475505746901035, "learning_rate": 2.286988847583643e-06, "loss": 0.001, "step": 10875 }, { "epoch": 23.440860215053764, "grad_norm": 0.02105923928320408, "learning_rate": 2.2684014869888477e-06, "loss": 0.0013, "step": 10900 }, { "epoch": 23.49462365591398, "grad_norm": 0.007669220678508282, "learning_rate": 2.2498141263940523e-06, "loss": 0.0016, "step": 10925 }, { "epoch": 23.548387096774192, "grad_norm": 0.007480244617909193, "learning_rate": 2.231226765799257e-06, "loss": 0.0013, "step": 10950 }, { "epoch": 23.602150537634408, "grad_norm": 0.00940194632858038, "learning_rate": 2.212639405204461e-06, "loss": 0.0031, "step": 10975 }, { "epoch": 23.655913978494624, "grad_norm": 0.013057105243206024, "learning_rate": 2.1940520446096655e-06, "loss": 0.0012, "step": 11000 }, { "epoch": 23.655913978494624, "eval_loss": 0.40596359968185425, "eval_runtime": 202.5237, "eval_samples_per_second": 4.671, "eval_steps_per_second": 0.588, "eval_wer": 13.941018766756033, "step": 11000 }, { "epoch": 23.70967741935484, "grad_norm": 0.0065338280983269215, "learning_rate": 2.17546468401487e-06, "loss": 0.0015, "step": 11025 }, { "epoch": 23.763440860215052, "grad_norm": 0.013204723596572876, "learning_rate": 2.1568773234200746e-06, "loss": 0.003, "step": 11050 }, { "epoch": 23.817204301075268, "grad_norm": 0.027307022362947464, "learning_rate": 2.138289962825279e-06, "loss": 0.0027, "step": 11075 }, { "epoch": 23.870967741935484, "grad_norm": 0.014446156099438667, "learning_rate": 2.1197026022304833e-06, "loss": 0.0014, "step": 11100 }, { "epoch": 23.9247311827957, "grad_norm": 0.014391875825822353, "learning_rate": 2.101115241635688e-06, "loss": 0.0031, "step": 11125 }, { "epoch": 23.978494623655912, "grad_norm": 0.0412728525698185, "learning_rate": 2.0825278810408924e-06, "loss": 0.0022, "step": 11150 }, { "epoch": 24.032258064516128, "grad_norm": 0.009646103717386723, "learning_rate": 2.063940520446097e-06, "loss": 0.0015, "step": 11175 }, { "epoch": 24.086021505376344, "grad_norm": 0.007044603582471609, "learning_rate": 2.045353159851301e-06, "loss": 0.0012, "step": 11200 }, { "epoch": 24.13978494623656, "grad_norm": 0.013142594136297703, "learning_rate": 2.0267657992565057e-06, "loss": 0.0013, "step": 11225 }, { "epoch": 24.193548387096776, "grad_norm": 0.006704692263156176, "learning_rate": 2.00817843866171e-06, "loss": 0.0015, "step": 11250 }, { "epoch": 24.247311827956988, "grad_norm": 0.005626600701361895, "learning_rate": 1.9895910780669148e-06, "loss": 0.0012, "step": 11275 }, { "epoch": 24.301075268817204, "grad_norm": 0.008840459398925304, "learning_rate": 1.9710037174721193e-06, "loss": 0.0019, "step": 11300 }, { "epoch": 24.35483870967742, "grad_norm": 0.005357383284717798, "learning_rate": 1.9524163568773235e-06, "loss": 0.0011, "step": 11325 }, { "epoch": 24.408602150537636, "grad_norm": 0.005820517428219318, "learning_rate": 1.933828996282528e-06, "loss": 0.0014, "step": 11350 }, { "epoch": 24.462365591397848, "grad_norm": 0.12261584401130676, "learning_rate": 1.9152416356877326e-06, "loss": 0.0009, "step": 11375 }, { "epoch": 24.516129032258064, "grad_norm": 0.16665996611118317, "learning_rate": 1.8966542750929371e-06, "loss": 0.0032, "step": 11400 }, { "epoch": 24.56989247311828, "grad_norm": 0.006091310176998377, "learning_rate": 1.8780669144981415e-06, "loss": 0.0016, "step": 11425 }, { "epoch": 24.623655913978496, "grad_norm": 0.027028294280171394, "learning_rate": 1.8594795539033458e-06, "loss": 0.0013, "step": 11450 }, { "epoch": 24.677419354838708, "grad_norm": 0.107554592192173, "learning_rate": 1.8408921933085502e-06, "loss": 0.0014, "step": 11475 }, { "epoch": 24.731182795698924, "grad_norm": 0.006071150302886963, "learning_rate": 1.822304832713755e-06, "loss": 0.0008, "step": 11500 }, { "epoch": 24.731182795698924, "eval_loss": 0.4129054546356201, "eval_runtime": 202.4824, "eval_samples_per_second": 4.672, "eval_steps_per_second": 0.588, "eval_wer": 13.848571692705928, "step": 11500 }, { "epoch": 24.78494623655914, "grad_norm": 0.005664244759827852, "learning_rate": 1.8037174721189593e-06, "loss": 0.0019, "step": 11525 }, { "epoch": 24.838709677419356, "grad_norm": 0.007854313589632511, "learning_rate": 1.7851301115241638e-06, "loss": 0.0019, "step": 11550 }, { "epoch": 24.892473118279568, "grad_norm": 0.1173175498843193, "learning_rate": 1.7665427509293682e-06, "loss": 0.002, "step": 11575 }, { "epoch": 24.946236559139784, "grad_norm": 0.005052879452705383, "learning_rate": 1.7479553903345725e-06, "loss": 0.0021, "step": 11600 }, { "epoch": 25.0, "grad_norm": 0.011530703864991665, "learning_rate": 1.7293680297397773e-06, "loss": 0.0017, "step": 11625 }, { "epoch": 25.053763440860216, "grad_norm": 0.07344318926334381, "learning_rate": 1.7107806691449816e-06, "loss": 0.002, "step": 11650 }, { "epoch": 25.107526881720432, "grad_norm": 0.12406457215547562, "learning_rate": 1.692193308550186e-06, "loss": 0.0017, "step": 11675 }, { "epoch": 25.161290322580644, "grad_norm": 0.005189701449126005, "learning_rate": 1.6736059479553903e-06, "loss": 0.0016, "step": 11700 }, { "epoch": 25.21505376344086, "grad_norm": 0.3264636993408203, "learning_rate": 1.655018587360595e-06, "loss": 0.0021, "step": 11725 }, { "epoch": 25.268817204301076, "grad_norm": 0.004206045996397734, "learning_rate": 1.6364312267657994e-06, "loss": 0.0014, "step": 11750 }, { "epoch": 25.322580645161292, "grad_norm": 0.12464595586061478, "learning_rate": 1.6178438661710038e-06, "loss": 0.0019, "step": 11775 }, { "epoch": 25.376344086021504, "grad_norm": 0.0052951849065721035, "learning_rate": 1.5992565055762083e-06, "loss": 0.001, "step": 11800 }, { "epoch": 25.43010752688172, "grad_norm": 0.20631186664104462, "learning_rate": 1.5806691449814127e-06, "loss": 0.0013, "step": 11825 }, { "epoch": 25.483870967741936, "grad_norm": 0.005665977951139212, "learning_rate": 1.5620817843866174e-06, "loss": 0.0023, "step": 11850 }, { "epoch": 25.537634408602152, "grad_norm": 0.18204852938652039, "learning_rate": 1.5434944237918218e-06, "loss": 0.0017, "step": 11875 }, { "epoch": 25.591397849462364, "grad_norm": 0.004289372358471155, "learning_rate": 1.5249070631970261e-06, "loss": 0.0005, "step": 11900 }, { "epoch": 25.64516129032258, "grad_norm": 0.06818500906229019, "learning_rate": 1.5063197026022305e-06, "loss": 0.0005, "step": 11925 }, { "epoch": 25.698924731182796, "grad_norm": 0.14878062903881073, "learning_rate": 1.487732342007435e-06, "loss": 0.0016, "step": 11950 }, { "epoch": 25.752688172043012, "grad_norm": 0.003890681779012084, "learning_rate": 1.4691449814126396e-06, "loss": 0.0016, "step": 11975 }, { "epoch": 25.806451612903224, "grad_norm": 0.004652164876461029, "learning_rate": 1.450557620817844e-06, "loss": 0.001, "step": 12000 }, { "epoch": 25.806451612903224, "eval_loss": 0.4189203381538391, "eval_runtime": 202.0119, "eval_samples_per_second": 4.683, "eval_steps_per_second": 0.589, "eval_wer": 13.830082277895904, "step": 12000 }, { "epoch": 25.86021505376344, "grad_norm": 0.16533516347408295, "learning_rate": 1.4319702602230485e-06, "loss": 0.0011, "step": 12025 }, { "epoch": 25.913978494623656, "grad_norm": 0.0048462748527526855, "learning_rate": 1.4133828996282528e-06, "loss": 0.001, "step": 12050 }, { "epoch": 25.967741935483872, "grad_norm": 0.0046990737318992615, "learning_rate": 1.3947955390334576e-06, "loss": 0.0017, "step": 12075 }, { "epoch": 26.021505376344088, "grad_norm": 0.003037052694708109, "learning_rate": 1.376208178438662e-06, "loss": 0.0024, "step": 12100 }, { "epoch": 26.0752688172043, "grad_norm": 0.08328765630722046, "learning_rate": 1.3576208178438663e-06, "loss": 0.0011, "step": 12125 }, { "epoch": 26.129032258064516, "grad_norm": 0.002522684633731842, "learning_rate": 1.3390334572490706e-06, "loss": 0.0022, "step": 12150 }, { "epoch": 26.182795698924732, "grad_norm": 0.00458819093182683, "learning_rate": 1.3204460966542752e-06, "loss": 0.0018, "step": 12175 }, { "epoch": 26.236559139784948, "grad_norm": 0.004143861588090658, "learning_rate": 1.3018587360594797e-06, "loss": 0.0008, "step": 12200 }, { "epoch": 26.29032258064516, "grad_norm": 0.006522686220705509, "learning_rate": 1.283271375464684e-06, "loss": 0.0014, "step": 12225 }, { "epoch": 26.344086021505376, "grad_norm": 0.0033553235698491335, "learning_rate": 1.2646840148698886e-06, "loss": 0.0018, "step": 12250 }, { "epoch": 26.397849462365592, "grad_norm": 0.004214679356664419, "learning_rate": 1.2460966542750932e-06, "loss": 0.0014, "step": 12275 }, { "epoch": 26.451612903225808, "grad_norm": 0.23780201375484467, "learning_rate": 1.2275092936802975e-06, "loss": 0.0021, "step": 12300 }, { "epoch": 26.50537634408602, "grad_norm": 0.003071879968047142, "learning_rate": 1.2089219330855019e-06, "loss": 0.0021, "step": 12325 }, { "epoch": 26.559139784946236, "grad_norm": 0.003364423755556345, "learning_rate": 1.1903345724907064e-06, "loss": 0.0015, "step": 12350 }, { "epoch": 26.612903225806452, "grad_norm": 0.30511873960494995, "learning_rate": 1.1717472118959108e-06, "loss": 0.0018, "step": 12375 }, { "epoch": 26.666666666666668, "grad_norm": 0.003765388624742627, "learning_rate": 1.1531598513011153e-06, "loss": 0.0026, "step": 12400 }, { "epoch": 26.72043010752688, "grad_norm": 0.13415110111236572, "learning_rate": 1.1345724907063199e-06, "loss": 0.0012, "step": 12425 }, { "epoch": 26.774193548387096, "grad_norm": 0.0052949776872992516, "learning_rate": 1.1159851301115242e-06, "loss": 0.0006, "step": 12450 }, { "epoch": 26.827956989247312, "grad_norm": 0.0027304012328386307, "learning_rate": 1.0973977695167288e-06, "loss": 0.001, "step": 12475 }, { "epoch": 26.881720430107528, "grad_norm": 0.004548298195004463, "learning_rate": 1.0788104089219331e-06, "loss": 0.0008, "step": 12500 }, { "epoch": 26.881720430107528, "eval_loss": 0.4191061854362488, "eval_runtime": 207.3533, "eval_samples_per_second": 4.562, "eval_steps_per_second": 0.574, "eval_wer": 13.959508181566052, "step": 12500 }, { "epoch": 26.93548387096774, "grad_norm": 0.0037072377745062113, "learning_rate": 1.0602230483271377e-06, "loss": 0.002, "step": 12525 }, { "epoch": 26.989247311827956, "grad_norm": 0.004865365568548441, "learning_rate": 1.041635687732342e-06, "loss": 0.0012, "step": 12550 }, { "epoch": 27.043010752688172, "grad_norm": 0.16591113805770874, "learning_rate": 1.0230483271375466e-06, "loss": 0.0008, "step": 12575 }, { "epoch": 27.096774193548388, "grad_norm": 0.003480426501482725, "learning_rate": 1.004460966542751e-06, "loss": 0.002, "step": 12600 }, { "epoch": 27.150537634408604, "grad_norm": 0.003888448467478156, "learning_rate": 9.858736059479555e-07, "loss": 0.001, "step": 12625 }, { "epoch": 27.204301075268816, "grad_norm": 0.004046307876706123, "learning_rate": 9.6728624535316e-07, "loss": 0.0022, "step": 12650 }, { "epoch": 27.258064516129032, "grad_norm": 0.004325231071561575, "learning_rate": 9.486988847583644e-07, "loss": 0.0024, "step": 12675 }, { "epoch": 27.311827956989248, "grad_norm": 0.1196964755654335, "learning_rate": 9.301115241635688e-07, "loss": 0.001, "step": 12700 }, { "epoch": 27.365591397849464, "grad_norm": 0.003892822889611125, "learning_rate": 9.115241635687733e-07, "loss": 0.002, "step": 12725 }, { "epoch": 27.419354838709676, "grad_norm": 0.0024752148892730474, "learning_rate": 8.929368029739778e-07, "loss": 0.0007, "step": 12750 }, { "epoch": 27.473118279569892, "grad_norm": 0.00464650196954608, "learning_rate": 8.743494423791822e-07, "loss": 0.0019, "step": 12775 }, { "epoch": 27.526881720430108, "grad_norm": 0.2570537328720093, "learning_rate": 8.557620817843867e-07, "loss": 0.0022, "step": 12800 }, { "epoch": 27.580645161290324, "grad_norm": 0.003213089657947421, "learning_rate": 8.371747211895912e-07, "loss": 0.0008, "step": 12825 }, { "epoch": 27.634408602150536, "grad_norm": 0.0038951928727328777, "learning_rate": 8.185873605947955e-07, "loss": 0.0013, "step": 12850 }, { "epoch": 27.688172043010752, "grad_norm": 0.0030759673099964857, "learning_rate": 8.000000000000001e-07, "loss": 0.0009, "step": 12875 }, { "epoch": 27.741935483870968, "grad_norm": 0.0037837938871234655, "learning_rate": 7.814126394052045e-07, "loss": 0.0022, "step": 12900 }, { "epoch": 27.795698924731184, "grad_norm": 0.0026918076910078526, "learning_rate": 7.62825278810409e-07, "loss": 0.0016, "step": 12925 }, { "epoch": 27.849462365591396, "grad_norm": 0.0030537450220435858, "learning_rate": 7.442379182156134e-07, "loss": 0.0008, "step": 12950 }, { "epoch": 27.903225806451612, "grad_norm": 0.11770904064178467, "learning_rate": 7.25650557620818e-07, "loss": 0.0014, "step": 12975 }, { "epoch": 27.956989247311828, "grad_norm": 0.0030784786213189363, "learning_rate": 7.070631970260223e-07, "loss": 0.0018, "step": 13000 }, { "epoch": 27.956989247311828, "eval_loss": 0.421833336353302, "eval_runtime": 202.7221, "eval_samples_per_second": 4.666, "eval_steps_per_second": 0.587, "eval_wer": 13.793103448275861, "step": 13000 } ], "logging_steps": 25, "max_steps": 13950, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.4130840981661286e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }