{
  "best_global_step": 13000,
  "best_metric": 13.793103448275861,
  "best_model_checkpoint": "./whisper-large-v3-atc-mrezzat/checkpoint-13000",
  "epoch": 27.956989247311828,
  "eval_steps": 500,
  "global_step": 13000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.053763440860215055,
      "grad_norm": 7.093221187591553,
      "learning_rate": 4.800000000000001e-07,
      "loss": 1.2803,
      "step": 25
    },
    {
      "epoch": 0.10752688172043011,
      "grad_norm": 3.561824321746826,
      "learning_rate": 9.800000000000001e-07,
      "loss": 1.024,
      "step": 50
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 3.972370147705078,
      "learning_rate": 1.48e-06,
      "loss": 0.8323,
      "step": 75
    },
    {
      "epoch": 0.21505376344086022,
      "grad_norm": 5.043467044830322,
      "learning_rate": 1.98e-06,
      "loss": 0.7706,
      "step": 100
    },
    {
      "epoch": 0.26881720430107525,
      "grad_norm": 3.704352855682373,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 0.6839,
      "step": 125
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 2.454521894454956,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.6756,
      "step": 150
    },
    {
      "epoch": 0.3763440860215054,
      "grad_norm": 4.109330654144287,
      "learning_rate": 3.48e-06,
      "loss": 0.6282,
      "step": 175
    },
    {
      "epoch": 0.43010752688172044,
      "grad_norm": 3.429811477661133,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.5735,
      "step": 200
    },
    {
      "epoch": 0.4838709677419355,
      "grad_norm": 3.641101121902466,
      "learning_rate": 4.48e-06,
      "loss": 0.535,
      "step": 225
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 3.169020652770996,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.5174,
      "step": 250
    },
    {
      "epoch": 0.5913978494623656,
      "grad_norm": 2.9315528869628906,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.446,
      "step": 275
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 3.473388671875,
      "learning_rate": 5.98e-06,
      "loss": 0.5147,
      "step": 300
    },
    {
      "epoch": 0.6989247311827957,
      "grad_norm": 3.428112506866455,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.466,
      "step": 325
    },
    {
      "epoch": 0.7526881720430108,
      "grad_norm": 2.7643871307373047,
      "learning_rate": 6.98e-06,
      "loss": 0.4712,
      "step": 350
    },
    {
      "epoch": 0.8064516129032258,
      "grad_norm": 2.77138352394104,
      "learning_rate": 7.48e-06,
      "loss": 0.4456,
      "step": 375
    },
    {
      "epoch": 0.8602150537634409,
      "grad_norm": 3.0843310356140137,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.508,
      "step": 400
    },
    {
      "epoch": 0.9139784946236559,
      "grad_norm": 2.7599642276763916,
      "learning_rate": 8.48e-06,
      "loss": 0.4453,
      "step": 425
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 3.672558546066284,
      "learning_rate": 8.98e-06,
      "loss": 0.4443,
      "step": 450
    },
    {
      "epoch": 1.021505376344086,
      "grad_norm": 3.0827476978302,
      "learning_rate": 9.48e-06,
      "loss": 0.3988,
      "step": 475
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 2.4267773628234863,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.3737,
      "step": 500
    },
    {
      "epoch": 1.075268817204301,
      "eval_loss": 0.27068084478378296,
      "eval_runtime": 204.8229,
      "eval_samples_per_second": 4.619,
      "eval_steps_per_second": 0.581,
      "eval_wer": 16.612739206804108,
      "step": 500
    },
    {
      "epoch": 1.129032258064516,
      "grad_norm": 1.7745027542114258,
      "learning_rate": 9.982156133828997e-06,
      "loss": 0.3749,
      "step": 525
    },
    {
      "epoch": 1.1827956989247312,
      "grad_norm": 2.5673892498016357,
      "learning_rate": 9.963568773234202e-06,
      "loss": 0.3547,
      "step": 550
    },
    {
      "epoch": 1.2365591397849462,
      "grad_norm": 3.1428632736206055,
      "learning_rate": 9.944981412639407e-06,
      "loss": 0.3335,
      "step": 575
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 3.2400989532470703,
      "learning_rate": 9.92639405204461e-06,
      "loss": 0.3382,
      "step": 600
    },
    {
      "epoch": 1.3440860215053765,
      "grad_norm": 3.075026273727417,
      "learning_rate": 9.907806691449815e-06,
      "loss": 0.3641,
      "step": 625
    },
    {
      "epoch": 1.3978494623655915,
      "grad_norm": 2.6721091270446777,
      "learning_rate": 9.88921933085502e-06,
      "loss": 0.358,
      "step": 650
    },
    {
      "epoch": 1.4516129032258065,
      "grad_norm": 1.545538306236267,
      "learning_rate": 9.870631970260223e-06,
      "loss": 0.334,
      "step": 675
    },
    {
      "epoch": 1.5053763440860215,
      "grad_norm": 2.7524378299713135,
      "learning_rate": 9.852044609665428e-06,
      "loss": 0.3401,
      "step": 700
    },
    {
      "epoch": 1.5591397849462365,
      "grad_norm": 2.7014448642730713,
      "learning_rate": 9.833457249070633e-06,
      "loss": 0.3305,
      "step": 725
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 2.5529074668884277,
      "learning_rate": 9.814869888475837e-06,
      "loss": 0.316,
      "step": 750
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 2.5125572681427,
      "learning_rate": 9.796282527881042e-06,
      "loss": 0.3324,
      "step": 775
    },
    {
      "epoch": 1.7204301075268817,
      "grad_norm": 3.541673183441162,
      "learning_rate": 9.777695167286247e-06,
      "loss": 0.3025,
      "step": 800
    },
    {
      "epoch": 1.7741935483870968,
      "grad_norm": 2.809391498565674,
      "learning_rate": 9.75910780669145e-06,
      "loss": 0.3263,
      "step": 825
    },
    {
      "epoch": 1.827956989247312,
      "grad_norm": 3.627777576446533,
      "learning_rate": 9.740520446096655e-06,
      "loss": 0.3209,
      "step": 850
    },
    {
      "epoch": 1.881720430107527,
      "grad_norm": 2.672043561935425,
      "learning_rate": 9.721933085501858e-06,
      "loss": 0.2879,
      "step": 875
    },
    {
      "epoch": 1.935483870967742,
      "grad_norm": 2.783828020095825,
      "learning_rate": 9.703345724907063e-06,
      "loss": 0.2813,
      "step": 900
    },
    {
      "epoch": 1.989247311827957,
      "grad_norm": 3.2919387817382812,
      "learning_rate": 9.684758364312268e-06,
      "loss": 0.3037,
      "step": 925
    },
    {
      "epoch": 2.043010752688172,
      "grad_norm": 2.6845808029174805,
      "learning_rate": 9.666171003717473e-06,
      "loss": 0.2344,
      "step": 950
    },
    {
      "epoch": 2.096774193548387,
      "grad_norm": 2.9660229682922363,
      "learning_rate": 9.647583643122678e-06,
      "loss": 0.1819,
      "step": 975
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 1.8242266178131104,
      "learning_rate": 9.628996282527881e-06,
      "loss": 0.2016,
      "step": 1000
    },
    {
      "epoch": 2.150537634408602,
      "eval_loss": 0.27269652485847473,
      "eval_runtime": 202.1375,
      "eval_samples_per_second": 4.68,
      "eval_steps_per_second": 0.589,
      "eval_wer": 14.246094111121382,
      "step": 1000
    },
    {
      "epoch": 2.204301075268817,
      "grad_norm": 2.224641799926758,
      "learning_rate": 9.610408921933086e-06,
      "loss": 0.2209,
      "step": 1025
    },
    {
      "epoch": 2.258064516129032,
      "grad_norm": 2.039360761642456,
      "learning_rate": 9.59182156133829e-06,
      "loss": 0.2072,
      "step": 1050
    },
    {
      "epoch": 2.3118279569892475,
      "grad_norm": 2.753037929534912,
      "learning_rate": 9.573234200743495e-06,
      "loss": 0.1707,
      "step": 1075
    },
    {
      "epoch": 2.3655913978494625,
      "grad_norm": 2.8433048725128174,
      "learning_rate": 9.5546468401487e-06,
      "loss": 0.2005,
      "step": 1100
    },
    {
      "epoch": 2.4193548387096775,
      "grad_norm": 2.2583348751068115,
      "learning_rate": 9.536059479553905e-06,
      "loss": 0.1792,
      "step": 1125
    },
    {
      "epoch": 2.4731182795698925,
      "grad_norm": 2.151129722595215,
      "learning_rate": 9.51747211895911e-06,
      "loss": 0.2076,
      "step": 1150
    },
    {
      "epoch": 2.5268817204301075,
      "grad_norm": 2.289693593978882,
      "learning_rate": 9.498884758364313e-06,
      "loss": 0.1901,
      "step": 1175
    },
    {
      "epoch": 2.5806451612903225,
      "grad_norm": 2.0624475479125977,
      "learning_rate": 9.480297397769518e-06,
      "loss": 0.1938,
      "step": 1200
    },
    {
      "epoch": 2.6344086021505375,
      "grad_norm": 2.455775737762451,
      "learning_rate": 9.461710037174721e-06,
      "loss": 0.2116,
      "step": 1225
    },
    {
      "epoch": 2.688172043010753,
      "grad_norm": 1.853768229484558,
      "learning_rate": 9.443122676579926e-06,
      "loss": 0.2008,
      "step": 1250
    },
    {
      "epoch": 2.741935483870968,
      "grad_norm": 2.4691860675811768,
      "learning_rate": 9.424535315985131e-06,
      "loss": 0.2167,
      "step": 1275
    },
    {
      "epoch": 2.795698924731183,
      "grad_norm": 1.8937417268753052,
      "learning_rate": 9.405947955390336e-06,
      "loss": 0.1864,
      "step": 1300
    },
    {
      "epoch": 2.849462365591398,
      "grad_norm": 2.786158323287964,
      "learning_rate": 9.38736059479554e-06,
      "loss": 0.1916,
      "step": 1325
    },
    {
      "epoch": 2.903225806451613,
      "grad_norm": 2.7571518421173096,
      "learning_rate": 9.368773234200744e-06,
      "loss": 0.1982,
      "step": 1350
    },
    {
      "epoch": 2.956989247311828,
      "grad_norm": 2.334691286087036,
      "learning_rate": 9.35018587360595e-06,
      "loss": 0.1864,
      "step": 1375
    },
    {
      "epoch": 3.010752688172043,
      "grad_norm": 1.4865392446517944,
      "learning_rate": 9.331598513011153e-06,
      "loss": 0.1593,
      "step": 1400
    },
    {
      "epoch": 3.064516129032258,
      "grad_norm": 1.797865629196167,
      "learning_rate": 9.313011152416358e-06,
      "loss": 0.1123,
      "step": 1425
    },
    {
      "epoch": 3.118279569892473,
      "grad_norm": 1.5422674417495728,
      "learning_rate": 9.294423791821563e-06,
      "loss": 0.1132,
      "step": 1450
    },
    {
      "epoch": 3.172043010752688,
      "grad_norm": 1.649880290031433,
      "learning_rate": 9.275836431226768e-06,
      "loss": 0.1075,
      "step": 1475
    },
    {
      "epoch": 3.225806451612903,
      "grad_norm": 2.0276639461517334,
      "learning_rate": 9.25724907063197e-06,
      "loss": 0.1164,
      "step": 1500
    },
    {
      "epoch": 3.225806451612903,
      "eval_loss": 0.28087103366851807,
      "eval_runtime": 202.3199,
      "eval_samples_per_second": 4.676,
      "eval_steps_per_second": 0.588,
      "eval_wer": 15.050383655357308,
      "step": 1500
    },
    {
      "epoch": 3.279569892473118,
      "grad_norm": 2.4856255054473877,
      "learning_rate": 9.238661710037176e-06,
      "loss": 0.1112,
      "step": 1525
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 2.857877492904663,
      "learning_rate": 9.220074349442379e-06,
      "loss": 0.1131,
      "step": 1550
    },
    {
      "epoch": 3.3870967741935485,
      "grad_norm": 1.7496925592422485,
      "learning_rate": 9.201486988847584e-06,
      "loss": 0.1306,
      "step": 1575
    },
    {
      "epoch": 3.4408602150537635,
      "grad_norm": 1.9851291179656982,
      "learning_rate": 9.182899628252789e-06,
      "loss": 0.114,
      "step": 1600
    },
    {
      "epoch": 3.4946236559139785,
      "grad_norm": 2.6501877307891846,
      "learning_rate": 9.164312267657994e-06,
      "loss": 0.1208,
      "step": 1625
    },
    {
      "epoch": 3.5483870967741935,
      "grad_norm": 1.6141562461853027,
      "learning_rate": 9.145724907063197e-06,
      "loss": 0.1194,
      "step": 1650
    },
    {
      "epoch": 3.6021505376344085,
      "grad_norm": 2.246312141418457,
      "learning_rate": 9.127137546468402e-06,
      "loss": 0.121,
      "step": 1675
    },
    {
      "epoch": 3.6559139784946235,
      "grad_norm": 1.7408199310302734,
      "learning_rate": 9.108550185873607e-06,
      "loss": 0.1178,
      "step": 1700
    },
    {
      "epoch": 3.709677419354839,
      "grad_norm": 2.1992955207824707,
      "learning_rate": 9.08996282527881e-06,
      "loss": 0.0901,
      "step": 1725
    },
    {
      "epoch": 3.763440860215054,
      "grad_norm": 2.057574987411499,
      "learning_rate": 9.071375464684016e-06,
      "loss": 0.0999,
      "step": 1750
    },
    {
      "epoch": 3.817204301075269,
      "grad_norm": 2.032602548599243,
      "learning_rate": 9.052788104089219e-06,
      "loss": 0.1057,
      "step": 1775
    },
    {
      "epoch": 3.870967741935484,
      "grad_norm": 1.700415849685669,
      "learning_rate": 9.034200743494424e-06,
      "loss": 0.1127,
      "step": 1800
    },
    {
      "epoch": 3.924731182795699,
      "grad_norm": 2.944364070892334,
      "learning_rate": 9.015613382899629e-06,
      "loss": 0.1304,
      "step": 1825
    },
    {
      "epoch": 3.978494623655914,
      "grad_norm": 2.807861804962158,
      "learning_rate": 8.997026022304834e-06,
      "loss": 0.1215,
      "step": 1850
    },
    {
      "epoch": 4.032258064516129,
      "grad_norm": 2.064152717590332,
      "learning_rate": 8.978438661710039e-06,
      "loss": 0.0942,
      "step": 1875
    },
    {
      "epoch": 4.086021505376344,
      "grad_norm": 2.315067768096924,
      "learning_rate": 8.959851301115242e-06,
      "loss": 0.0636,
      "step": 1900
    },
    {
      "epoch": 4.139784946236559,
      "grad_norm": 1.4923697710037231,
      "learning_rate": 8.941263940520447e-06,
      "loss": 0.0553,
      "step": 1925
    },
    {
      "epoch": 4.193548387096774,
      "grad_norm": 1.0652992725372314,
      "learning_rate": 8.92267657992565e-06,
      "loss": 0.0545,
      "step": 1950
    },
    {
      "epoch": 4.247311827956989,
      "grad_norm": 1.9834535121917725,
      "learning_rate": 8.904089219330855e-06,
      "loss": 0.049,
      "step": 1975
    },
    {
      "epoch": 4.301075268817204,
      "grad_norm": 1.4393575191497803,
      "learning_rate": 8.88550185873606e-06,
      "loss": 0.0551,
      "step": 2000
    },
    {
      "epoch": 4.301075268817204,
      "eval_loss": 0.3065280318260193,
      "eval_runtime": 202.95,
      "eval_samples_per_second": 4.661,
      "eval_steps_per_second": 0.586,
      "eval_wer": 15.087362484977351,
      "step": 2000
    },
    {
      "epoch": 4.354838709677419,
      "grad_norm": 1.7886149883270264,
      "learning_rate": 8.866914498141265e-06,
      "loss": 0.0629,
      "step": 2025
    },
    {
      "epoch": 4.408602150537634,
      "grad_norm": 1.470372200012207,
      "learning_rate": 8.84832713754647e-06,
      "loss": 0.0569,
      "step": 2050
    },
    {
      "epoch": 4.462365591397849,
      "grad_norm": 2.3206701278686523,
      "learning_rate": 8.829739776951673e-06,
      "loss": 0.0612,
      "step": 2075
    },
    {
      "epoch": 4.516129032258064,
      "grad_norm": 1.5979121923446655,
      "learning_rate": 8.811152416356878e-06,
      "loss": 0.0703,
      "step": 2100
    },
    {
      "epoch": 4.56989247311828,
      "grad_norm": 2.818779945373535,
      "learning_rate": 8.792565055762082e-06,
      "loss": 0.0637,
      "step": 2125
    },
    {
      "epoch": 4.623655913978495,
      "grad_norm": 2.848932981491089,
      "learning_rate": 8.773977695167287e-06,
      "loss": 0.0636,
      "step": 2150
    },
    {
      "epoch": 4.67741935483871,
      "grad_norm": 1.2150336503982544,
      "learning_rate": 8.755390334572492e-06,
      "loss": 0.0898,
      "step": 2175
    },
    {
      "epoch": 4.731182795698925,
      "grad_norm": 2.5077340602874756,
      "learning_rate": 8.736802973977697e-06,
      "loss": 0.0509,
      "step": 2200
    },
    {
      "epoch": 4.78494623655914,
      "grad_norm": 2.6455860137939453,
      "learning_rate": 8.7182156133829e-06,
      "loss": 0.0662,
      "step": 2225
    },
    {
      "epoch": 4.838709677419355,
      "grad_norm": 2.1598827838897705,
      "learning_rate": 8.699628252788105e-06,
      "loss": 0.0618,
      "step": 2250
    },
    {
      "epoch": 4.89247311827957,
      "grad_norm": 1.961423397064209,
      "learning_rate": 8.68104089219331e-06,
      "loss": 0.0687,
      "step": 2275
    },
    {
      "epoch": 4.946236559139785,
      "grad_norm": 1.5583302974700928,
      "learning_rate": 8.662453531598513e-06,
      "loss": 0.0587,
      "step": 2300
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.7334260940551758,
      "learning_rate": 8.643866171003718e-06,
      "loss": 0.0639,
      "step": 2325
    },
    {
      "epoch": 5.053763440860215,
      "grad_norm": 1.385697364807129,
      "learning_rate": 8.625278810408923e-06,
      "loss": 0.0295,
      "step": 2350
    },
    {
      "epoch": 5.10752688172043,
      "grad_norm": 1.8390223979949951,
      "learning_rate": 8.606691449814128e-06,
      "loss": 0.0336,
      "step": 2375
    },
    {
      "epoch": 5.161290322580645,
      "grad_norm": 1.6100000143051147,
      "learning_rate": 8.588104089219331e-06,
      "loss": 0.0346,
      "step": 2400
    },
    {
      "epoch": 5.21505376344086,
      "grad_norm": 1.8762363195419312,
      "learning_rate": 8.569516728624536e-06,
      "loss": 0.0355,
      "step": 2425
    },
    {
      "epoch": 5.268817204301075,
      "grad_norm": 0.8988639116287231,
      "learning_rate": 8.55092936802974e-06,
      "loss": 0.038,
      "step": 2450
    },
    {
      "epoch": 5.32258064516129,
      "grad_norm": 1.372160792350769,
      "learning_rate": 8.532342007434945e-06,
      "loss": 0.0443,
      "step": 2475
    },
    {
      "epoch": 5.376344086021505,
      "grad_norm": 1.3820526599884033,
      "learning_rate": 8.51375464684015e-06,
      "loss": 0.0345,
      "step": 2500
    },
    {
      "epoch": 5.376344086021505,
      "eval_loss": 0.31700512766838074,
      "eval_runtime": 202.9264,
      "eval_samples_per_second": 4.662,
      "eval_steps_per_second": 0.586,
      "eval_wer": 14.902468336877137,
      "step": 2500
    },
    {
      "epoch": 5.43010752688172,
      "grad_norm": 1.8369241952896118,
      "learning_rate": 8.495167286245355e-06,
      "loss": 0.0362,
      "step": 2525
    },
    {
      "epoch": 5.483870967741936,
      "grad_norm": 1.735297441482544,
      "learning_rate": 8.476579925650558e-06,
      "loss": 0.0369,
      "step": 2550
    },
    {
      "epoch": 5.53763440860215,
      "grad_norm": 2.1869583129882812,
      "learning_rate": 8.457992565055763e-06,
      "loss": 0.0359,
      "step": 2575
    },
    {
      "epoch": 5.591397849462366,
      "grad_norm": 0.9142827987670898,
      "learning_rate": 8.439405204460968e-06,
      "loss": 0.0369,
      "step": 2600
    },
    {
      "epoch": 5.645161290322581,
      "grad_norm": 1.5425326824188232,
      "learning_rate": 8.420817843866171e-06,
      "loss": 0.0413,
      "step": 2625
    },
    {
      "epoch": 5.698924731182796,
      "grad_norm": 1.532554268836975,
      "learning_rate": 8.402230483271376e-06,
      "loss": 0.0455,
      "step": 2650
    },
    {
      "epoch": 5.752688172043011,
      "grad_norm": 1.7818132638931274,
      "learning_rate": 8.38364312267658e-06,
      "loss": 0.0351,
      "step": 2675
    },
    {
      "epoch": 5.806451612903226,
      "grad_norm": 0.8005560040473938,
      "learning_rate": 8.365055762081784e-06,
      "loss": 0.0446,
      "step": 2700
    },
    {
      "epoch": 5.860215053763441,
      "grad_norm": 1.37205171585083,
      "learning_rate": 8.34646840148699e-06,
      "loss": 0.0447,
      "step": 2725
    },
    {
      "epoch": 5.913978494623656,
      "grad_norm": 0.9380530714988708,
      "learning_rate": 8.327881040892194e-06,
      "loss": 0.0335,
      "step": 2750
    },
    {
      "epoch": 5.967741935483871,
      "grad_norm": 2.020190954208374,
      "learning_rate": 8.3092936802974e-06,
      "loss": 0.0354,
      "step": 2775
    },
    {
      "epoch": 6.021505376344086,
      "grad_norm": 0.7758223414421082,
      "learning_rate": 8.290706319702603e-06,
      "loss": 0.0245,
      "step": 2800
    },
    {
      "epoch": 6.075268817204301,
      "grad_norm": 1.127894639968872,
      "learning_rate": 8.272118959107808e-06,
      "loss": 0.0233,
      "step": 2825
    },
    {
      "epoch": 6.129032258064516,
      "grad_norm": 0.7980286478996277,
      "learning_rate": 8.253531598513011e-06,
      "loss": 0.0234,
      "step": 2850
    },
    {
      "epoch": 6.182795698924731,
      "grad_norm": 1.1685783863067627,
      "learning_rate": 8.234944237918216e-06,
      "loss": 0.0311,
      "step": 2875
    },
    {
      "epoch": 6.236559139784946,
      "grad_norm": 1.3557145595550537,
      "learning_rate": 8.216356877323421e-06,
      "loss": 0.0219,
      "step": 2900
    },
    {
      "epoch": 6.290322580645161,
      "grad_norm": 0.7474266290664673,
      "learning_rate": 8.197769516728626e-06,
      "loss": 0.0244,
      "step": 2925
    },
    {
      "epoch": 6.344086021505376,
      "grad_norm": 0.9074203968048096,
      "learning_rate": 8.179182156133829e-06,
      "loss": 0.0195,
      "step": 2950
    },
    {
      "epoch": 6.397849462365591,
      "grad_norm": 1.0333547592163086,
      "learning_rate": 8.160594795539034e-06,
      "loss": 0.0284,
      "step": 2975
    },
    {
      "epoch": 6.451612903225806,
      "grad_norm": 1.9100396633148193,
      "learning_rate": 8.142007434944239e-06,
      "loss": 0.0186,
      "step": 3000
    },
    {
      "epoch": 6.451612903225806,
      "eval_loss": 0.3364327549934387,
      "eval_runtime": 203.4115,
      "eval_samples_per_second": 4.651,
      "eval_steps_per_second": 0.585,
      "eval_wer": 14.77304243320699,
      "step": 3000
    },
    {
      "epoch": 6.505376344086022,
      "grad_norm": 0.6991943120956421,
      "learning_rate": 8.123420074349442e-06,
      "loss": 0.0201,
      "step": 3025
    },
    {
      "epoch": 6.559139784946236,
      "grad_norm": 2.668675422668457,
      "learning_rate": 8.104832713754647e-06,
      "loss": 0.0257,
      "step": 3050
    },
    {
      "epoch": 6.612903225806452,
      "grad_norm": 1.0620919466018677,
      "learning_rate": 8.086245353159852e-06,
      "loss": 0.0275,
      "step": 3075
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 1.9009549617767334,
      "learning_rate": 8.067657992565057e-06,
      "loss": 0.0215,
      "step": 3100
    },
    {
      "epoch": 6.720430107526882,
      "grad_norm": 0.8860704302787781,
      "learning_rate": 8.04907063197026e-06,
      "loss": 0.0199,
      "step": 3125
    },
    {
      "epoch": 6.774193548387097,
      "grad_norm": 1.2130790948867798,
      "learning_rate": 8.030483271375466e-06,
      "loss": 0.0237,
      "step": 3150
    },
    {
      "epoch": 6.827956989247312,
      "grad_norm": 1.5909550189971924,
      "learning_rate": 8.011895910780669e-06,
      "loss": 0.0269,
      "step": 3175
    },
    {
      "epoch": 6.881720430107527,
      "grad_norm": 1.6755486726760864,
      "learning_rate": 7.993308550185874e-06,
      "loss": 0.0339,
      "step": 3200
    },
    {
      "epoch": 6.935483870967742,
      "grad_norm": 1.2641445398330688,
      "learning_rate": 7.974721189591079e-06,
      "loss": 0.0204,
      "step": 3225
    },
    {
      "epoch": 6.989247311827957,
      "grad_norm": 1.7877347469329834,
      "learning_rate": 7.956133828996284e-06,
      "loss": 0.0266,
      "step": 3250
    },
    {
      "epoch": 7.043010752688172,
      "grad_norm": 0.9837028384208679,
      "learning_rate": 7.937546468401489e-06,
      "loss": 0.02,
      "step": 3275
    },
    {
      "epoch": 7.096774193548387,
      "grad_norm": 1.2097680568695068,
      "learning_rate": 7.918959107806692e-06,
      "loss": 0.0126,
      "step": 3300
    },
    {
      "epoch": 7.150537634408602,
      "grad_norm": 1.447039246559143,
      "learning_rate": 7.900371747211897e-06,
      "loss": 0.018,
      "step": 3325
    },
    {
      "epoch": 7.204301075268817,
      "grad_norm": 0.8316716551780701,
      "learning_rate": 7.8817843866171e-06,
      "loss": 0.0178,
      "step": 3350
    },
    {
      "epoch": 7.258064516129032,
      "grad_norm": 0.9670646786689758,
      "learning_rate": 7.863197026022305e-06,
      "loss": 0.0122,
      "step": 3375
    },
    {
      "epoch": 7.311827956989247,
      "grad_norm": 1.4154245853424072,
      "learning_rate": 7.84460966542751e-06,
      "loss": 0.0171,
      "step": 3400
    },
    {
      "epoch": 7.365591397849462,
      "grad_norm": 1.3647488355636597,
      "learning_rate": 7.826022304832714e-06,
      "loss": 0.0151,
      "step": 3425
    },
    {
      "epoch": 7.419354838709677,
      "grad_norm": 1.548120141029358,
      "learning_rate": 7.807434944237919e-06,
      "loss": 0.0149,
      "step": 3450
    },
    {
      "epoch": 7.473118279569892,
      "grad_norm": 1.6091225147247314,
      "learning_rate": 7.788847583643124e-06,
      "loss": 0.0168,
      "step": 3475
    },
    {
      "epoch": 7.526881720430108,
      "grad_norm": 1.1116617918014526,
      "learning_rate": 7.770260223048329e-06,
      "loss": 0.0161,
      "step": 3500
    },
    {
      "epoch": 7.526881720430108,
      "eval_loss": 0.34663301706314087,
      "eval_runtime": 202.0433,
      "eval_samples_per_second": 4.682,
      "eval_steps_per_second": 0.589,
      "eval_wer": 14.551169455486734,
      "step": 3500
    },
    {
      "epoch": 7.580645161290323,
      "grad_norm": 1.468459129333496,
      "learning_rate": 7.751672862453532e-06,
      "loss": 0.0193,
      "step": 3525
    },
    {
      "epoch": 7.634408602150538,
      "grad_norm": 1.2769989967346191,
      "learning_rate": 7.733085501858737e-06,
      "loss": 0.0184,
      "step": 3550
    },
    {
      "epoch": 7.688172043010753,
      "grad_norm": 1.0488286018371582,
      "learning_rate": 7.71449814126394e-06,
      "loss": 0.019,
      "step": 3575
    },
    {
      "epoch": 7.741935483870968,
      "grad_norm": 0.8325207829475403,
      "learning_rate": 7.695910780669145e-06,
      "loss": 0.0252,
      "step": 3600
    },
    {
      "epoch": 7.795698924731183,
      "grad_norm": 0.7656351923942566,
      "learning_rate": 7.67732342007435e-06,
      "loss": 0.0155,
      "step": 3625
    },
    {
      "epoch": 7.849462365591398,
      "grad_norm": 0.9184199571609497,
      "learning_rate": 7.658736059479555e-06,
      "loss": 0.028,
      "step": 3650
    },
    {
      "epoch": 7.903225806451613,
      "grad_norm": 1.2135573625564575,
      "learning_rate": 7.64014869888476e-06,
      "loss": 0.0178,
      "step": 3675
    },
    {
      "epoch": 7.956989247311828,
      "grad_norm": 0.8172153830528259,
      "learning_rate": 7.621561338289963e-06,
      "loss": 0.0254,
      "step": 3700
    },
    {
      "epoch": 8.010752688172044,
      "grad_norm": 0.431659072637558,
      "learning_rate": 7.602973977695168e-06,
      "loss": 0.0145,
      "step": 3725
    },
    {
      "epoch": 8.064516129032258,
      "grad_norm": 0.9533307552337646,
      "learning_rate": 7.584386617100372e-06,
      "loss": 0.0165,
      "step": 3750
    },
    {
      "epoch": 8.118279569892474,
      "grad_norm": 0.7198922038078308,
      "learning_rate": 7.565799256505577e-06,
      "loss": 0.0107,
      "step": 3775
    },
    {
      "epoch": 8.172043010752688,
      "grad_norm": 0.8582783937454224,
      "learning_rate": 7.547211895910781e-06,
      "loss": 0.0123,
      "step": 3800
    },
    {
      "epoch": 8.225806451612904,
      "grad_norm": 1.0113513469696045,
      "learning_rate": 7.528624535315986e-06,
      "loss": 0.014,
      "step": 3825
    },
    {
      "epoch": 8.279569892473118,
      "grad_norm": 0.7275539040565491,
      "learning_rate": 7.51003717472119e-06,
      "loss": 0.0115,
      "step": 3850
    },
    {
      "epoch": 8.333333333333334,
      "grad_norm": 0.4298296570777893,
      "learning_rate": 7.491449814126395e-06,
      "loss": 0.0104,
      "step": 3875
    },
    {
      "epoch": 8.387096774193548,
      "grad_norm": 0.7536816596984863,
      "learning_rate": 7.4728624535316e-06,
      "loss": 0.0132,
      "step": 3900
    },
    {
      "epoch": 8.440860215053764,
      "grad_norm": 1.0941580533981323,
      "learning_rate": 7.454275092936804e-06,
      "loss": 0.012,
      "step": 3925
    },
    {
      "epoch": 8.494623655913978,
      "grad_norm": 1.0508357286453247,
      "learning_rate": 7.435687732342009e-06,
      "loss": 0.0135,
      "step": 3950
    },
    {
      "epoch": 8.548387096774194,
      "grad_norm": 0.6876735687255859,
      "learning_rate": 7.417100371747212e-06,
      "loss": 0.0156,
      "step": 3975
    },
    {
      "epoch": 8.602150537634408,
      "grad_norm": 0.8525980114936829,
      "learning_rate": 7.398513011152417e-06,
      "loss": 0.0106,
      "step": 4000
    },
    {
      "epoch": 8.602150537634408,
      "eval_loss": 0.3538697063922882,
      "eval_runtime": 202.0959,
      "eval_samples_per_second": 4.681,
      "eval_steps_per_second": 0.589,
      "eval_wer": 14.338541185171488,
      "step": 4000
    },
    {
      "epoch": 8.655913978494624,
      "grad_norm": 0.5362399220466614,
      "learning_rate": 7.379925650557621e-06,
      "loss": 0.0123,
      "step": 4025
    },
    {
      "epoch": 8.709677419354838,
      "grad_norm": 0.8804866671562195,
      "learning_rate": 7.361338289962826e-06,
      "loss": 0.0171,
      "step": 4050
    },
    {
      "epoch": 8.763440860215054,
      "grad_norm": 0.8643043041229248,
      "learning_rate": 7.34275092936803e-06,
      "loss": 0.0144,
      "step": 4075
    },
    {
      "epoch": 8.817204301075268,
      "grad_norm": 0.8704060912132263,
      "learning_rate": 7.3241635687732344e-06,
      "loss": 0.0121,
      "step": 4100
    },
    {
      "epoch": 8.870967741935484,
      "grad_norm": 0.6113823056221008,
      "learning_rate": 7.305576208178439e-06,
      "loss": 0.0154,
      "step": 4125
    },
    {
      "epoch": 8.924731182795698,
      "grad_norm": 1.1631172895431519,
      "learning_rate": 7.2869888475836436e-06,
      "loss": 0.0109,
      "step": 4150
    },
    {
      "epoch": 8.978494623655914,
      "grad_norm": 0.8042282462120056,
      "learning_rate": 7.2684014869888485e-06,
      "loss": 0.0158,
      "step": 4175
    },
    {
      "epoch": 9.03225806451613,
      "grad_norm": 0.18347720801830292,
      "learning_rate": 7.249814126394053e-06,
      "loss": 0.0132,
      "step": 4200
    },
    {
      "epoch": 9.086021505376344,
      "grad_norm": 0.7228168845176697,
      "learning_rate": 7.231226765799258e-06,
      "loss": 0.0099,
      "step": 4225
    },
    {
      "epoch": 9.13978494623656,
      "grad_norm": 0.35777589678764343,
      "learning_rate": 7.212639405204461e-06,
      "loss": 0.0129,
      "step": 4250
    },
    {
      "epoch": 9.193548387096774,
      "grad_norm": 0.2701317071914673,
      "learning_rate": 7.194052044609666e-06,
      "loss": 0.0095,
      "step": 4275
    },
    {
      "epoch": 9.24731182795699,
      "grad_norm": 1.6921519041061401,
      "learning_rate": 7.17546468401487e-06,
      "loss": 0.0082,
      "step": 4300
    },
    {
      "epoch": 9.301075268817204,
      "grad_norm": 0.27636006474494934,
      "learning_rate": 7.156877323420075e-06,
      "loss": 0.0075,
      "step": 4325
    },
    {
      "epoch": 9.35483870967742,
      "grad_norm": 0.7335753440856934,
      "learning_rate": 7.138289962825279e-06,
      "loss": 0.0102,
      "step": 4350
    },
    {
      "epoch": 9.408602150537634,
      "grad_norm": 0.9723600149154663,
      "learning_rate": 7.119702602230484e-06,
      "loss": 0.0104,
      "step": 4375
    },
    {
      "epoch": 9.46236559139785,
      "grad_norm": 0.2777242660522461,
      "learning_rate": 7.101115241635689e-06,
      "loss": 0.0138,
      "step": 4400
    },
    {
      "epoch": 9.516129032258064,
      "grad_norm": 0.42475125193595886,
      "learning_rate": 7.082527881040892e-06,
      "loss": 0.0074,
      "step": 4425
    },
    {
      "epoch": 9.56989247311828,
      "grad_norm": 0.6463161110877991,
      "learning_rate": 7.063940520446097e-06,
      "loss": 0.0105,
      "step": 4450
    },
    {
      "epoch": 9.623655913978494,
      "grad_norm": 1.2284172773361206,
      "learning_rate": 7.0453531598513015e-06,
      "loss": 0.0086,
      "step": 4475
    },
    {
      "epoch": 9.67741935483871,
      "grad_norm": 0.6127483248710632,
      "learning_rate": 7.0267657992565065e-06,
      "loss": 0.013,
      "step": 4500
    },
    {
      "epoch": 9.67741935483871,
      "eval_loss": 0.3432445228099823,
      "eval_runtime": 202.5879,
      "eval_samples_per_second": 4.67,
      "eval_steps_per_second": 0.587,
      "eval_wer": 14.883978922067117,
      "step": 4500
    },
    {
      "epoch": 9.731182795698924,
      "grad_norm": 1.6641124486923218,
      "learning_rate": 7.008178438661711e-06,
      "loss": 0.0117,
      "step": 4525
    },
    {
      "epoch": 9.78494623655914,
      "grad_norm": 0.49032703042030334,
      "learning_rate": 6.989591078066915e-06,
      "loss": 0.0113,
      "step": 4550
    },
    {
      "epoch": 9.838709677419354,
      "grad_norm": 1.0603209733963013,
      "learning_rate": 6.971003717472119e-06,
      "loss": 0.0093,
      "step": 4575
    },
    {
      "epoch": 9.89247311827957,
      "grad_norm": 1.1902903318405151,
      "learning_rate": 6.952416356877324e-06,
      "loss": 0.009,
      "step": 4600
    },
    {
      "epoch": 9.946236559139784,
      "grad_norm": 0.4575275182723999,
      "learning_rate": 6.933828996282529e-06,
      "loss": 0.0103,
      "step": 4625
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.514280080795288,
      "learning_rate": 6.915241635687733e-06,
      "loss": 0.0146,
      "step": 4650
    },
    {
      "epoch": 10.053763440860216,
      "grad_norm": 0.28566455841064453,
      "learning_rate": 6.896654275092938e-06,
      "loss": 0.008,
      "step": 4675
    },
    {
      "epoch": 10.10752688172043,
      "grad_norm": 0.3524170219898224,
      "learning_rate": 6.878066914498141e-06,
      "loss": 0.0057,
      "step": 4700
    },
    {
      "epoch": 10.161290322580646,
      "grad_norm": 1.7274552583694458,
      "learning_rate": 6.859479553903346e-06,
      "loss": 0.0083,
      "step": 4725
    },
    {
      "epoch": 10.21505376344086,
      "grad_norm": 0.31285515427589417,
      "learning_rate": 6.84089219330855e-06,
      "loss": 0.0071,
      "step": 4750
    },
    {
      "epoch": 10.268817204301076,
      "grad_norm": 0.8492361307144165,
      "learning_rate": 6.822304832713755e-06,
      "loss": 0.0086,
      "step": 4775
    },
    {
      "epoch": 10.32258064516129,
      "grad_norm": 0.39797672629356384,
      "learning_rate": 6.8037174721189595e-06,
      "loss": 0.0088,
      "step": 4800
    },
    {
      "epoch": 10.376344086021506,
      "grad_norm": 0.410177618265152,
      "learning_rate": 6.7851301115241644e-06,
      "loss": 0.0085,
      "step": 4825
    },
    {
      "epoch": 10.43010752688172,
      "grad_norm": 0.45091158151626587,
      "learning_rate": 6.766542750929369e-06,
      "loss": 0.0062,
      "step": 4850
    },
    {
      "epoch": 10.483870967741936,
      "grad_norm": 1.3592181205749512,
      "learning_rate": 6.747955390334573e-06,
      "loss": 0.01,
      "step": 4875
    },
    {
      "epoch": 10.53763440860215,
      "grad_norm": 0.4976150691509247,
      "learning_rate": 6.729368029739778e-06,
      "loss": 0.0069,
      "step": 4900
    },
    {
      "epoch": 10.591397849462366,
      "grad_norm": 0.14256972074508667,
      "learning_rate": 6.710780669144982e-06,
      "loss": 0.0064,
      "step": 4925
    },
    {
      "epoch": 10.64516129032258,
      "grad_norm": 0.7307581901550293,
      "learning_rate": 6.692193308550187e-06,
      "loss": 0.0075,
      "step": 4950
    },
    {
      "epoch": 10.698924731182796,
      "grad_norm": 0.8009108901023865,
      "learning_rate": 6.673605947955391e-06,
      "loss": 0.0071,
      "step": 4975
    },
    {
      "epoch": 10.75268817204301,
      "grad_norm": 0.7494556307792664,
      "learning_rate": 6.655018587360595e-06,
      "loss": 0.0092,
      "step": 5000
    },
    {
      "epoch": 10.75268817204301,
      "eval_loss": 0.3648987114429474,
      "eval_runtime": 202.4649,
      "eval_samples_per_second": 4.672,
      "eval_steps_per_second": 0.588,
      "eval_wer": 14.218359988906352,
      "step": 5000
    },
    {
      "epoch": 10.806451612903226,
      "grad_norm": 1.12769615650177,
      "learning_rate": 6.636431226765799e-06,
      "loss": 0.0091,
      "step": 5025
    },
    {
      "epoch": 10.86021505376344,
      "grad_norm": 0.7359474897384644,
      "learning_rate": 6.617843866171004e-06,
      "loss": 0.0112,
      "step": 5050
    },
    {
      "epoch": 10.913978494623656,
      "grad_norm": 0.4451664090156555,
      "learning_rate": 6.599256505576209e-06,
      "loss": 0.0098,
      "step": 5075
    },
    {
      "epoch": 10.967741935483872,
      "grad_norm": 0.364681214094162,
      "learning_rate": 6.580669144981413e-06,
      "loss": 0.0087,
      "step": 5100
    },
    {
      "epoch": 11.021505376344086,
      "grad_norm": 0.416103720664978,
      "learning_rate": 6.562081784386618e-06,
      "loss": 0.0072,
      "step": 5125
    },
    {
      "epoch": 11.075268817204302,
      "grad_norm": 0.2710916996002197,
      "learning_rate": 6.5434944237918215e-06,
      "loss": 0.0063,
      "step": 5150
    },
    {
      "epoch": 11.129032258064516,
      "grad_norm": 1.4234521389007568,
      "learning_rate": 6.5249070631970265e-06,
      "loss": 0.0062,
      "step": 5175
    },
    {
      "epoch": 11.182795698924732,
      "grad_norm": 0.800237238407135,
      "learning_rate": 6.506319702602231e-06,
      "loss": 0.0075,
      "step": 5200
    },
    {
      "epoch": 11.236559139784946,
      "grad_norm": 0.4724205732345581,
      "learning_rate": 6.487732342007436e-06,
      "loss": 0.0053,
      "step": 5225
    },
    {
      "epoch": 11.290322580645162,
      "grad_norm": 0.12521684169769287,
      "learning_rate": 6.46914498141264e-06,
      "loss": 0.0079,
      "step": 5250
    },
    {
      "epoch": 11.344086021505376,
      "grad_norm": 0.2039920538663864,
      "learning_rate": 6.450557620817845e-06,
      "loss": 0.0069,
      "step": 5275
    },
    {
      "epoch": 11.397849462365592,
      "grad_norm": 1.678312063217163,
      "learning_rate": 6.43197026022305e-06,
      "loss": 0.008,
      "step": 5300
    },
    {
      "epoch": 11.451612903225806,
      "grad_norm": 0.8350504636764526,
      "learning_rate": 6.413382899628253e-06,
      "loss": 0.0069,
      "step": 5325
    },
    {
      "epoch": 11.505376344086022,
      "grad_norm": 0.6541998982429504,
      "learning_rate": 6.394795539033458e-06,
      "loss": 0.009,
      "step": 5350
    },
    {
      "epoch": 11.559139784946236,
      "grad_norm": 1.2869340181350708,
      "learning_rate": 6.376208178438662e-06,
      "loss": 0.0051,
      "step": 5375
    },
    {
      "epoch": 11.612903225806452,
      "grad_norm": 0.987830638885498,
      "learning_rate": 6.357620817843867e-06,
      "loss": 0.0072,
      "step": 5400
    },
    {
      "epoch": 11.666666666666666,
      "grad_norm": 0.4543008804321289,
      "learning_rate": 6.339033457249071e-06,
      "loss": 0.0065,
      "step": 5425
    },
    {
      "epoch": 11.720430107526882,
      "grad_norm": 0.866301953792572,
      "learning_rate": 6.320446096654275e-06,
      "loss": 0.0071,
      "step": 5450
    },
    {
      "epoch": 11.774193548387096,
      "grad_norm": 1.1665536165237427,
      "learning_rate": 6.3018587360594795e-06,
      "loss": 0.0089,
      "step": 5475
    },
    {
      "epoch": 11.827956989247312,
      "grad_norm": 0.5745353102684021,
      "learning_rate": 6.2832713754646845e-06,
      "loss": 0.0086,
      "step": 5500
    },
    {
      "epoch": 11.827956989247312,
      "eval_loss": 0.3715842068195343,
      "eval_runtime": 202.8772,
      "eval_samples_per_second": 4.663,
      "eval_steps_per_second": 0.587,
      "eval_wer": 15.586576684847925,
      "step": 5500
    },
    {
      "epoch": 11.881720430107526,
      "grad_norm": 0.7137680053710938,
      "learning_rate": 6.2646840148698895e-06,
      "loss": 0.0063,
      "step": 5525
    },
    {
      "epoch": 11.935483870967742,
      "grad_norm": 1.8331615924835205,
      "learning_rate": 6.246096654275094e-06,
      "loss": 0.0052,
      "step": 5550
    },
    {
      "epoch": 11.989247311827956,
      "grad_norm": 1.306740403175354,
      "learning_rate": 6.2275092936802986e-06,
      "loss": 0.0084,
      "step": 5575
    },
    {
      "epoch": 12.043010752688172,
      "grad_norm": 0.4689745008945465,
      "learning_rate": 6.208921933085502e-06,
      "loss": 0.0054,
      "step": 5600
    },
    {
      "epoch": 12.096774193548388,
      "grad_norm": 0.8853312134742737,
      "learning_rate": 6.190334572490707e-06,
      "loss": 0.0038,
      "step": 5625
    },
    {
      "epoch": 12.150537634408602,
      "grad_norm": 0.18394626677036285,
      "learning_rate": 6.171747211895911e-06,
      "loss": 0.0058,
      "step": 5650
    },
    {
      "epoch": 12.204301075268818,
      "grad_norm": 0.35906341671943665,
      "learning_rate": 6.153159851301116e-06,
      "loss": 0.0048,
      "step": 5675
    },
    {
      "epoch": 12.258064516129032,
      "grad_norm": 0.0934007316827774,
      "learning_rate": 6.13457249070632e-06,
      "loss": 0.0056,
      "step": 5700
    },
    {
      "epoch": 12.311827956989248,
      "grad_norm": 0.6383976340293884,
      "learning_rate": 6.115985130111525e-06,
      "loss": 0.0049,
      "step": 5725
    },
    {
      "epoch": 12.365591397849462,
      "grad_norm": 0.3622893989086151,
      "learning_rate": 6.097397769516728e-06,
      "loss": 0.0064,
      "step": 5750
    },
    {
      "epoch": 12.419354838709678,
      "grad_norm": 0.21196268498897552,
      "learning_rate": 6.078810408921933e-06,
      "loss": 0.0064,
      "step": 5775
    },
    {
      "epoch": 12.473118279569892,
      "grad_norm": 0.3381194472312927,
      "learning_rate": 6.060223048327138e-06,
      "loss": 0.0054,
      "step": 5800
    },
    {
      "epoch": 12.526881720430108,
      "grad_norm": 1.9906443357467651,
      "learning_rate": 6.041635687732342e-06,
      "loss": 0.0061,
      "step": 5825
    },
    {
      "epoch": 12.580645161290322,
      "grad_norm": 0.3197634220123291,
      "learning_rate": 6.023048327137547e-06,
      "loss": 0.0053,
      "step": 5850
    },
    {
      "epoch": 12.634408602150538,
      "grad_norm": 0.18474631011486053,
      "learning_rate": 6.0044609665427515e-06,
      "loss": 0.0065,
      "step": 5875
    },
    {
      "epoch": 12.688172043010752,
      "grad_norm": 0.8498281240463257,
      "learning_rate": 5.985873605947956e-06,
      "loss": 0.004,
      "step": 5900
    },
    {
      "epoch": 12.741935483870968,
      "grad_norm": 0.4391692578792572,
      "learning_rate": 5.96728624535316e-06,
      "loss": 0.006,
      "step": 5925
    },
    {
      "epoch": 12.795698924731182,
      "grad_norm": 0.6688899993896484,
      "learning_rate": 5.948698884758365e-06,
      "loss": 0.0053,
      "step": 5950
    },
    {
      "epoch": 12.849462365591398,
      "grad_norm": 0.9713292121887207,
      "learning_rate": 5.930111524163569e-06,
      "loss": 0.0072,
      "step": 5975
    },
    {
      "epoch": 12.903225806451612,
      "grad_norm": 0.8484262228012085,
      "learning_rate": 5.911524163568774e-06,
      "loss": 0.0068,
      "step": 6000
    },
    {
      "epoch": 12.903225806451612,
      "eval_loss": 0.37204521894454956,
      "eval_runtime": 201.9047,
      "eval_samples_per_second": 4.685,
      "eval_steps_per_second": 0.589,
      "eval_wer": 14.588148285106776,
      "step": 6000
    },
    {
      "epoch": 12.956989247311828,
      "grad_norm": 1.0728837251663208,
      "learning_rate": 5.892936802973979e-06,
      "loss": 0.0084,
      "step": 6025
    },
    {
      "epoch": 13.010752688172044,
      "grad_norm": 0.4754142761230469,
      "learning_rate": 5.874349442379182e-06,
      "loss": 0.0087,
      "step": 6050
    },
    {
      "epoch": 13.064516129032258,
      "grad_norm": 0.3025985062122345,
      "learning_rate": 5.855762081784387e-06,
      "loss": 0.0063,
      "step": 6075
    },
    {
      "epoch": 13.118279569892474,
      "grad_norm": 0.3236280083656311,
      "learning_rate": 5.837174721189591e-06,
      "loss": 0.0055,
      "step": 6100
    },
    {
      "epoch": 13.172043010752688,
      "grad_norm": 0.508432924747467,
      "learning_rate": 5.818587360594796e-06,
      "loss": 0.0053,
      "step": 6125
    },
    {
      "epoch": 13.225806451612904,
      "grad_norm": 1.6511017084121704,
      "learning_rate": 5.8e-06,
      "loss": 0.0046,
      "step": 6150
    },
    {
      "epoch": 13.279569892473118,
      "grad_norm": 0.142063707113266,
      "learning_rate": 5.781412639405205e-06,
      "loss": 0.0051,
      "step": 6175
    },
    {
      "epoch": 13.333333333333334,
      "grad_norm": 0.11750756949186325,
      "learning_rate": 5.762825278810409e-06,
      "loss": 0.0048,
      "step": 6200
    },
    {
      "epoch": 13.387096774193548,
      "grad_norm": 0.8060685396194458,
      "learning_rate": 5.744237918215614e-06,
      "loss": 0.0057,
      "step": 6225
    },
    {
      "epoch": 13.440860215053764,
      "grad_norm": 0.452999472618103,
      "learning_rate": 5.725650557620819e-06,
      "loss": 0.0059,
      "step": 6250
    },
    {
      "epoch": 13.494623655913978,
      "grad_norm": 1.3556956052780151,
      "learning_rate": 5.707063197026023e-06,
      "loss": 0.0049,
      "step": 6275
    },
    {
      "epoch": 13.548387096774194,
      "grad_norm": 0.1406233310699463,
      "learning_rate": 5.688475836431228e-06,
      "loss": 0.0041,
      "step": 6300
    },
    {
      "epoch": 13.602150537634408,
      "grad_norm": 0.6670034527778625,
      "learning_rate": 5.669888475836432e-06,
      "loss": 0.0057,
      "step": 6325
    },
    {
      "epoch": 13.655913978494624,
      "grad_norm": 1.7057311534881592,
      "learning_rate": 5.651301115241636e-06,
      "loss": 0.0056,
      "step": 6350
    },
    {
      "epoch": 13.709677419354838,
      "grad_norm": 0.7842967510223389,
      "learning_rate": 5.63271375464684e-06,
      "loss": 0.0062,
      "step": 6375
    },
    {
      "epoch": 13.763440860215054,
      "grad_norm": 0.7574280500411987,
      "learning_rate": 5.614126394052045e-06,
      "loss": 0.006,
      "step": 6400
    },
    {
      "epoch": 13.817204301075268,
      "grad_norm": 1.1247819662094116,
      "learning_rate": 5.595539033457249e-06,
      "loss": 0.0091,
      "step": 6425
    },
    {
      "epoch": 13.870967741935484,
      "grad_norm": 0.5980854034423828,
      "learning_rate": 5.576951672862454e-06,
      "loss": 0.005,
      "step": 6450
    },
    {
      "epoch": 13.924731182795698,
      "grad_norm": 0.6640056371688843,
      "learning_rate": 5.558364312267659e-06,
      "loss": 0.0061,
      "step": 6475
    },
    {
      "epoch": 13.978494623655914,
      "grad_norm": 0.6742274165153503,
      "learning_rate": 5.5397769516728625e-06,
      "loss": 0.0056,
      "step": 6500
    },
    {
      "epoch": 13.978494623655914,
      "eval_loss": 0.37743857502937317,
      "eval_runtime": 202.8285,
      "eval_samples_per_second": 4.664,
      "eval_steps_per_second": 0.587,
      "eval_wer": 14.819265970232042,
      "step": 6500
    },
    {
      "epoch": 14.03225806451613,
      "grad_norm": 0.1915878802537918,
      "learning_rate": 5.5211895910780674e-06,
      "loss": 0.0045,
      "step": 6525
    },
    {
      "epoch": 14.086021505376344,
      "grad_norm": 0.09815018624067307,
      "learning_rate": 5.5026022304832716e-06,
      "loss": 0.0057,
      "step": 6550
    },
    {
      "epoch": 14.13978494623656,
      "grad_norm": 0.04359288886189461,
      "learning_rate": 5.4840148698884765e-06,
      "loss": 0.005,
      "step": 6575
    },
    {
      "epoch": 14.193548387096774,
      "grad_norm": 0.28134745359420776,
      "learning_rate": 5.465427509293681e-06,
      "loss": 0.0029,
      "step": 6600
    },
    {
      "epoch": 14.24731182795699,
      "grad_norm": 0.6944845914840698,
      "learning_rate": 5.446840148698886e-06,
      "loss": 0.0056,
      "step": 6625
    },
    {
      "epoch": 14.301075268817204,
      "grad_norm": 1.5637778043746948,
      "learning_rate": 5.428252788104089e-06,
      "loss": 0.0037,
      "step": 6650
    },
    {
      "epoch": 14.35483870967742,
      "grad_norm": 0.49470245838165283,
      "learning_rate": 5.409665427509294e-06,
      "loss": 0.0064,
      "step": 6675
    },
    {
      "epoch": 14.408602150537634,
      "grad_norm": 0.055743150413036346,
      "learning_rate": 5.391078066914499e-06,
      "loss": 0.0033,
      "step": 6700
    },
    {
      "epoch": 14.46236559139785,
      "grad_norm": 0.20047767460346222,
      "learning_rate": 5.372490706319703e-06,
      "loss": 0.0047,
      "step": 6725
    },
    {
      "epoch": 14.516129032258064,
      "grad_norm": 0.36383625864982605,
      "learning_rate": 5.353903345724908e-06,
      "loss": 0.0037,
      "step": 6750
    },
    {
      "epoch": 14.56989247311828,
      "grad_norm": 0.07147414237260818,
      "learning_rate": 5.335315985130112e-06,
      "loss": 0.0026,
      "step": 6775
    },
    {
      "epoch": 14.623655913978494,
      "grad_norm": 0.28435853123664856,
      "learning_rate": 5.316728624535316e-06,
      "loss": 0.0043,
      "step": 6800
    },
    {
      "epoch": 14.67741935483871,
      "grad_norm": 0.27296435832977295,
      "learning_rate": 5.29814126394052e-06,
      "loss": 0.0068,
      "step": 6825
    },
    {
      "epoch": 14.731182795698924,
      "grad_norm": 0.9266132116317749,
      "learning_rate": 5.279553903345725e-06,
      "loss": 0.0065,
      "step": 6850
    },
    {
      "epoch": 14.78494623655914,
      "grad_norm": 0.4447098970413208,
      "learning_rate": 5.2609665427509295e-06,
      "loss": 0.0051,
      "step": 6875
    },
    {
      "epoch": 14.838709677419354,
      "grad_norm": 0.6710329055786133,
      "learning_rate": 5.2423791821561345e-06,
      "loss": 0.0044,
      "step": 6900
    },
    {
      "epoch": 14.89247311827957,
      "grad_norm": 0.5553959012031555,
      "learning_rate": 5.2237918215613395e-06,
      "loss": 0.0062,
      "step": 6925
    },
    {
      "epoch": 14.946236559139784,
      "grad_norm": 0.867906928062439,
      "learning_rate": 5.205204460966543e-06,
      "loss": 0.0075,
      "step": 6950
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.5631603002548218,
      "learning_rate": 5.186617100371748e-06,
      "loss": 0.0056,
      "step": 6975
    },
    {
      "epoch": 15.053763440860216,
      "grad_norm": 0.16968116164207458,
      "learning_rate": 5.168029739776952e-06,
      "loss": 0.0032,
      "step": 7000
    },
    {
      "epoch": 15.053763440860216,
      "eval_loss": 0.3897517919540405,
      "eval_runtime": 202.1138,
      "eval_samples_per_second": 4.681,
      "eval_steps_per_second": 0.589,
      "eval_wer": 14.597392992511788,
      "step": 7000
    },
    {
      "epoch": 15.10752688172043,
      "grad_norm": 0.43974125385284424,
      "learning_rate": 5.149442379182157e-06,
      "loss": 0.005,
      "step": 7025
    },
    {
      "epoch": 15.161290322580646,
      "grad_norm": 0.1777154952287674,
      "learning_rate": 5.130855018587361e-06,
      "loss": 0.0023,
      "step": 7050
    },
    {
      "epoch": 15.21505376344086,
      "grad_norm": 0.0768185630440712,
      "learning_rate": 5.112267657992566e-06,
      "loss": 0.0045,
      "step": 7075
    },
    {
      "epoch": 15.268817204301076,
      "grad_norm": 0.04717967286705971,
      "learning_rate": 5.093680297397769e-06,
      "loss": 0.0043,
      "step": 7100
    },
    {
      "epoch": 15.32258064516129,
      "grad_norm": 0.25022652745246887,
      "learning_rate": 5.075092936802974e-06,
      "loss": 0.003,
      "step": 7125
    },
    {
      "epoch": 15.376344086021506,
      "grad_norm": 0.07506144791841507,
      "learning_rate": 5.056505576208179e-06,
      "loss": 0.0043,
      "step": 7150
    },
    {
      "epoch": 15.43010752688172,
      "grad_norm": 0.850889265537262,
      "learning_rate": 5.037918215613383e-06,
      "loss": 0.0055,
      "step": 7175
    },
    {
      "epoch": 15.483870967741936,
      "grad_norm": 0.5556985139846802,
      "learning_rate": 5.019330855018588e-06,
      "loss": 0.0051,
      "step": 7200
    },
    {
      "epoch": 15.53763440860215,
      "grad_norm": 0.0634092465043068,
      "learning_rate": 5.0007434944237924e-06,
      "loss": 0.003,
      "step": 7225
    },
    {
      "epoch": 15.591397849462366,
      "grad_norm": 0.2446642518043518,
      "learning_rate": 4.982156133828997e-06,
      "loss": 0.0047,
      "step": 7250
    },
    {
      "epoch": 15.64516129032258,
      "grad_norm": 1.191821575164795,
      "learning_rate": 4.9635687732342016e-06,
      "loss": 0.0045,
      "step": 7275
    },
    {
      "epoch": 15.698924731182796,
      "grad_norm": 0.4117543399333954,
      "learning_rate": 4.944981412639406e-06,
      "loss": 0.0062,
      "step": 7300
    },
    {
      "epoch": 15.75268817204301,
      "grad_norm": 0.8248342275619507,
      "learning_rate": 4.92639405204461e-06,
      "loss": 0.0043,
      "step": 7325
    },
    {
      "epoch": 15.806451612903226,
      "grad_norm": 0.29120975732803345,
      "learning_rate": 4.907806691449815e-06,
      "loss": 0.0061,
      "step": 7350
    },
    {
      "epoch": 15.86021505376344,
      "grad_norm": 0.0745767205953598,
      "learning_rate": 4.889219330855019e-06,
      "loss": 0.0048,
      "step": 7375
    },
    {
      "epoch": 15.913978494623656,
      "grad_norm": 0.10059848427772522,
      "learning_rate": 4.870631970260223e-06,
      "loss": 0.004,
      "step": 7400
    },
    {
      "epoch": 15.967741935483872,
      "grad_norm": 0.11489495635032654,
      "learning_rate": 4.852044609665428e-06,
      "loss": 0.004,
      "step": 7425
    },
    {
      "epoch": 16.021505376344088,
      "grad_norm": 0.04976237937808037,
      "learning_rate": 4.833457249070632e-06,
      "loss": 0.0036,
      "step": 7450
    },
    {
      "epoch": 16.0752688172043,
      "grad_norm": 0.13619866967201233,
      "learning_rate": 4.814869888475836e-06,
      "loss": 0.0031,
      "step": 7475
    },
    {
      "epoch": 16.129032258064516,
      "grad_norm": 0.35101068019866943,
      "learning_rate": 4.796282527881041e-06,
      "loss": 0.0037,
      "step": 7500
    },
    {
      "epoch": 16.129032258064516,
      "eval_loss": 0.390476793050766,
      "eval_runtime": 202.418,
      "eval_samples_per_second": 4.673,
      "eval_steps_per_second": 0.588,
      "eval_wer": 14.708329481371916,
      "step": 7500
    },
    {
      "epoch": 16.182795698924732,
      "grad_norm": 0.315719872713089,
      "learning_rate": 4.777695167286246e-06,
      "loss": 0.003,
      "step": 7525
    },
    {
      "epoch": 16.236559139784948,
      "grad_norm": 1.0123934745788574,
      "learning_rate": 4.75910780669145e-06,
      "loss": 0.0035,
      "step": 7550
    },
    {
      "epoch": 16.29032258064516,
      "grad_norm": 0.517242968082428,
      "learning_rate": 4.7405204460966545e-06,
      "loss": 0.003,
      "step": 7575
    },
    {
      "epoch": 16.344086021505376,
      "grad_norm": 0.06284263730049133,
      "learning_rate": 4.7219330855018595e-06,
      "loss": 0.0024,
      "step": 7600
    },
    {
      "epoch": 16.397849462365592,
      "grad_norm": 0.02318274788558483,
      "learning_rate": 4.703345724907064e-06,
      "loss": 0.0039,
      "step": 7625
    },
    {
      "epoch": 16.451612903225808,
      "grad_norm": 0.2524121105670929,
      "learning_rate": 4.684758364312268e-06,
      "loss": 0.0041,
      "step": 7650
    },
    {
      "epoch": 16.50537634408602,
      "grad_norm": 0.047711629420518875,
      "learning_rate": 4.666171003717473e-06,
      "loss": 0.0041,
      "step": 7675
    },
    {
      "epoch": 16.559139784946236,
      "grad_norm": 0.32103028893470764,
      "learning_rate": 4.647583643122677e-06,
      "loss": 0.0061,
      "step": 7700
    },
    {
      "epoch": 16.612903225806452,
      "grad_norm": 1.1334346532821655,
      "learning_rate": 4.628996282527882e-06,
      "loss": 0.0043,
      "step": 7725
    },
    {
      "epoch": 16.666666666666668,
      "grad_norm": 0.11029840260744095,
      "learning_rate": 4.610408921933086e-06,
      "loss": 0.0015,
      "step": 7750
    },
    {
      "epoch": 16.72043010752688,
      "grad_norm": 0.03998972475528717,
      "learning_rate": 4.59182156133829e-06,
      "loss": 0.0035,
      "step": 7775
    },
    {
      "epoch": 16.774193548387096,
      "grad_norm": 0.9175609946250916,
      "learning_rate": 4.573234200743495e-06,
      "loss": 0.0037,
      "step": 7800
    },
    {
      "epoch": 16.827956989247312,
      "grad_norm": 0.055633947253227234,
      "learning_rate": 4.554646840148699e-06,
      "loss": 0.0035,
      "step": 7825
    },
    {
      "epoch": 16.881720430107528,
      "grad_norm": 0.28876572847366333,
      "learning_rate": 4.536059479553903e-06,
      "loss": 0.0036,
      "step": 7850
    },
    {
      "epoch": 16.93548387096774,
      "grad_norm": 0.09186781197786331,
      "learning_rate": 4.517472118959108e-06,
      "loss": 0.0034,
      "step": 7875
    },
    {
      "epoch": 16.989247311827956,
      "grad_norm": 0.024787306785583496,
      "learning_rate": 4.4988847583643125e-06,
      "loss": 0.0035,
      "step": 7900
    },
    {
      "epoch": 17.043010752688172,
      "grad_norm": 0.30542510747909546,
      "learning_rate": 4.480297397769517e-06,
      "loss": 0.0027,
      "step": 7925
    },
    {
      "epoch": 17.096774193548388,
      "grad_norm": 0.20729881525039673,
      "learning_rate": 4.461710037174722e-06,
      "loss": 0.0016,
      "step": 7950
    },
    {
      "epoch": 17.150537634408604,
      "grad_norm": 0.17623752355575562,
      "learning_rate": 4.4431226765799266e-06,
| "loss": 0.003, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 17.204301075268816, | |
| "grad_norm": 1.2774063348770142, | |
| "learning_rate": 4.424535315985131e-06, | |
| "loss": 0.0041, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 17.204301075268816, | |
| "eval_loss": 0.38328301906585693, | |
| "eval_runtime": 202.8745, | |
| "eval_samples_per_second": 4.663, | |
| "eval_steps_per_second": 0.587, | |
| "eval_wer": 14.440232966626606, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 17.258064516129032, | |
| "grad_norm": 0.24027810990810394, | |
| "learning_rate": 4.405947955390335e-06, | |
| "loss": 0.0042, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 17.311827956989248, | |
| "grad_norm": 0.6575544476509094, | |
| "learning_rate": 4.38736059479554e-06, | |
| "loss": 0.0033, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 17.365591397849464, | |
| "grad_norm": 0.7652745842933655, | |
| "learning_rate": 4.368773234200744e-06, | |
| "loss": 0.0025, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 17.419354838709676, | |
| "grad_norm": 1.0893921852111816, | |
| "learning_rate": 4.350185873605948e-06, | |
| "loss": 0.0044, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 17.473118279569892, | |
| "grad_norm": 0.381245881319046, | |
| "learning_rate": 4.331598513011153e-06, | |
| "loss": 0.0053, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 17.526881720430108, | |
| "grad_norm": 0.6958642601966858, | |
| "learning_rate": 4.313011152416357e-06, | |
| "loss": 0.003, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 17.580645161290324, | |
| "grad_norm": 0.3542903661727905, | |
| "learning_rate": 4.294423791821561e-06, | |
| "loss": 0.0031, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 17.634408602150536, | |
| "grad_norm": 0.12086351215839386, | |
| "learning_rate": 4.275836431226766e-06, | |
| "loss": 0.0024, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 17.688172043010752, | |
| "grad_norm": 0.41448554396629333, | |
| "learning_rate": 4.2572490706319704e-06, | |
| "loss": 0.0035, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 17.741935483870968, | |
| "grad_norm": 0.06691323965787888, | |
| "learning_rate": 4.238661710037175e-06, | |
| "loss": 0.0035, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 17.795698924731184, | |
| "grad_norm": 0.7434226870536804, | |
| "learning_rate": 4.2200743494423795e-06, | |
| "loss": 0.0053, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 17.849462365591396, | |
| "grad_norm": 0.36501583456993103, | |
| "learning_rate": 4.201486988847584e-06, | |
| "loss": 0.004, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 17.903225806451612, | |
| "grad_norm": 0.2055322229862213, | |
| "learning_rate": 4.182899628252789e-06, | |
| "loss": 0.0043, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 17.956989247311828, | |
| "grad_norm": 0.7392027378082275, | |
| "learning_rate": 4.164312267657993e-06, | |
| "loss": 0.004, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 18.010752688172044, | |
| "grad_norm": 0.07294179499149323, | |
| "learning_rate": 4.145724907063197e-06, | |
| "loss": 0.0055, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 18.06451612903226, | |
| "grad_norm": 0.0656030923128128, | |
| "learning_rate": 4.127137546468402e-06, | |
| "loss": 0.0024, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 18.118279569892472, | |
| "grad_norm": 0.05267421901226044, | |
| "learning_rate": 4.108550185873607e-06, | |
| "loss": 0.0033, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 18.172043010752688, | |
| "grad_norm": 0.704990565776825, | |
| "learning_rate": 4.089962825278811e-06, | |
| "loss": 0.0047, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 18.225806451612904, | |
| "grad_norm": 0.24530240893363953, | |
| "learning_rate": 4.071375464684015e-06, | |
| "loss": 0.0043, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 18.27956989247312, | |
| "grad_norm": 0.19997531175613403, | |
| "learning_rate": 4.05278810408922e-06, | |
| "loss": 0.0035, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 18.27956989247312, | |
| "eval_loss": 0.3821885585784912, | |
| "eval_runtime": 203.3448, | |
| "eval_samples_per_second": 4.652, | |
| "eval_steps_per_second": 0.585, | |
| "eval_wer": 14.412498844411575, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 18.333333333333332, | |
| "grad_norm": 0.0332474559545517, | |
| "learning_rate": 4.034200743494424e-06, | |
| "loss": 0.0031, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 18.387096774193548, | |
| "grad_norm": 1.3561875820159912, | |
| "learning_rate": 4.015613382899628e-06, | |
| "loss": 0.0017, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 18.440860215053764, | |
| "grad_norm": 0.029482562094926834, | |
| "learning_rate": 3.997026022304833e-06, | |
| "loss": 0.0021, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 18.49462365591398, | |
| "grad_norm": 0.12231668084859848, | |
| "learning_rate": 3.9784386617100375e-06, | |
| "loss": 0.0043, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 18.548387096774192, | |
| "grad_norm": 0.044476673007011414, | |
| "learning_rate": 3.959851301115242e-06, | |
| "loss": 0.0024, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 18.602150537634408, | |
| "grad_norm": 0.6735191345214844, | |
| "learning_rate": 3.941263940520447e-06, | |
| "loss": 0.0032, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 18.655913978494624, | |
| "grad_norm": 1.0479316711425781, | |
| "learning_rate": 3.922676579925651e-06, | |
| "loss": 0.0024, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 18.70967741935484, | |
| "grad_norm": 0.023525085300207138, | |
| "learning_rate": 3.904089219330856e-06, | |
| "loss": 0.0049, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 18.763440860215052, | |
| "grad_norm": 0.165565624833107, | |
| "learning_rate": 3.88550185873606e-06, | |
| "loss": 0.0039, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 18.817204301075268, | |
| "grad_norm": 0.5960690379142761, | |
| "learning_rate": 3.866914498141264e-06, | |
| "loss": 0.003, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 18.870967741935484, | |
| "grad_norm": 0.23799718916416168, | |
| "learning_rate": 3.848327137546469e-06, | |
| "loss": 0.002, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 18.9247311827957, | |
| "grad_norm": 0.01600775308907032, | |
| "learning_rate": 3.829739776951673e-06, | |
| "loss": 0.0022, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 18.978494623655912, | |
| "grad_norm": 0.3210331201553345, | |
| "learning_rate": 3.8111524163568776e-06, | |
| "loss": 0.0033, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 19.032258064516128, | |
| "grad_norm": 0.05005327984690666, | |
| "learning_rate": 3.7925650557620818e-06, | |
| "loss": 0.0033, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 19.086021505376344, | |
| "grad_norm": 0.4820277690887451, | |
| "learning_rate": 3.7739776951672863e-06, | |
| "loss": 0.0034, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 19.13978494623656, | |
| "grad_norm": 0.1907467395067215, | |
| "learning_rate": 3.7553903345724913e-06, | |
| "loss": 0.0025, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 19.193548387096776, | |
| "grad_norm": 0.023403950035572052, | |
| "learning_rate": 3.7368029739776954e-06, | |
| "loss": 0.0024, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 19.247311827956988, | |
| "grad_norm": 0.02337467670440674, | |
| "learning_rate": 3.7182156133829e-06, | |
| "loss": 0.0038, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 19.301075268817204, | |
| "grad_norm": 0.42413467168807983, | |
| "learning_rate": 3.6996282527881046e-06, | |
| "loss": 0.0048, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 19.35483870967742, | |
| "grad_norm": 0.0469290092587471, | |
| "learning_rate": 3.6810408921933087e-06, | |
| "loss": 0.0034, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 19.35483870967742, | |
| "eval_loss": 0.38839593529701233, | |
| "eval_runtime": 202.9911, | |
| "eval_samples_per_second": 4.66, | |
| "eval_steps_per_second": 0.586, | |
| "eval_wer": 14.62512711472682, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 19.408602150537636, | |
| "grad_norm": 0.2083800882101059, | |
| "learning_rate": 3.6624535315985132e-06, | |
| "loss": 0.0029, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 19.462365591397848, | |
| "grad_norm": 0.16142559051513672, | |
| "learning_rate": 3.643866171003718e-06, | |
| "loss": 0.0028, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 19.516129032258064, | |
| "grad_norm": 0.02445228025317192, | |
| "learning_rate": 3.625278810408922e-06, | |
| "loss": 0.0031, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 19.56989247311828, | |
| "grad_norm": 0.10709693282842636, | |
| "learning_rate": 3.6066914498141265e-06, | |
| "loss": 0.0028, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 19.623655913978496, | |
| "grad_norm": 0.48716649413108826, | |
| "learning_rate": 3.5881040892193315e-06, | |
| "loss": 0.0024, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 19.677419354838708, | |
| "grad_norm": 0.043807078152894974, | |
| "learning_rate": 3.5695167286245356e-06, | |
| "loss": 0.0026, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 19.731182795698924, | |
| "grad_norm": 0.008909267373383045, | |
| "learning_rate": 3.55092936802974e-06, | |
| "loss": 0.0027, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 19.78494623655914, | |
| "grad_norm": 0.0496838316321373, | |
| "learning_rate": 3.5323420074349447e-06, | |
| "loss": 0.0028, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 19.838709677419356, | |
| "grad_norm": 0.3045809864997864, | |
| "learning_rate": 3.513754646840149e-06, | |
| "loss": 0.0043, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 19.892473118279568, | |
| "grad_norm": 0.019404035061597824, | |
| "learning_rate": 3.4951672862453534e-06, | |
| "loss": 0.0022, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 19.946236559139784, | |
| "grad_norm": 0.05067993700504303, | |
| "learning_rate": 3.476579925650558e-06, | |
| "loss": 0.0027, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.17729219794273376, | |
| "learning_rate": 3.457992565055762e-06, | |
| "loss": 0.0021, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 20.053763440860216, | |
| "grad_norm": 0.166994109749794, | |
| "learning_rate": 3.4394052044609666e-06, | |
| "loss": 0.0025, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 20.107526881720432, | |
| "grad_norm": 0.026689428836107254, | |
| "learning_rate": 3.4208178438661716e-06, | |
| "loss": 0.0035, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 20.161290322580644, | |
| "grad_norm": 0.016895387321710587, | |
| "learning_rate": 3.4022304832713757e-06, | |
| "loss": 0.0026, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 20.21505376344086, | |
| "grad_norm": 0.06793255358934402, | |
| "learning_rate": 3.3836431226765803e-06, | |
| "loss": 0.0015, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 20.268817204301076, | |
| "grad_norm": 0.034562163054943085, | |
| "learning_rate": 3.365055762081785e-06, | |
| "loss": 0.0027, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 20.322580645161292, | |
| "grad_norm": 0.16164565086364746, | |
| "learning_rate": 3.346468401486989e-06, | |
| "loss": 0.0023, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 20.376344086021504, | |
| "grad_norm": 0.015665782615542412, | |
| "learning_rate": 3.3278810408921935e-06, | |
| "loss": 0.002, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 20.43010752688172, | |
| "grad_norm": 0.009676897898316383, | |
| "learning_rate": 3.309293680297398e-06, | |
| "loss": 0.0027, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 20.43010752688172, | |
| "eval_loss": 0.39532387256622314, | |
| "eval_runtime": 202.6591, | |
| "eval_samples_per_second": 4.668, | |
| "eval_steps_per_second": 0.587, | |
| "eval_wer": 14.449477674031616, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 20.483870967741936, | |
| "grad_norm": 0.16924279928207397, | |
| "learning_rate": 3.2907063197026022e-06, | |
| "loss": 0.0022, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 20.537634408602152, | |
| "grad_norm": 0.3499106168746948, | |
| "learning_rate": 3.272118959107807e-06, | |
| "loss": 0.0017, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 20.591397849462364, | |
| "grad_norm": 0.5156524181365967, | |
| "learning_rate": 3.2535315985130113e-06, | |
| "loss": 0.0027, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 20.64516129032258, | |
| "grad_norm": 0.12964314222335815, | |
| "learning_rate": 3.234944237918216e-06, | |
| "loss": 0.0029, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 20.698924731182796, | |
| "grad_norm": 0.4109344482421875, | |
| "learning_rate": 3.2163568773234205e-06, | |
| "loss": 0.002, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 20.752688172043012, | |
| "grad_norm": 0.17528752982616425, | |
| "learning_rate": 3.197769516728625e-06, | |
| "loss": 0.0025, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 20.806451612903224, | |
| "grad_norm": 0.27459415793418884, | |
| "learning_rate": 3.179182156133829e-06, | |
| "loss": 0.002, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 20.86021505376344, | |
| "grad_norm": 0.7621147036552429, | |
| "learning_rate": 3.1605947955390337e-06, | |
| "loss": 0.0019, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 20.913978494623656, | |
| "grad_norm": 0.03115130215883255, | |
| "learning_rate": 3.1420074349442383e-06, | |
| "loss": 0.0021, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 20.967741935483872, | |
| "grad_norm": 0.3843834698200226, | |
| "learning_rate": 3.1234200743494424e-06, | |
| "loss": 0.0037, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 21.021505376344088, | |
| "grad_norm": 0.11314116418361664, | |
| "learning_rate": 3.104832713754647e-06, | |
| "loss": 0.0019, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 21.0752688172043, | |
| "grad_norm": 0.008430559188127518, | |
| "learning_rate": 3.0862453531598515e-06, | |
| "loss": 0.0016, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 21.129032258064516, | |
| "grad_norm": 0.014893501996994019, | |
| "learning_rate": 3.067657992565056e-06, | |
| "loss": 0.0012, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 21.182795698924732, | |
| "grad_norm": 0.020304594188928604, | |
| "learning_rate": 3.0490706319702606e-06, | |
| "loss": 0.0014, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 21.236559139784948, | |
| "grad_norm": 0.12655578553676605, | |
| "learning_rate": 3.030483271375465e-06, | |
| "loss": 0.0026, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 21.29032258064516, | |
| "grad_norm": 0.014000285416841507, | |
| "learning_rate": 3.0118959107806693e-06, | |
| "loss": 0.0026, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 21.344086021505376, | |
| "grad_norm": 0.026862381026148796, | |
| "learning_rate": 2.993308550185874e-06, | |
| "loss": 0.0024, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 21.397849462365592, | |
| "grad_norm": 0.03989304229617119, | |
| "learning_rate": 2.9747211895910784e-06, | |
| "loss": 0.0018, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 21.451612903225808, | |
| "grad_norm": 0.019757866859436035, | |
| "learning_rate": 2.9561338289962825e-06, | |
| "loss": 0.0031, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 21.50537634408602, | |
| "grad_norm": 0.02383114956319332, | |
| "learning_rate": 2.937546468401487e-06, | |
| "loss": 0.0022, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 21.50537634408602, | |
| "eval_loss": 0.40046602487564087, | |
| "eval_runtime": 202.0733, | |
| "eval_samples_per_second": 4.681, | |
| "eval_steps_per_second": 0.589, | |
| "eval_wer": 14.449477674031616, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 21.559139784946236, | |
| "grad_norm": 0.019417457282543182, | |
| "learning_rate": 2.9189591078066916e-06, | |
| "loss": 0.0027, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 21.612903225806452, | |
| "grad_norm": 0.15449251234531403, | |
| "learning_rate": 2.900371747211896e-06, | |
| "loss": 0.0017, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 21.666666666666668, | |
| "grad_norm": 0.16010086238384247, | |
| "learning_rate": 2.8817843866171008e-06, | |
| "loss": 0.0018, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 21.72043010752688, | |
| "grad_norm": 0.02312368154525757, | |
| "learning_rate": 2.8631970260223053e-06, | |
| "loss": 0.0036, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 21.774193548387096, | |
| "grad_norm": 0.040190454572439194, | |
| "learning_rate": 2.8446096654275094e-06, | |
| "loss": 0.0024, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 21.827956989247312, | |
| "grad_norm": 0.030338788405060768, | |
| "learning_rate": 2.826022304832714e-06, | |
| "loss": 0.0024, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 21.881720430107528, | |
| "grad_norm": 0.18002262711524963, | |
| "learning_rate": 2.8074349442379186e-06, | |
| "loss": 0.0029, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 21.93548387096774, | |
| "grad_norm": 0.047431185841560364, | |
| "learning_rate": 2.7888475836431227e-06, | |
| "loss": 0.0013, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 21.989247311827956, | |
| "grad_norm": 0.12101946771144867, | |
| "learning_rate": 2.7702602230483272e-06, | |
| "loss": 0.003, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 22.043010752688172, | |
| "grad_norm": 0.01733570732176304, | |
| "learning_rate": 2.751672862453532e-06, | |
| "loss": 0.0013, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 22.096774193548388, | |
| "grad_norm": 0.02413998357951641, | |
| "learning_rate": 2.7330855018587364e-06, | |
| "loss": 0.0024, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 22.150537634408604, | |
| "grad_norm": 0.006610923912376165, | |
| "learning_rate": 2.714498141263941e-06, | |
| "loss": 0.0015, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 22.204301075268816, | |
| "grad_norm": 0.11478333920240402, | |
| "learning_rate": 2.6959107806691455e-06, | |
| "loss": 0.0014, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 22.258064516129032, | |
| "grad_norm": 0.8776764869689941, | |
| "learning_rate": 2.6773234200743496e-06, | |
| "loss": 0.0019, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 22.311827956989248, | |
| "grad_norm": 0.020020902156829834, | |
| "learning_rate": 2.658736059479554e-06, | |
| "loss": 0.0022, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 22.365591397849464, | |
| "grad_norm": 0.019508883357048035, | |
| "learning_rate": 2.6401486988847587e-06, | |
| "loss": 0.0017, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 22.419354838709676, | |
| "grad_norm": 0.02609153278172016, | |
| "learning_rate": 2.621561338289963e-06, | |
| "loss": 0.0013, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 22.473118279569892, | |
| "grad_norm": 0.027138570323586464, | |
| "learning_rate": 2.6029739776951674e-06, | |
| "loss": 0.0019, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 22.526881720430108, | |
| "grad_norm": 0.01063444558531046, | |
| "learning_rate": 2.584386617100372e-06, | |
| "loss": 0.0026, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 22.580645161290324, | |
| "grad_norm": 0.24903129041194916, | |
| "learning_rate": 2.565799256505576e-06, | |
| "loss": 0.0027, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 22.580645161290324, | |
| "eval_loss": 0.40335774421691895, | |
| "eval_runtime": 202.0679, | |
| "eval_samples_per_second": 4.682, | |
| "eval_steps_per_second": 0.589, | |
| "eval_wer": 13.93177405935102, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 22.634408602150536, | |
| "grad_norm": 0.1529041826725006, | |
| "learning_rate": 2.547211895910781e-06, | |
| "loss": 0.0018, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 22.688172043010752, | |
| "grad_norm": 0.02129989117383957, | |
| "learning_rate": 2.5286245353159856e-06, | |
| "loss": 0.0013, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 22.741935483870968, | |
| "grad_norm": 0.013442150317132473, | |
| "learning_rate": 2.5100371747211898e-06, | |
| "loss": 0.0028, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 22.795698924731184, | |
| "grad_norm": 0.024951398372650146, | |
| "learning_rate": 2.4914498141263943e-06, | |
| "loss": 0.0022, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 22.849462365591396, | |
| "grad_norm": 0.3933217525482178, | |
| "learning_rate": 2.472862453531599e-06, | |
| "loss": 0.0033, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 22.903225806451612, | |
| "grad_norm": 0.030309738591313362, | |
| "learning_rate": 2.454275092936803e-06, | |
| "loss": 0.0012, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 22.956989247311828, | |
| "grad_norm": 0.1965196579694748, | |
| "learning_rate": 2.4356877323420076e-06, | |
| "loss": 0.0019, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 23.010752688172044, | |
| "grad_norm": 0.2897844612598419, | |
| "learning_rate": 2.417100371747212e-06, | |
| "loss": 0.0017, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 23.06451612903226, | |
| "grad_norm": 0.010055635124444962, | |
| "learning_rate": 2.3985130111524167e-06, | |
| "loss": 0.0017, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 23.118279569892472, | |
| "grad_norm": 0.02669104002416134, | |
| "learning_rate": 2.379925650557621e-06, | |
| "loss": 0.0005, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 23.172043010752688, | |
| "grad_norm": 0.2305319756269455, | |
| "learning_rate": 2.3613382899628253e-06, | |
| "loss": 0.0015, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 23.225806451612904, | |
| "grad_norm": 0.009956962428987026, | |
| "learning_rate": 2.34275092936803e-06, | |
| "loss": 0.0024, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 23.27956989247312, | |
| "grad_norm": 0.1403415948152542, | |
| "learning_rate": 2.3241635687732345e-06, | |
| "loss": 0.0014, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 23.333333333333332, | |
| "grad_norm": 0.21458983421325684, | |
| "learning_rate": 2.305576208178439e-06, | |
| "loss": 0.0017, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 23.387096774193548, | |
| "grad_norm": 0.008475505746901035, | |
| "learning_rate": 2.286988847583643e-06, | |
| "loss": 0.001, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 23.440860215053764, | |
| "grad_norm": 0.02105923928320408, | |
| "learning_rate": 2.2684014869888477e-06, | |
| "loss": 0.0013, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 23.49462365591398, | |
| "grad_norm": 0.007669220678508282, | |
| "learning_rate": 2.2498141263940523e-06, | |
| "loss": 0.0016, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 23.548387096774192, | |
| "grad_norm": 0.007480244617909193, | |
| "learning_rate": 2.231226765799257e-06, | |
| "loss": 0.0013, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 23.602150537634408, | |
| "grad_norm": 0.00940194632858038, | |
| "learning_rate": 2.212639405204461e-06, | |
| "loss": 0.0031, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 23.655913978494624, | |
| "grad_norm": 0.013057105243206024, | |
| "learning_rate": 2.1940520446096655e-06, | |
| "loss": 0.0012, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 23.655913978494624, | |
| "eval_loss": 0.40596359968185425, | |
| "eval_runtime": 202.5237, | |
| "eval_samples_per_second": 4.671, | |
| "eval_steps_per_second": 0.588, | |
| "eval_wer": 13.941018766756033, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 23.70967741935484, | |
| "grad_norm": 0.0065338280983269215, | |
| "learning_rate": 2.17546468401487e-06, | |
| "loss": 0.0015, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 23.763440860215052, | |
| "grad_norm": 0.013204723596572876, | |
| "learning_rate": 2.1568773234200746e-06, | |
| "loss": 0.003, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 23.817204301075268, | |
| "grad_norm": 0.027307022362947464, | |
| "learning_rate": 2.138289962825279e-06, | |
| "loss": 0.0027, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 23.870967741935484, | |
| "grad_norm": 0.014446156099438667, | |
| "learning_rate": 2.1197026022304833e-06, | |
| "loss": 0.0014, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 23.9247311827957, | |
| "grad_norm": 0.014391875825822353, | |
| "learning_rate": 2.101115241635688e-06, | |
| "loss": 0.0031, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 23.978494623655912, | |
| "grad_norm": 0.0412728525698185, | |
| "learning_rate": 2.0825278810408924e-06, | |
| "loss": 0.0022, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 24.032258064516128, | |
| "grad_norm": 0.009646103717386723, | |
| "learning_rate": 2.063940520446097e-06, | |
| "loss": 0.0015, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 24.086021505376344, | |
| "grad_norm": 0.007044603582471609, | |
| "learning_rate": 2.045353159851301e-06, | |
| "loss": 0.0012, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 24.13978494623656, | |
| "grad_norm": 0.013142594136297703, | |
| "learning_rate": 2.0267657992565057e-06, | |
| "loss": 0.0013, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 24.193548387096776, | |
| "grad_norm": 0.006704692263156176, | |
| "learning_rate": 2.00817843866171e-06, | |
| "loss": 0.0015, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 24.247311827956988, | |
| "grad_norm": 0.005626600701361895, | |
| "learning_rate": 1.9895910780669148e-06, | |
| "loss": 0.0012, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 24.301075268817204, | |
| "grad_norm": 0.008840459398925304, | |
| "learning_rate": 1.9710037174721193e-06, | |
| "loss": 0.0019, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 24.35483870967742, | |
| "grad_norm": 0.005357383284717798, | |
| "learning_rate": 1.9524163568773235e-06, | |
| "loss": 0.0011, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 24.408602150537636, | |
| "grad_norm": 0.005820517428219318, | |
| "learning_rate": 1.933828996282528e-06, | |
| "loss": 0.0014, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 24.462365591397848, | |
| "grad_norm": 0.12261584401130676, | |
| "learning_rate": 1.9152416356877326e-06, | |
| "loss": 0.0009, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 24.516129032258064, | |
| "grad_norm": 0.16665996611118317, | |
| "learning_rate": 1.8966542750929371e-06, | |
| "loss": 0.0032, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 24.56989247311828, | |
| "grad_norm": 0.006091310176998377, | |
| "learning_rate": 1.8780669144981415e-06, | |
| "loss": 0.0016, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 24.623655913978496, | |
| "grad_norm": 0.027028294280171394, | |
| "learning_rate": 1.8594795539033458e-06, | |
| "loss": 0.0013, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 24.677419354838708, | |
| "grad_norm": 0.107554592192173, | |
| "learning_rate": 1.8408921933085502e-06, | |
| "loss": 0.0014, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 24.731182795698924, | |
| "grad_norm": 0.006071150302886963, | |
| "learning_rate": 1.822304832713755e-06, | |
| "loss": 0.0008, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 24.731182795698924, | |
| "eval_loss": 0.4129054546356201, | |
| "eval_runtime": 202.4824, | |
| "eval_samples_per_second": 4.672, | |
| "eval_steps_per_second": 0.588, | |
| "eval_wer": 13.848571692705928, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 24.78494623655914, | |
| "grad_norm": 0.005664244759827852, | |
| "learning_rate": 1.8037174721189593e-06, | |
| "loss": 0.0019, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 24.838709677419356, | |
| "grad_norm": 0.007854313589632511, | |
| "learning_rate": 1.7851301115241638e-06, | |
| "loss": 0.0019, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 24.892473118279568, | |
| "grad_norm": 0.1173175498843193, | |
| "learning_rate": 1.7665427509293682e-06, | |
| "loss": 0.002, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 24.946236559139784, | |
| "grad_norm": 0.005052879452705383, | |
| "learning_rate": 1.7479553903345725e-06, | |
| "loss": 0.0021, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.011530703864991665, | |
| "learning_rate": 1.7293680297397773e-06, | |
| "loss": 0.0017, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 25.053763440860216, | |
| "grad_norm": 0.07344318926334381, | |
| "learning_rate": 1.7107806691449816e-06, | |
| "loss": 0.002, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 25.107526881720432, | |
| "grad_norm": 0.12406457215547562, | |
| "learning_rate": 1.692193308550186e-06, | |
| "loss": 0.0017, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 25.161290322580644, | |
| "grad_norm": 0.005189701449126005, | |
| "learning_rate": 1.6736059479553903e-06, | |
| "loss": 0.0016, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 25.21505376344086, | |
| "grad_norm": 0.3264636993408203, | |
| "learning_rate": 1.655018587360595e-06, | |
| "loss": 0.0021, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 25.268817204301076, | |
| "grad_norm": 0.004206045996397734, | |
| "learning_rate": 1.6364312267657994e-06, | |
| "loss": 0.0014, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 25.322580645161292, | |
| "grad_norm": 0.12464595586061478, | |
| "learning_rate": 1.6178438661710038e-06, | |
| "loss": 0.0019, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 25.376344086021504, | |
| "grad_norm": 0.0052951849065721035, | |
| "learning_rate": 1.5992565055762083e-06, | |
| "loss": 0.001, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 25.43010752688172, | |
| "grad_norm": 0.20631186664104462, | |
| "learning_rate": 1.5806691449814127e-06, | |
| "loss": 0.0013, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 25.483870967741936, | |
| "grad_norm": 0.005665977951139212, | |
| "learning_rate": 1.5620817843866174e-06, | |
| "loss": 0.0023, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 25.537634408602152, | |
| "grad_norm": 0.18204852938652039, | |
| "learning_rate": 1.5434944237918218e-06, | |
| "loss": 0.0017, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 25.591397849462364, | |
| "grad_norm": 0.004289372358471155, | |
| "learning_rate": 1.5249070631970261e-06, | |
| "loss": 0.0005, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 25.64516129032258, | |
| "grad_norm": 0.06818500906229019, | |
| "learning_rate": 1.5063197026022305e-06, | |
| "loss": 0.0005, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 25.698924731182796, | |
| "grad_norm": 0.14878062903881073, | |
| "learning_rate": 1.487732342007435e-06, | |
| "loss": 0.0016, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 25.752688172043012, | |
| "grad_norm": 0.003890681779012084, | |
| "learning_rate": 1.4691449814126396e-06, | |
| "loss": 0.0016, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 25.806451612903224, | |
| "grad_norm": 0.004652164876461029, | |
| "learning_rate": 1.450557620817844e-06, | |
| "loss": 0.001, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 25.806451612903224, | |
| "eval_loss": 0.4189203381538391, | |
| "eval_runtime": 202.0119, | |
| "eval_samples_per_second": 4.683, | |
| "eval_steps_per_second": 0.589, | |
| "eval_wer": 13.830082277895904, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 25.86021505376344, | |
| "grad_norm": 0.16533516347408295, | |
| "learning_rate": 1.4319702602230485e-06, | |
| "loss": 0.0011, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 25.913978494623656, | |
| "grad_norm": 0.0048462748527526855, | |
| "learning_rate": 1.4133828996282528e-06, | |
| "loss": 0.001, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 25.967741935483872, | |
| "grad_norm": 0.0046990737318992615, | |
| "learning_rate": 1.3947955390334576e-06, | |
| "loss": 0.0017, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 26.021505376344088, | |
| "grad_norm": 0.003037052694708109, | |
| "learning_rate": 1.376208178438662e-06, | |
| "loss": 0.0024, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 26.0752688172043, | |
| "grad_norm": 0.08328765630722046, | |
| "learning_rate": 1.3576208178438663e-06, | |
| "loss": 0.0011, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 26.129032258064516, | |
| "grad_norm": 0.002522684633731842, | |
| "learning_rate": 1.3390334572490706e-06, | |
| "loss": 0.0022, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 26.182795698924732, | |
| "grad_norm": 0.00458819093182683, | |
| "learning_rate": 1.3204460966542752e-06, | |
| "loss": 0.0018, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 26.236559139784948, | |
| "grad_norm": 0.004143861588090658, | |
| "learning_rate": 1.3018587360594797e-06, | |
| "loss": 0.0008, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 26.29032258064516, | |
| "grad_norm": 0.006522686220705509, | |
| "learning_rate": 1.283271375464684e-06, | |
| "loss": 0.0014, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 26.344086021505376, | |
| "grad_norm": 0.0033553235698491335, | |
| "learning_rate": 1.2646840148698886e-06, | |
| "loss": 0.0018, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 26.397849462365592, | |
| "grad_norm": 0.004214679356664419, | |
| "learning_rate": 1.2460966542750932e-06, | |
| "loss": 0.0014, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 26.451612903225808, | |
| "grad_norm": 0.23780201375484467, | |
| "learning_rate": 1.2275092936802975e-06, | |
| "loss": 0.0021, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 26.50537634408602, | |
| "grad_norm": 0.003071879968047142, | |
| "learning_rate": 1.2089219330855019e-06, | |
| "loss": 0.0021, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 26.559139784946236, | |
| "grad_norm": 0.003364423755556345, | |
| "learning_rate": 1.1903345724907064e-06, | |
| "loss": 0.0015, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 26.612903225806452, | |
| "grad_norm": 0.30511873960494995, | |
| "learning_rate": 1.1717472118959108e-06, | |
| "loss": 0.0018, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "grad_norm": 0.003765388624742627, | |
| "learning_rate": 1.1531598513011153e-06, | |
| "loss": 0.0026, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 26.72043010752688, | |
| "grad_norm": 0.13415110111236572, | |
| "learning_rate": 1.1345724907063199e-06, | |
| "loss": 0.0012, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 26.774193548387096, | |
| "grad_norm": 0.0052949776872992516, | |
| "learning_rate": 1.1159851301115242e-06, | |
| "loss": 0.0006, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 26.827956989247312, | |
| "grad_norm": 0.0027304012328386307, | |
| "learning_rate": 1.0973977695167288e-06, | |
| "loss": 0.001, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 26.881720430107528, | |
| "grad_norm": 0.004548298195004463, | |
| "learning_rate": 1.0788104089219331e-06, | |
| "loss": 0.0008, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 26.881720430107528, | |
| "eval_loss": 0.4191061854362488, | |
| "eval_runtime": 207.3533, | |
| "eval_samples_per_second": 4.562, | |
| "eval_steps_per_second": 0.574, | |
| "eval_wer": 13.959508181566052, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 26.93548387096774, | |
| "grad_norm": 0.0037072377745062113, | |
| "learning_rate": 1.0602230483271377e-06, | |
| "loss": 0.002, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 26.989247311827956, | |
| "grad_norm": 0.004865365568548441, | |
| "learning_rate": 1.041635687732342e-06, | |
| "loss": 0.0012, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 27.043010752688172, | |
| "grad_norm": 0.16591113805770874, | |
| "learning_rate": 1.0230483271375466e-06, | |
| "loss": 0.0008, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 27.096774193548388, | |
| "grad_norm": 0.003480426501482725, | |
| "learning_rate": 1.004460966542751e-06, | |
| "loss": 0.002, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 27.150537634408604, | |
| "grad_norm": 0.003888448467478156, | |
| "learning_rate": 9.858736059479555e-07, | |
| "loss": 0.001, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 27.204301075268816, | |
| "grad_norm": 0.004046307876706123, | |
| "learning_rate": 9.6728624535316e-07, | |
| "loss": 0.0022, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 27.258064516129032, | |
| "grad_norm": 0.004325231071561575, | |
| "learning_rate": 9.486988847583644e-07, | |
| "loss": 0.0024, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 27.311827956989248, | |
| "grad_norm": 0.1196964755654335, | |
| "learning_rate": 9.301115241635688e-07, | |
| "loss": 0.001, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 27.365591397849464, | |
| "grad_norm": 0.003892822889611125, | |
| "learning_rate": 9.115241635687733e-07, | |
| "loss": 0.002, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 27.419354838709676, | |
| "grad_norm": 0.0024752148892730474, | |
| "learning_rate": 8.929368029739778e-07, | |
| "loss": 0.0007, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 27.473118279569892, | |
| "grad_norm": 0.00464650196954608, | |
| "learning_rate": 8.743494423791822e-07, | |
| "loss": 0.0019, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 27.526881720430108, | |
| "grad_norm": 0.2570537328720093, | |
| "learning_rate": 8.557620817843867e-07, | |
| "loss": 0.0022, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 27.580645161290324, | |
| "grad_norm": 0.003213089657947421, | |
| "learning_rate": 8.371747211895912e-07, | |
| "loss": 0.0008, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 27.634408602150536, | |
| "grad_norm": 0.0038951928727328777, | |
| "learning_rate": 8.185873605947955e-07, | |
| "loss": 0.0013, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 27.688172043010752, | |
| "grad_norm": 0.0030759673099964857, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.0009, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 27.741935483870968, | |
| "grad_norm": 0.0037837938871234655, | |
| "learning_rate": 7.814126394052045e-07, | |
| "loss": 0.0022, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 27.795698924731184, | |
| "grad_norm": 0.0026918076910078526, | |
| "learning_rate": 7.62825278810409e-07, | |
| "loss": 0.0016, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 27.849462365591396, | |
| "grad_norm": 0.0030537450220435858, | |
| "learning_rate": 7.442379182156134e-07, | |
| "loss": 0.0008, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 27.903225806451612, | |
| "grad_norm": 0.11770904064178467, | |
| "learning_rate": 7.25650557620818e-07, | |
| "loss": 0.0014, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 27.956989247311828, | |
| "grad_norm": 0.0030784786213189363, | |
| "learning_rate": 7.070631970260223e-07, | |
| "loss": 0.0018, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 27.956989247311828, | |
| "eval_loss": 0.421833336353302, | |
| "eval_runtime": 202.7221, | |
| "eval_samples_per_second": 4.666, | |
| "eval_steps_per_second": 0.587, | |
| "eval_wer": 13.793103448275861, | |
| "step": 13000 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 13950, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.4130840981661286e+21, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
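
The state above is the Hugging Face `Trainer` state file (`trainer_state.json`): `log_history` interleaves training records (every `logging_steps = 25` optimizer steps) with evaluation records (every 500 steps, per `eval_steps` and `save_steps`), and the footer pins the schedule at `max_steps = 13950` over `num_train_epochs = 30`, i.e. 465 steps per epoch, which matches the per-step epoch increment of 1/465 ≈ 0.00215 visible in the log. With `train_batch_size = 8` that implies roughly 3,720 training samples per epoch, assuming a single device and no gradient accumulation (neither is recorded in this file). The `learning_rate` column falls by about 7.43e-10 per step and extrapolates to zero right around step 13950, consistent with a linear decay schedule.

Below is a minimal sketch of how a file like this can be inspected after training. It is not part of the state itself; the path is a hypothetical placeholder, and the script assumes only the structure shown above.

```python
# Sketch: parse a Hugging Face trainer_state.json and summarize its eval history.
# The file path is a placeholder; point it at your own checkpoint directory.
import json

with open("checkpoint-13000/trainer_state.json") as f:
    state = json.load(f)

# Evaluation records are the log_history entries carrying "eval_wer";
# plain training records carry "loss" instead.
evals = [e for e in state["log_history"] if "eval_wer" in e]

# Lowest word error rate observed; in this run it lands on the final eval at step 13000.
best = min(evals, key=lambda e: e["eval_wer"])
print(f"best WER {best['eval_wer']:.2f} at step {best['step']} "
      f"(epoch {best['epoch']:.2f})")

# Side-by-side view of eval_loss vs. eval_wer at each evaluation step.
for e in evals:
    print(e["step"], round(e["eval_loss"], 4), round(e["eval_wer"], 2))
```

The loop makes the interesting divergence visible: `eval_loss` bottoms out at 0.3822 at step 8500 and climbs to 0.4218 by step 13000, while `eval_wer` keeps improving from 14.41 to 13.79 over the same span. Selecting checkpoints by loss and by WER would therefore disagree here, a common pattern on a small evaluation set (roughly 946 samples, from ~202 s of runtime at ~4.67 samples/s).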