diff --git "a/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json" "b/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json" @@ -0,0 +1,5512 @@ +{ + "best_metric": 24.73569978295876, + "best_model_checkpoint": "results/whisper-base/bengali/checkpoint-9000", + "epoch": 10.626398210290828, + "eval_steps": 1000, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 20.479082107543945, + "learning_rate": 4.4e-07, + "loss": 2.3197, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 11.922679901123047, + "learning_rate": 9.400000000000001e-07, + "loss": 2.0798, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 7.62770938873291, + "learning_rate": 1.44e-06, + "loss": 1.7731, + "step": 75 + }, + { + "epoch": 0.06, + "grad_norm": 5.752090930938721, + "learning_rate": 1.94e-06, + "loss": 1.5748, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 4.62416934967041, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.4206, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 5.872653484344482, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.3248, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 6.6760993003845215, + "learning_rate": 3.44e-06, + "loss": 1.2546, + "step": 175 + }, + { + "epoch": 0.11, + "grad_norm": 6.7307329177856445, + "learning_rate": 3.94e-06, + "loss": 1.128, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 6.937326431274414, + "learning_rate": 4.440000000000001e-06, + "loss": 0.8661, + "step": 225 + }, + { + "epoch": 0.14, + "grad_norm": 4.662399768829346, + "learning_rate": 4.94e-06, + "loss": 0.646, + "step": 250 + }, + { + "epoch": 0.15, + "grad_norm": 4.14452600479126, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.5209, + "step": 275 + }, + { + "epoch": 0.17, + "grad_norm": 4.323141098022461, + "learning_rate": 5.94e-06, + "loss": 0.4461, + "step": 300 + }, + { + "epoch": 0.18, + "grad_norm": 4.011408805847168, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4018, + "step": 325 + }, + { + "epoch": 0.2, + "grad_norm": 5.2240705490112305, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.3637, + "step": 350 + }, + { + "epoch": 0.21, + "grad_norm": 4.614215850830078, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3387, + "step": 375 + }, + { + "epoch": 0.22, + "grad_norm": 4.833929538726807, + "learning_rate": 7.94e-06, + "loss": 0.3189, + "step": 400 + }, + { + "epoch": 0.24, + "grad_norm": 3.848154067993164, + "learning_rate": 8.44e-06, + "loss": 0.2969, + "step": 425 + }, + { + "epoch": 0.25, + "grad_norm": 5.367223262786865, + "learning_rate": 8.94e-06, + "loss": 0.2912, + "step": 450 + }, + { + "epoch": 0.27, + "grad_norm": 4.365331172943115, + "learning_rate": 9.440000000000001e-06, + "loss": 0.286, + "step": 475 + }, + { + "epoch": 0.28, + "grad_norm": 6.139851093292236, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2743, + "step": 500 + }, + { + "epoch": 0.29, + "grad_norm": 5.86342191696167, + "learning_rate": 9.997788944723618e-06, + "loss": 0.2666, + "step": 525 + }, + { + "epoch": 0.31, + "grad_norm": 3.888645887374878, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2557, + "step": 550 + }, + { + "epoch": 0.32, + "grad_norm": 4.206381797790527, + "learning_rate": 9.992763819095477e-06, + "loss": 0.2506, + "step": 575 + }, + { + "epoch": 0.34, + "grad_norm": 4.591144561767578, + "learning_rate": 9.990251256281408e-06, + "loss": 0.2393, + "step": 600 + }, + { + "epoch": 0.35, + "grad_norm": 3.7976574897766113, + "learning_rate": 9.987738693467337e-06, + "loss": 0.238, + "step": 625 + }, + { + "epoch": 0.36, + "grad_norm": 4.7065911293029785, + "learning_rate": 9.985226130653267e-06, + "loss": 0.2286, + "step": 650 + }, + { + "epoch": 0.38, + "grad_norm": 4.082373142242432, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2256, + "step": 675 + }, + { + "epoch": 0.39, + "grad_norm": 3.7245709896087646, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2217, + "step": 700 + }, + { + "epoch": 0.41, + "grad_norm": 3.2761738300323486, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2141, + "step": 725 + }, + { + "epoch": 0.42, + "grad_norm": 3.9429969787597656, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2148, + "step": 750 + }, + { + "epoch": 0.43, + "grad_norm": 2.745335340499878, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2087, + "step": 775 + }, + { + "epoch": 0.45, + "grad_norm": 3.284982442855835, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2071, + "step": 800 + }, + { + "epoch": 0.46, + "grad_norm": 3.2479090690612793, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2009, + "step": 825 + }, + { + "epoch": 0.48, + "grad_norm": 3.2984981536865234, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2012, + "step": 850 + }, + { + "epoch": 0.49, + "grad_norm": 3.311579704284668, + "learning_rate": 9.962613065326634e-06, + "loss": 0.1941, + "step": 875 + }, + { + "epoch": 0.5, + "grad_norm": 3.8732566833496094, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1977, + "step": 900 + }, + { + "epoch": 0.52, + "grad_norm": 3.0260491371154785, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1911, + "step": 925 + }, + { + "epoch": 0.53, + "grad_norm": 2.8873238563537598, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1885, + "step": 950 + }, + { + "epoch": 0.55, + "grad_norm": 3.0286946296691895, + "learning_rate": 9.952562814070353e-06, + "loss": 0.186, + "step": 975 + }, + { + "epoch": 0.56, + "grad_norm": 2.839372158050537, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1856, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 0.14572674036026, + "eval_runtime": 1294.9998, + "eval_samples_per_second": 1.158, + "eval_steps_per_second": 1.158, + "eval_wer": 40.77574739200448, + "step": 1000 + }, + { + "epoch": 0.57, + "grad_norm": 5.458705425262451, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1829, + "step": 1025 + }, + { + "epoch": 0.59, + "grad_norm": 2.867703676223755, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1831, + "step": 1050 + }, + { + "epoch": 0.6, + "grad_norm": 2.979769229888916, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1829, + "step": 1075 + }, + { + "epoch": 0.62, + "grad_norm": 3.345287561416626, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1753, + "step": 1100 + }, + { + "epoch": 0.63, + "grad_norm": 3.8183023929595947, + "learning_rate": 9.93748743718593e-06, + "loss": 0.1785, + "step": 1125 + }, + { + "epoch": 0.64, + "grad_norm": 3.1384637355804443, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1727, + "step": 1150 + }, + { + "epoch": 0.66, + "grad_norm": 2.651932716369629, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1685, + "step": 1175 + }, + { + "epoch": 0.67, + "grad_norm": 3.3064398765563965, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1713, + "step": 1200 + }, + { + "epoch": 0.69, + "grad_norm": 3.0926802158355713, + "learning_rate": 9.92743718592965e-06, + "loss": 0.167, + "step": 1225 + }, + { + "epoch": 0.7, + "grad_norm": 2.8239219188690186, + "learning_rate": 9.924924623115579e-06, + "loss": 0.166, + "step": 1250 + }, + { + "epoch": 0.71, + "grad_norm": 2.59196400642395, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1652, + "step": 1275 + }, + { + "epoch": 0.73, + "grad_norm": 2.587282419204712, + "learning_rate": 9.91989949748744e-06, + "loss": 0.162, + "step": 1300 + }, + { + "epoch": 0.74, + "grad_norm": 3.395512104034424, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1643, + "step": 1325 + }, + { + "epoch": 0.76, + "grad_norm": 3.0003013610839844, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1616, + "step": 1350 + }, + { + "epoch": 0.77, + "grad_norm": 2.4067747592926025, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1613, + "step": 1375 + }, + { + "epoch": 0.78, + "grad_norm": 3.117004632949829, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1551, + "step": 1400 + }, + { + "epoch": 0.8, + "grad_norm": 2.4046616554260254, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1563, + "step": 1425 + }, + { + "epoch": 0.81, + "grad_norm": 4.316405773162842, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1611, + "step": 1450 + }, + { + "epoch": 0.82, + "grad_norm": 3.059438943862915, + "learning_rate": 9.902311557788945e-06, + "loss": 0.152, + "step": 1475 + }, + { + "epoch": 0.84, + "grad_norm": 2.760357141494751, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1466, + "step": 1500 + }, + { + "epoch": 0.85, + "grad_norm": 3.3387234210968018, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1585, + "step": 1525 + }, + { + "epoch": 0.87, + "grad_norm": 2.262953281402588, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1531, + "step": 1550 + }, + { + "epoch": 0.88, + "grad_norm": 2.503844976425171, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1472, + "step": 1575 + }, + { + "epoch": 0.89, + "grad_norm": 2.459182024002075, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1495, + "step": 1600 + }, + { + "epoch": 0.91, + "grad_norm": 2.699077844619751, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1536, + "step": 1625 + }, + { + "epoch": 0.92, + "grad_norm": 3.1890530586242676, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1483, + "step": 1650 + }, + { + "epoch": 0.94, + "grad_norm": 3.2312211990356445, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1435, + "step": 1675 + }, + { + "epoch": 0.95, + "grad_norm": 2.5697576999664307, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1465, + "step": 1700 + }, + { + "epoch": 0.96, + "grad_norm": 3.028343439102173, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1445, + "step": 1725 + }, + { + "epoch": 0.98, + "grad_norm": 2.123739004135132, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1412, + "step": 1750 + }, + { + "epoch": 0.99, + "grad_norm": 2.920504331588745, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1408, + "step": 1775 + }, + { + "epoch": 1.01, + "grad_norm": 2.644196033477783, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1401, + "step": 1800 + }, + { + "epoch": 1.02, + "grad_norm": 2.547008991241455, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1327, + "step": 1825 + }, + { + "epoch": 1.03, + "grad_norm": 2.1837239265441895, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1301, + "step": 1850 + }, + { + "epoch": 1.05, + "grad_norm": 2.398655414581299, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1323, + "step": 1875 + }, + { + "epoch": 1.06, + "grad_norm": 2.1983821392059326, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1291, + "step": 1900 + }, + { + "epoch": 1.08, + "grad_norm": 2.395869731903076, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1247, + "step": 1925 + }, + { + "epoch": 1.09, + "grad_norm": 2.402700424194336, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1297, + "step": 1950 + }, + { + "epoch": 1.1, + "grad_norm": 2.341567277908325, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1275, + "step": 1975 + }, + { + "epoch": 1.12, + "grad_norm": 3.422062873840332, + "learning_rate": 9.849547738693467e-06, + "loss": 0.125, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 0.10941009223461151, + "eval_runtime": 1282.51, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 1.17, + "eval_wer": 32.99026815094868, + "step": 2000 + }, + { + "epoch": 1.13, + "grad_norm": 3.0541255474090576, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1222, + "step": 2025 + }, + { + "epoch": 1.15, + "grad_norm": 2.4372646808624268, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1228, + "step": 2050 + }, + { + "epoch": 1.16, + "grad_norm": 2.5140273571014404, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1277, + "step": 2075 + }, + { + "epoch": 1.17, + "grad_norm": 2.391875982284546, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1223, + "step": 2100 + }, + { + "epoch": 1.19, + "grad_norm": 2.4066147804260254, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1211, + "step": 2125 + }, + { + "epoch": 1.2, + "grad_norm": 2.2214889526367188, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1219, + "step": 2150 + }, + { + "epoch": 1.22, + "grad_norm": 2.5247437953948975, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1188, + "step": 2175 + }, + { + "epoch": 1.23, + "grad_norm": 3.2937655448913574, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1195, + "step": 2200 + }, + { + "epoch": 1.24, + "grad_norm": 2.3191704750061035, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1226, + "step": 2225 + }, + { + "epoch": 1.26, + "grad_norm": 2.650092124938965, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1218, + "step": 2250 + }, + { + "epoch": 1.27, + "grad_norm": 3.117119550704956, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1202, + "step": 2275 + }, + { + "epoch": 1.29, + "grad_norm": 2.1879711151123047, + "learning_rate": 9.819396984924624e-06, + "loss": 0.119, + "step": 2300 + }, + { + "epoch": 1.3, + "grad_norm": 3.037064552307129, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1209, + "step": 2325 + }, + { + "epoch": 1.31, + "grad_norm": 2.5333287715911865, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1157, + "step": 2350 + }, + { + "epoch": 1.33, + "grad_norm": 2.4529097080230713, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1169, + "step": 2375 + }, + { + "epoch": 1.34, + "grad_norm": 2.246457815170288, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1172, + "step": 2400 + }, + { + "epoch": 1.36, + "grad_norm": 1.8162719011306763, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1155, + "step": 2425 + }, + { + "epoch": 1.37, + "grad_norm": 2.8499510288238525, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1166, + "step": 2450 + }, + { + "epoch": 1.38, + "grad_norm": 1.823399543762207, + "learning_rate": 9.801809045226131e-06, + "loss": 0.115, + "step": 2475 + }, + { + "epoch": 1.4, + "grad_norm": 2.7357358932495117, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1108, + "step": 2500 + }, + { + "epoch": 1.41, + "grad_norm": 3.5756173133850098, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1183, + "step": 2525 + }, + { + "epoch": 1.43, + "grad_norm": 2.150749683380127, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1157, + "step": 2550 + }, + { + "epoch": 1.44, + "grad_norm": 2.2136292457580566, + "learning_rate": 9.79175879396985e-06, + "loss": 0.118, + "step": 2575 + }, + { + "epoch": 1.45, + "grad_norm": 2.220170259475708, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1165, + "step": 2600 + }, + { + "epoch": 1.47, + "grad_norm": 2.3177106380462646, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1123, + "step": 2625 + }, + { + "epoch": 1.48, + "grad_norm": 2.216575860977173, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1128, + "step": 2650 + }, + { + "epoch": 1.5, + "grad_norm": 2.057546615600586, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1097, + "step": 2675 + }, + { + "epoch": 1.51, + "grad_norm": 2.5831878185272217, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1093, + "step": 2700 + }, + { + "epoch": 1.52, + "grad_norm": 2.306478261947632, + "learning_rate": 9.776683417085428e-06, + "loss": 0.114, + "step": 2725 + }, + { + "epoch": 1.54, + "grad_norm": 2.323587417602539, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1117, + "step": 2750 + }, + { + "epoch": 1.55, + "grad_norm": 3.0451858043670654, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1142, + "step": 2775 + }, + { + "epoch": 1.57, + "grad_norm": 3.178542375564575, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1159, + "step": 2800 + }, + { + "epoch": 1.58, + "grad_norm": 2.5522382259368896, + "learning_rate": 9.766633165829147e-06, + "loss": 0.1102, + "step": 2825 + }, + { + "epoch": 1.59, + "grad_norm": 2.8957133293151855, + "learning_rate": 9.764120603015076e-06, + "loss": 0.112, + "step": 2850 + }, + { + "epoch": 1.61, + "grad_norm": 2.62931752204895, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1132, + "step": 2875 + }, + { + "epoch": 1.62, + "grad_norm": 2.26515793800354, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1091, + "step": 2900 + }, + { + "epoch": 1.64, + "grad_norm": 2.3211770057678223, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1071, + "step": 2925 + }, + { + "epoch": 1.65, + "grad_norm": 2.146005392074585, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1067, + "step": 2950 + }, + { + "epoch": 1.66, + "grad_norm": 2.2931926250457764, + "learning_rate": 9.751557788944724e-06, + "loss": 0.1101, + "step": 2975 + }, + { + "epoch": 1.68, + "grad_norm": 2.441265821456909, + "learning_rate": 9.749045226130654e-06, + "loss": 0.1093, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 0.09424228966236115, + "eval_runtime": 1257.7532, + "eval_samples_per_second": 1.193, + "eval_steps_per_second": 1.193, + "eval_wer": 28.91549394384933, + "step": 3000 + }, + { + "epoch": 1.69, + "grad_norm": 2.352782726287842, + "learning_rate": 9.746532663316583e-06, + "loss": 0.1069, + "step": 3025 + }, + { + "epoch": 1.71, + "grad_norm": 2.396038055419922, + "learning_rate": 9.744020100502514e-06, + "loss": 0.1071, + "step": 3050 + }, + { + "epoch": 1.72, + "grad_norm": 2.1817808151245117, + "learning_rate": 9.741507537688443e-06, + "loss": 0.1085, + "step": 3075 + }, + { + "epoch": 1.73, + "grad_norm": 2.5631215572357178, + "learning_rate": 9.738994974874373e-06, + "loss": 0.1068, + "step": 3100 + }, + { + "epoch": 1.75, + "grad_norm": 2.2351434230804443, + "learning_rate": 9.736482412060302e-06, + "loss": 0.105, + "step": 3125 + }, + { + "epoch": 1.76, + "grad_norm": 2.0444986820220947, + "learning_rate": 9.733969849246231e-06, + "loss": 0.107, + "step": 3150 + }, + { + "epoch": 1.78, + "grad_norm": 2.6535725593566895, + "learning_rate": 9.731457286432162e-06, + "loss": 0.1069, + "step": 3175 + }, + { + "epoch": 1.79, + "grad_norm": 2.8334600925445557, + "learning_rate": 9.728944723618092e-06, + "loss": 0.1051, + "step": 3200 + }, + { + "epoch": 1.8, + "grad_norm": 2.6148955821990967, + "learning_rate": 9.726432160804021e-06, + "loss": 0.103, + "step": 3225 + }, + { + "epoch": 1.82, + "grad_norm": 2.369356393814087, + "learning_rate": 9.72391959798995e-06, + "loss": 0.1116, + "step": 3250 + }, + { + "epoch": 1.83, + "grad_norm": 2.479933261871338, + "learning_rate": 9.721407035175881e-06, + "loss": 0.1049, + "step": 3275 + }, + { + "epoch": 1.85, + "grad_norm": 2.4552206993103027, + "learning_rate": 9.718894472361809e-06, + "loss": 0.1046, + "step": 3300 + }, + { + "epoch": 1.86, + "grad_norm": 1.9066778421401978, + "learning_rate": 9.71638190954774e-06, + "loss": 0.1013, + "step": 3325 + }, + { + "epoch": 1.87, + "grad_norm": 2.111132860183716, + "learning_rate": 9.71386934673367e-06, + "loss": 0.1003, + "step": 3350 + }, + { + "epoch": 1.89, + "grad_norm": 3.1862635612487793, + "learning_rate": 9.711356783919599e-06, + "loss": 0.1049, + "step": 3375 + }, + { + "epoch": 1.9, + "grad_norm": 1.8302373886108398, + "learning_rate": 9.70884422110553e-06, + "loss": 0.1072, + "step": 3400 + }, + { + "epoch": 1.92, + "grad_norm": 2.316040277481079, + "learning_rate": 9.706331658291457e-06, + "loss": 0.1019, + "step": 3425 + }, + { + "epoch": 1.93, + "grad_norm": 2.2230210304260254, + "learning_rate": 9.703819095477388e-06, + "loss": 0.1054, + "step": 3450 + }, + { + "epoch": 1.94, + "grad_norm": 2.367607355117798, + "learning_rate": 9.701306532663318e-06, + "loss": 0.1029, + "step": 3475 + }, + { + "epoch": 1.96, + "grad_norm": 2.147073984146118, + "learning_rate": 9.698793969849247e-06, + "loss": 0.1009, + "step": 3500 + }, + { + "epoch": 1.97, + "grad_norm": 2.4300241470336914, + "learning_rate": 9.696281407035176e-06, + "loss": 0.101, + "step": 3525 + }, + { + "epoch": 1.99, + "grad_norm": 2.174846649169922, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0978, + "step": 3550 + }, + { + "epoch": 2.0, + "grad_norm": 2.3007450103759766, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0995, + "step": 3575 + }, + { + "epoch": 2.01, + "grad_norm": 2.4721336364746094, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0887, + "step": 3600 + }, + { + "epoch": 2.03, + "grad_norm": 2.2181708812713623, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0866, + "step": 3625 + }, + { + "epoch": 2.04, + "grad_norm": 1.8976894617080688, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0862, + "step": 3650 + }, + { + "epoch": 2.06, + "grad_norm": 2.3264853954315186, + "learning_rate": 9.681206030150756e-06, + "loss": 0.088, + "step": 3675 + }, + { + "epoch": 2.07, + "grad_norm": 1.9397796392440796, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0896, + "step": 3700 + }, + { + "epoch": 2.08, + "grad_norm": 2.0393741130828857, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0854, + "step": 3725 + }, + { + "epoch": 2.1, + "grad_norm": 2.169718027114868, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0883, + "step": 3750 + }, + { + "epoch": 2.11, + "grad_norm": 2.1115963459014893, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0923, + "step": 3775 + }, + { + "epoch": 2.13, + "grad_norm": 2.055377721786499, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0859, + "step": 3800 + }, + { + "epoch": 2.14, + "grad_norm": 2.477351665496826, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0849, + "step": 3825 + }, + { + "epoch": 2.15, + "grad_norm": 2.2051820755004883, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0839, + "step": 3850 + }, + { + "epoch": 2.17, + "grad_norm": 2.350365400314331, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0871, + "step": 3875 + }, + { + "epoch": 2.18, + "grad_norm": 2.170224905014038, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0855, + "step": 3900 + }, + { + "epoch": 2.2, + "grad_norm": 1.870482325553894, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0867, + "step": 3925 + }, + { + "epoch": 2.21, + "grad_norm": 1.9789308309555054, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0878, + "step": 3950 + }, + { + "epoch": 2.22, + "grad_norm": 1.9876500368118286, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0846, + "step": 3975 + }, + { + "epoch": 2.24, + "grad_norm": 2.23836088180542, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0841, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 0.08813700079917908, + "eval_runtime": 1548.8639, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.968, + "eval_wer": 27.97031435972835, + "step": 4000 + }, + { + "epoch": 2.25, + "grad_norm": 2.6324143409729004, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0846, + "step": 4025 + }, + { + "epoch": 2.27, + "grad_norm": 1.8375723361968994, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0862, + "step": 4050 + }, + { + "epoch": 2.28, + "grad_norm": 2.220909595489502, + "learning_rate": 9.64100502512563e-06, + "loss": 0.084, + "step": 4075 + }, + { + "epoch": 2.29, + "grad_norm": 2.610175371170044, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0842, + "step": 4100 + }, + { + "epoch": 2.31, + "grad_norm": 1.811500072479248, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0823, + "step": 4125 + }, + { + "epoch": 2.32, + "grad_norm": 2.0553157329559326, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0825, + "step": 4150 + }, + { + "epoch": 2.34, + "grad_norm": 1.9910943508148193, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0854, + "step": 4175 + }, + { + "epoch": 2.35, + "grad_norm": 1.9267401695251465, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0812, + "step": 4200 + }, + { + "epoch": 2.36, + "grad_norm": 1.8411636352539062, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0848, + "step": 4225 + }, + { + "epoch": 2.38, + "grad_norm": 2.24184513092041, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0811, + "step": 4250 + }, + { + "epoch": 2.39, + "grad_norm": 2.0340802669525146, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0829, + "step": 4275 + }, + { + "epoch": 2.4, + "grad_norm": 1.898876667022705, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0846, + "step": 4300 + }, + { + "epoch": 2.42, + "grad_norm": 2.3137125968933105, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0823, + "step": 4325 + }, + { + "epoch": 2.43, + "grad_norm": 2.510221481323242, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0862, + "step": 4350 + }, + { + "epoch": 2.45, + "grad_norm": 2.1451172828674316, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0832, + "step": 4375 + }, + { + "epoch": 2.46, + "grad_norm": 1.8479361534118652, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0842, + "step": 4400 + }, + { + "epoch": 2.47, + "grad_norm": 2.3328495025634766, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0857, + "step": 4425 + }, + { + "epoch": 2.49, + "grad_norm": 1.9808458089828491, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0791, + "step": 4450 + }, + { + "epoch": 2.5, + "grad_norm": 1.7810078859329224, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0812, + "step": 4475 + }, + { + "epoch": 2.52, + "grad_norm": 2.0694406032562256, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0816, + "step": 4500 + }, + { + "epoch": 2.53, + "grad_norm": 2.125455141067505, + "learning_rate": 9.595778894472363e-06, + "loss": 0.084, + "step": 4525 + }, + { + "epoch": 2.54, + "grad_norm": 2.307854175567627, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0796, + "step": 4550 + }, + { + "epoch": 2.56, + "grad_norm": 2.1585569381713867, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0796, + "step": 4575 + }, + { + "epoch": 2.57, + "grad_norm": 2.4452669620513916, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0854, + "step": 4600 + }, + { + "epoch": 2.59, + "grad_norm": 2.0743563175201416, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0849, + "step": 4625 + }, + { + "epoch": 2.6, + "grad_norm": 1.8115513324737549, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0807, + "step": 4650 + }, + { + "epoch": 2.61, + "grad_norm": 2.037189483642578, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0846, + "step": 4675 + }, + { + "epoch": 2.63, + "grad_norm": 2.2096242904663086, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0791, + "step": 4700 + }, + { + "epoch": 2.64, + "grad_norm": 1.8818609714508057, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0846, + "step": 4725 + }, + { + "epoch": 2.66, + "grad_norm": 2.0996897220611572, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0772, + "step": 4750 + }, + { + "epoch": 2.67, + "grad_norm": 2.697157144546509, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0779, + "step": 4775 + }, + { + "epoch": 2.68, + "grad_norm": 1.9727689027786255, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0825, + "step": 4800 + }, + { + "epoch": 2.7, + "grad_norm": 2.3720059394836426, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0796, + "step": 4825 + }, + { + "epoch": 2.71, + "grad_norm": 2.233374834060669, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0811, + "step": 4850 + }, + { + "epoch": 2.73, + "grad_norm": 1.9238030910491943, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0798, + "step": 4875 + }, + { + "epoch": 2.74, + "grad_norm": 2.8772740364074707, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0772, + "step": 4900 + }, + { + "epoch": 2.75, + "grad_norm": 2.729653835296631, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0771, + "step": 4925 + }, + { + "epoch": 2.77, + "grad_norm": 2.06793212890625, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0774, + "step": 4950 + }, + { + "epoch": 2.78, + "grad_norm": 1.8402355909347534, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0804, + "step": 4975 + }, + { + "epoch": 2.8, + "grad_norm": 2.32189679145813, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0809, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 0.08343477547168732, + "eval_runtime": 1539.3544, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.974, + "eval_wer": 25.953931246936918, + "step": 5000 + }, + { + "epoch": 2.81, + "grad_norm": 2.597062587738037, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0789, + "step": 5025 + }, + { + "epoch": 2.82, + "grad_norm": 2.2318174839019775, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0804, + "step": 5050 + }, + { + "epoch": 2.84, + "grad_norm": 1.986308217048645, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0795, + "step": 5075 + }, + { + "epoch": 2.85, + "grad_norm": 1.7781836986541748, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0766, + "step": 5100 + }, + { + "epoch": 2.87, + "grad_norm": 2.045193672180176, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0779, + "step": 5125 + }, + { + "epoch": 2.88, + "grad_norm": 1.809615135192871, + "learning_rate": 9.532964824120604e-06, + "loss": 0.075, + "step": 5150 + }, + { + "epoch": 2.89, + "grad_norm": 2.1733291149139404, + "learning_rate": 9.530452261306534e-06, + "loss": 0.078, + "step": 5175 + }, + { + "epoch": 2.91, + "grad_norm": 1.6681715250015259, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0779, + "step": 5200 + }, + { + "epoch": 2.92, + "grad_norm": 2.364774703979492, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0793, + "step": 5225 + }, + { + "epoch": 2.94, + "grad_norm": 1.6892741918563843, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0757, + "step": 5250 + }, + { + "epoch": 2.95, + "grad_norm": 2.010044574737549, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0771, + "step": 5275 + }, + { + "epoch": 2.96, + "grad_norm": 1.8010215759277344, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0763, + "step": 5300 + }, + { + "epoch": 2.98, + "grad_norm": 1.8384108543395996, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0766, + "step": 5325 + }, + { + "epoch": 2.99, + "grad_norm": 1.962332010269165, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0761, + "step": 5350 + }, + { + "epoch": 3.01, + "grad_norm": 1.5537636280059814, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0743, + "step": 5375 + }, + { + "epoch": 3.02, + "grad_norm": 1.6470623016357422, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0608, + "step": 5400 + }, + { + "epoch": 3.03, + "grad_norm": 1.8470031023025513, + "learning_rate": 9.50532663316583e-06, + "loss": 0.064, + "step": 5425 + }, + { + "epoch": 3.05, + "grad_norm": 1.7956515550613403, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0665, + "step": 5450 + }, + { + "epoch": 3.06, + "grad_norm": 2.0213046073913574, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0633, + "step": 5475 + }, + { + "epoch": 3.08, + "grad_norm": 1.9063010215759277, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0634, + "step": 5500 + }, + { + "epoch": 3.09, + "grad_norm": 1.8944132328033447, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0617, + "step": 5525 + }, + { + "epoch": 3.1, + "grad_norm": 2.2454872131347656, + "learning_rate": 9.492763819095479e-06, + "loss": 0.068, + "step": 5550 + }, + { + "epoch": 3.12, + "grad_norm": 2.492413282394409, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0614, + "step": 5575 + }, + { + "epoch": 3.13, + "grad_norm": 2.1105923652648926, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0625, + "step": 5600 + }, + { + "epoch": 3.15, + "grad_norm": 2.487652540206909, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0639, + "step": 5625 + }, + { + "epoch": 3.16, + "grad_norm": 1.711843729019165, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0601, + "step": 5650 + }, + { + "epoch": 3.17, + "grad_norm": 2.1762421131134033, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0646, + "step": 5675 + }, + { + "epoch": 3.19, + "grad_norm": 2.102074146270752, + "learning_rate": 9.477688442211056e-06, + "loss": 0.065, + "step": 5700 + }, + { + "epoch": 3.2, + "grad_norm": 1.9095159769058228, + "learning_rate": 9.475175879396985e-06, + "loss": 0.061, + "step": 5725 + }, + { + "epoch": 3.22, + "grad_norm": 2.3133902549743652, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0681, + "step": 5750 + }, + { + "epoch": 3.23, + "grad_norm": 1.6935017108917236, + "learning_rate": 9.470150753768846e-06, + "loss": 0.062, + "step": 5775 + }, + { + "epoch": 3.24, + "grad_norm": 2.0041425228118896, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0638, + "step": 5800 + }, + { + "epoch": 3.26, + "grad_norm": 2.380725383758545, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0666, + "step": 5825 + }, + { + "epoch": 3.27, + "grad_norm": 1.7464348077774048, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0599, + "step": 5850 + }, + { + "epoch": 3.29, + "grad_norm": 1.8467971086502075, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0648, + "step": 5875 + }, + { + "epoch": 3.3, + "grad_norm": 2.2188971042633057, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0657, + "step": 5900 + }, + { + "epoch": 3.31, + "grad_norm": 2.2380785942077637, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0632, + "step": 5925 + }, + { + "epoch": 3.33, + "grad_norm": 1.6837469339370728, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0633, + "step": 5950 + }, + { + "epoch": 3.34, + "grad_norm": 1.9306910037994385, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0631, + "step": 5975 + }, + { + "epoch": 3.36, + "grad_norm": 2.4268481731414795, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0651, + "step": 6000 + }, + { + "epoch": 3.36, + "eval_loss": 0.08409538865089417, + "eval_runtime": 1258.0848, + "eval_samples_per_second": 1.192, + "eval_steps_per_second": 1.192, + "eval_wer": 26.093957851991878, + "step": 6000 + }, + { + "epoch": 3.37, + "grad_norm": 2.007359504699707, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0578, + "step": 6025 + }, + { + "epoch": 3.38, + "grad_norm": 1.8847688436508179, + "learning_rate": 9.442613065326634e-06, + "loss": 0.0642, + "step": 6050 + }, + { + "epoch": 3.4, + "grad_norm": 2.306269407272339, + "learning_rate": 9.440100502512563e-06, + "loss": 0.0638, + "step": 6075 + }, + { + "epoch": 3.41, + "grad_norm": 2.3040685653686523, + "learning_rate": 9.437587939698494e-06, + "loss": 0.062, + "step": 6100 + }, + { + "epoch": 3.43, + "grad_norm": 2.1980137825012207, + "learning_rate": 9.435075376884422e-06, + "loss": 0.0645, + "step": 6125 + }, + { + "epoch": 3.44, + "grad_norm": 2.4031503200531006, + "learning_rate": 9.432562814070353e-06, + "loss": 0.0653, + "step": 6150 + }, + { + "epoch": 3.45, + "grad_norm": 2.2297208309173584, + "learning_rate": 9.430050251256282e-06, + "loss": 0.0603, + "step": 6175 + }, + { + "epoch": 3.47, + "grad_norm": 1.817520260810852, + "learning_rate": 9.427537688442212e-06, + "loss": 0.0651, + "step": 6200 + }, + { + "epoch": 3.48, + "grad_norm": 2.1112923622131348, + "learning_rate": 9.425025125628141e-06, + "loss": 0.0645, + "step": 6225 + }, + { + "epoch": 3.5, + "grad_norm": 1.82675302028656, + "learning_rate": 9.422512562814072e-06, + "loss": 0.065, + "step": 6250 + }, + { + "epoch": 3.51, + "grad_norm": 1.9629912376403809, + "learning_rate": 9.42e-06, + "loss": 0.0641, + "step": 6275 + }, + { + "epoch": 3.52, + "grad_norm": 2.342519521713257, + "learning_rate": 9.41748743718593e-06, + "loss": 0.063, + "step": 6300 + }, + { + "epoch": 3.54, + "grad_norm": 2.0519747734069824, + "learning_rate": 9.41497487437186e-06, + "loss": 0.0633, + "step": 6325 + }, + { + "epoch": 3.55, + "grad_norm": 1.8285983800888062, + "learning_rate": 9.41246231155779e-06, + "loss": 0.0623, + "step": 6350 + }, + { + "epoch": 3.57, + "grad_norm": 1.7750009298324585, + "learning_rate": 9.40994974874372e-06, + "loss": 0.0611, + "step": 6375 + }, + { + "epoch": 3.58, + "grad_norm": 2.1673221588134766, + "learning_rate": 9.407437185929648e-06, + "loss": 0.0636, + "step": 6400 + }, + { + "epoch": 3.59, + "grad_norm": 2.1094229221343994, + "learning_rate": 9.404924623115579e-06, + "loss": 0.0609, + "step": 6425 + }, + { + "epoch": 3.61, + "grad_norm": 2.2001357078552246, + "learning_rate": 9.402412060301508e-06, + "loss": 0.0623, + "step": 6450 + }, + { + "epoch": 3.62, + "grad_norm": 2.0866901874542236, + "learning_rate": 9.399899497487438e-06, + "loss": 0.0637, + "step": 6475 + }, + { + "epoch": 3.64, + "grad_norm": 1.9105075597763062, + "learning_rate": 9.397386934673369e-06, + "loss": 0.0601, + "step": 6500 + }, + { + "epoch": 3.65, + "grad_norm": 1.6594455242156982, + "learning_rate": 9.394874371859298e-06, + "loss": 0.0638, + "step": 6525 + }, + { + "epoch": 3.66, + "grad_norm": 1.9969534873962402, + "learning_rate": 9.392361809045227e-06, + "loss": 0.0617, + "step": 6550 + }, + { + "epoch": 3.68, + "grad_norm": 1.8099467754364014, + "learning_rate": 9.389849246231157e-06, + "loss": 0.0628, + "step": 6575 + }, + { + "epoch": 3.69, + "grad_norm": 2.2564048767089844, + "learning_rate": 9.387336683417086e-06, + "loss": 0.0629, + "step": 6600 + }, + { + "epoch": 3.71, + "grad_norm": 1.8307623863220215, + "learning_rate": 9.384824120603015e-06, + "loss": 0.0577, + "step": 6625 + }, + { + "epoch": 3.72, + "grad_norm": 1.759268879890442, + "learning_rate": 9.382311557788946e-06, + "loss": 0.059, + "step": 6650 + }, + { + "epoch": 3.73, + "grad_norm": 2.067389726638794, + "learning_rate": 9.379798994974874e-06, + "loss": 0.0616, + "step": 6675 + }, + { + "epoch": 3.75, + "grad_norm": 1.8005328178405762, + "learning_rate": 9.377286432160805e-06, + "loss": 0.0599, + "step": 6700 + }, + { + "epoch": 3.76, + "grad_norm": 2.594447135925293, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0606, + "step": 6725 + }, + { + "epoch": 3.78, + "grad_norm": 1.6017422676086426, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0621, + "step": 6750 + }, + { + "epoch": 3.79, + "grad_norm": 1.852373719215393, + "learning_rate": 9.369748743718595e-06, + "loss": 0.0605, + "step": 6775 + }, + { + "epoch": 3.8, + "grad_norm": 1.860703706741333, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0595, + "step": 6800 + }, + { + "epoch": 3.82, + "grad_norm": 2.018710136413574, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0601, + "step": 6825 + }, + { + "epoch": 3.83, + "grad_norm": 2.154536247253418, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0613, + "step": 6850 + }, + { + "epoch": 3.85, + "grad_norm": 1.9843919277191162, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0597, + "step": 6875 + }, + { + "epoch": 3.86, + "grad_norm": 2.1597299575805664, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0608, + "step": 6900 + }, + { + "epoch": 3.87, + "grad_norm": 1.994773268699646, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0634, + "step": 6925 + }, + { + "epoch": 3.89, + "grad_norm": 1.985724687576294, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0617, + "step": 6950 + }, + { + "epoch": 3.9, + "grad_norm": 1.6854017972946167, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0597, + "step": 6975 + }, + { + "epoch": 3.91, + "grad_norm": 1.9718555212020874, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0621, + "step": 7000 + }, + { + "epoch": 3.91, + "eval_loss": 0.07950112968683243, + "eval_runtime": 1540.6934, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.974, + "eval_wer": 24.74270111321151, + "step": 7000 + }, + { + "epoch": 3.93, + "grad_norm": 2.3680989742279053, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0609, + "step": 7025 + }, + { + "epoch": 3.94, + "grad_norm": 2.2002029418945312, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0603, + "step": 7050 + }, + { + "epoch": 3.96, + "grad_norm": 2.0039074420928955, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0572, + "step": 7075 + }, + { + "epoch": 3.97, + "grad_norm": 1.7604914903640747, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0585, + "step": 7100 + }, + { + "epoch": 3.98, + "grad_norm": 2.173600912094116, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0608, + "step": 7125 + }, + { + "epoch": 4.0, + "grad_norm": 1.7853466272354126, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0626, + "step": 7150 + }, + { + "epoch": 4.01, + "grad_norm": 1.6174085140228271, + "learning_rate": 9.329547738693469e-06, + "loss": 0.048, + "step": 7175 + }, + { + "epoch": 4.03, + "grad_norm": 1.6191383600234985, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0466, + "step": 7200 + }, + { + "epoch": 4.04, + "grad_norm": 1.7680180072784424, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0466, + "step": 7225 + }, + { + "epoch": 4.05, + "grad_norm": 1.6971068382263184, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0494, + "step": 7250 + }, + { + "epoch": 4.07, + "grad_norm": 1.7964837551116943, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0448, + "step": 7275 + }, + { + "epoch": 4.08, + "grad_norm": 1.9257533550262451, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0473, + "step": 7300 + }, + { + "epoch": 4.1, + "grad_norm": 2.192664861679077, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0486, + "step": 7325 + }, + { + "epoch": 4.11, + "grad_norm": 1.9059473276138306, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0473, + "step": 7350 + }, + { + "epoch": 4.12, + "grad_norm": 2.3671586513519287, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0483, + "step": 7375 + }, + { + "epoch": 4.14, + "grad_norm": 1.9169944524765015, + "learning_rate": 9.306934673366836e-06, + "loss": 0.048, + "step": 7400 + }, + { + "epoch": 4.15, + "grad_norm": 1.6867883205413818, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0444, + "step": 7425 + }, + { + "epoch": 4.17, + "grad_norm": 2.1389479637145996, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0474, + "step": 7450 + }, + { + "epoch": 4.18, + "grad_norm": 1.9204530715942383, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0488, + "step": 7475 + }, + { + "epoch": 4.19, + "grad_norm": 1.8195775747299194, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0495, + "step": 7500 + }, + { + "epoch": 4.21, + "grad_norm": 1.8835941553115845, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0458, + "step": 7525 + }, + { + "epoch": 4.22, + "grad_norm": 2.0439579486846924, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0475, + "step": 7550 + }, + { + "epoch": 4.24, + "grad_norm": 1.8745596408843994, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0473, + "step": 7575 + }, + { + "epoch": 4.25, + "grad_norm": 1.8819526433944702, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0503, + "step": 7600 + }, + { + "epoch": 4.26, + "grad_norm": 1.7282466888427734, + "learning_rate": 9.284321608040202e-06, + "loss": 0.046, + "step": 7625 + }, + { + "epoch": 4.28, + "grad_norm": 2.1101884841918945, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0475, + "step": 7650 + }, + { + "epoch": 4.29, + "grad_norm": 1.5361087322235107, + "learning_rate": 9.279296482412062e-06, + "loss": 0.0479, + "step": 7675 + }, + { + "epoch": 4.31, + "grad_norm": 1.6633384227752686, + "learning_rate": 9.27678391959799e-06, + "loss": 0.047, + "step": 7700 + }, + { + "epoch": 4.32, + "grad_norm": 1.911967158317566, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0482, + "step": 7725 + }, + { + "epoch": 4.33, + "grad_norm": 1.876800537109375, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0475, + "step": 7750 + }, + { + "epoch": 4.35, + "grad_norm": 1.90635347366333, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0491, + "step": 7775 + }, + { + "epoch": 4.36, + "grad_norm": 1.8439699411392212, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0473, + "step": 7800 + }, + { + "epoch": 4.38, + "grad_norm": 1.8636940717697144, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0472, + "step": 7825 + }, + { + "epoch": 4.39, + "grad_norm": 2.0078325271606445, + "learning_rate": 9.261708542713569e-06, + "loss": 0.05, + "step": 7850 + }, + { + "epoch": 4.4, + "grad_norm": 1.5912322998046875, + "learning_rate": 9.259195979899498e-06, + "loss": 0.046, + "step": 7875 + }, + { + "epoch": 4.42, + "grad_norm": 2.0076215267181396, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0453, + "step": 7900 + }, + { + "epoch": 4.43, + "grad_norm": 1.8626244068145752, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0479, + "step": 7925 + }, + { + "epoch": 4.45, + "grad_norm": 1.7121516466140747, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0475, + "step": 7950 + }, + { + "epoch": 4.46, + "grad_norm": 2.361213207244873, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0478, + "step": 7975 + }, + { + "epoch": 4.47, + "grad_norm": 1.7829092741012573, + "learning_rate": 9.246633165829147e-06, + "loss": 0.048, + "step": 8000 + }, + { + "epoch": 4.47, + "eval_loss": 0.08309131115674973, + "eval_runtime": 1537.2902, + "eval_samples_per_second": 0.976, + "eval_steps_per_second": 0.976, + "eval_wer": 25.16978225862914, + "step": 8000 + }, + { + "epoch": 4.49, + "grad_norm": 1.985420823097229, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0484, + "step": 8025 + }, + { + "epoch": 4.5, + "grad_norm": 2.077108144760132, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0472, + "step": 8050 + }, + { + "epoch": 4.52, + "grad_norm": 1.9472047090530396, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0478, + "step": 8075 + }, + { + "epoch": 4.53, + "grad_norm": 2.2777373790740967, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0488, + "step": 8100 + }, + { + "epoch": 4.54, + "grad_norm": 2.1253933906555176, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0481, + "step": 8125 + }, + { + "epoch": 4.56, + "grad_norm": 1.873387098312378, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0478, + "step": 8150 + }, + { + "epoch": 4.57, + "grad_norm": 1.9682562351226807, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0471, + "step": 8175 + }, + { + "epoch": 4.59, + "grad_norm": 1.9117881059646606, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0499, + "step": 8200 + }, + { + "epoch": 4.6, + "grad_norm": 2.2858989238739014, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0466, + "step": 8225 + }, + { + "epoch": 4.61, + "grad_norm": 1.874833345413208, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0505, + "step": 8250 + }, + { + "epoch": 4.63, + "grad_norm": 1.8459376096725464, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0468, + "step": 8275 + }, + { + "epoch": 4.64, + "grad_norm": 1.8114656209945679, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0453, + "step": 8300 + }, + { + "epoch": 4.66, + "grad_norm": 1.9587324857711792, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0478, + "step": 8325 + }, + { + "epoch": 4.67, + "grad_norm": 2.2526862621307373, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0452, + "step": 8350 + }, + { + "epoch": 4.68, + "grad_norm": 2.2629716396331787, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0498, + "step": 8375 + }, + { + "epoch": 4.7, + "grad_norm": 1.932504653930664, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0451, + "step": 8400 + }, + { + "epoch": 4.71, + "grad_norm": 1.9428602457046509, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0477, + "step": 8425 + }, + { + "epoch": 4.73, + "grad_norm": 1.9576139450073242, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0471, + "step": 8450 + }, + { + "epoch": 4.74, + "grad_norm": 1.9237579107284546, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0462, + "step": 8475 + }, + { + "epoch": 4.75, + "grad_norm": 2.21309494972229, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0471, + "step": 8500 + }, + { + "epoch": 4.77, + "grad_norm": 1.881838321685791, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0464, + "step": 8525 + }, + { + "epoch": 4.78, + "grad_norm": 1.9514816999435425, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0446, + "step": 8550 + }, + { + "epoch": 4.8, + "grad_norm": 1.9100663661956787, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0476, + "step": 8575 + }, + { + "epoch": 4.81, + "grad_norm": 2.2070868015289307, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0486, + "step": 8600 + }, + { + "epoch": 4.82, + "grad_norm": 1.7795501947402954, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0463, + "step": 8625 + }, + { + "epoch": 4.84, + "grad_norm": 1.6400113105773926, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0467, + "step": 8650 + }, + { + "epoch": 4.85, + "grad_norm": 2.5019569396972656, + "learning_rate": 9.178793969849247e-06, + "loss": 0.048, + "step": 8675 + }, + { + "epoch": 4.87, + "grad_norm": 1.6988704204559326, + "learning_rate": 9.176281407035176e-06, + "loss": 0.047, + "step": 8700 + }, + { + "epoch": 4.88, + "grad_norm": 1.8742605447769165, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0465, + "step": 8725 + }, + { + "epoch": 4.89, + "grad_norm": 1.8531486988067627, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0453, + "step": 8750 + }, + { + "epoch": 4.91, + "grad_norm": 1.8690688610076904, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0458, + "step": 8775 + }, + { + "epoch": 4.92, + "grad_norm": 1.8053923845291138, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0467, + "step": 8800 + }, + { + "epoch": 4.94, + "grad_norm": 1.7939424514770508, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0486, + "step": 8825 + }, + { + "epoch": 4.95, + "grad_norm": 1.8470284938812256, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0449, + "step": 8850 + }, + { + "epoch": 4.96, + "grad_norm": 1.9548052549362183, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0469, + "step": 8875 + }, + { + "epoch": 4.98, + "grad_norm": 2.1690359115600586, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0456, + "step": 8900 + }, + { + "epoch": 4.99, + "grad_norm": 1.6433491706848145, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0454, + "step": 8925 + }, + { + "epoch": 5.01, + "grad_norm": 1.6121461391448975, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0417, + "step": 8950 + }, + { + "epoch": 5.02, + "grad_norm": 1.1955963373184204, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0339, + "step": 8975 + }, + { + "epoch": 5.03, + "grad_norm": 2.314229965209961, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0348, + "step": 9000 + }, + { + "epoch": 5.03, + "eval_loss": 0.08638431876897812, + "eval_runtime": 1547.2933, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.969, + "eval_wer": 24.73569978295876, + "step": 9000 + }, + { + "epoch": 5.05, + "grad_norm": 1.9079633951187134, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0333, + "step": 9025 + }, + { + "epoch": 5.06, + "grad_norm": 1.8292887210845947, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0357, + "step": 9050 + }, + { + "epoch": 5.08, + "grad_norm": 1.8790791034698486, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0329, + "step": 9075 + }, + { + "epoch": 5.09, + "grad_norm": 1.7024917602539062, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0353, + "step": 9100 + }, + { + "epoch": 5.1, + "grad_norm": 1.728759527206421, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0343, + "step": 9125 + }, + { + "epoch": 5.12, + "grad_norm": 1.4763672351837158, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0357, + "step": 9150 + }, + { + "epoch": 5.13, + "grad_norm": 1.541526436805725, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0333, + "step": 9175 + }, + { + "epoch": 5.15, + "grad_norm": 2.032904624938965, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0362, + "step": 9200 + }, + { + "epoch": 5.16, + "grad_norm": 1.4788271188735962, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0363, + "step": 9225 + }, + { + "epoch": 5.17, + "grad_norm": 1.5795788764953613, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0341, + "step": 9250 + }, + { + "epoch": 5.19, + "grad_norm": 2.0994884967803955, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0376, + "step": 9275 + }, + { + "epoch": 5.2, + "grad_norm": 1.8375917673110962, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0343, + "step": 9300 + }, + { + "epoch": 5.22, + "grad_norm": 1.9826256036758423, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0359, + "step": 9325 + }, + { + "epoch": 5.23, + "grad_norm": 1.5949604511260986, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0334, + "step": 9350 + }, + { + "epoch": 5.24, + "grad_norm": 2.062195301055908, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0347, + "step": 9375 + }, + { + "epoch": 5.26, + "grad_norm": 1.67949640750885, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0356, + "step": 9400 + }, + { + "epoch": 5.27, + "grad_norm": 1.844469666481018, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0355, + "step": 9425 + }, + { + "epoch": 5.29, + "grad_norm": 1.7946102619171143, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0356, + "step": 9450 + }, + { + "epoch": 5.3, + "grad_norm": 1.9098948240280151, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0367, + "step": 9475 + }, + { + "epoch": 5.31, + "grad_norm": 2.024116039276123, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0363, + "step": 9500 + }, + { + "epoch": 5.33, + "grad_norm": 1.7224719524383545, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0354, + "step": 9525 + }, + { + "epoch": 5.34, + "grad_norm": 1.597653865814209, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0355, + "step": 9550 + }, + { + "epoch": 5.36, + "grad_norm": 1.5614638328552246, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0328, + "step": 9575 + }, + { + "epoch": 5.37, + "grad_norm": 2.019812822341919, + "learning_rate": 9.085829145728644e-06, + "loss": 0.033, + "step": 9600 + }, + { + "epoch": 5.38, + "grad_norm": 1.9035489559173584, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0353, + "step": 9625 + }, + { + "epoch": 5.4, + "grad_norm": 1.9559494256973267, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0376, + "step": 9650 + }, + { + "epoch": 5.41, + "grad_norm": 1.6555471420288086, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0371, + "step": 9675 + }, + { + "epoch": 5.43, + "grad_norm": 1.8791228532791138, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0359, + "step": 9700 + }, + { + "epoch": 5.44, + "grad_norm": 2.1574184894561768, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0336, + "step": 9725 + }, + { + "epoch": 5.45, + "grad_norm": 1.6978013515472412, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0355, + "step": 9750 + }, + { + "epoch": 5.47, + "grad_norm": 1.5776162147521973, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0378, + "step": 9775 + }, + { + "epoch": 5.48, + "grad_norm": 1.9801268577575684, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0351, + "step": 9800 + }, + { + "epoch": 5.49, + "grad_norm": 2.0189411640167236, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0358, + "step": 9825 + }, + { + "epoch": 5.51, + "grad_norm": 1.6830401420593262, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0369, + "step": 9850 + }, + { + "epoch": 5.52, + "grad_norm": 1.9140214920043945, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0367, + "step": 9875 + }, + { + "epoch": 5.54, + "grad_norm": 1.8516499996185303, + "learning_rate": 9.0556783919598e-06, + "loss": 0.036, + "step": 9900 + }, + { + "epoch": 5.55, + "grad_norm": 1.8060190677642822, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0345, + "step": 9925 + }, + { + "epoch": 5.56, + "grad_norm": 1.9838569164276123, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0367, + "step": 9950 + }, + { + "epoch": 5.58, + "grad_norm": 1.842403531074524, + "learning_rate": 9.048140703517589e-06, + "loss": 0.035, + "step": 9975 + }, + { + "epoch": 5.59, + "grad_norm": 1.4839140176773071, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0354, + "step": 10000 + }, + { + "epoch": 5.59, + "eval_loss": 0.08825862407684326, + "eval_runtime": 1547.5777, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.969, + "eval_wer": 25.722887348596235, + "step": 10000 + }, + { + "epoch": 5.61, + "grad_norm": 1.8587559461593628, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0372, + "step": 10025 + }, + { + "epoch": 5.62, + "grad_norm": 2.097219705581665, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0349, + "step": 10050 + }, + { + "epoch": 5.63, + "grad_norm": 1.9071590900421143, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0361, + "step": 10075 + }, + { + "epoch": 5.65, + "grad_norm": 1.5997395515441895, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0355, + "step": 10100 + }, + { + "epoch": 5.66, + "grad_norm": 1.879860758781433, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0358, + "step": 10125 + }, + { + "epoch": 5.68, + "grad_norm": 1.8191801309585571, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0364, + "step": 10150 + }, + { + "epoch": 5.69, + "grad_norm": 1.6246849298477173, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0361, + "step": 10175 + }, + { + "epoch": 5.7, + "grad_norm": 1.9530048370361328, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0336, + "step": 10200 + }, + { + "epoch": 5.72, + "grad_norm": 1.7234457731246948, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0344, + "step": 10225 + }, + { + "epoch": 5.73, + "grad_norm": 2.0469868183135986, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0358, + "step": 10250 + }, + { + "epoch": 5.75, + "grad_norm": 1.5930458307266235, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0343, + "step": 10275 + }, + { + "epoch": 5.76, + "grad_norm": 1.6492174863815308, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0368, + "step": 10300 + }, + { + "epoch": 5.77, + "grad_norm": 1.5358837842941284, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0344, + "step": 10325 + }, + { + "epoch": 5.79, + "grad_norm": 1.8455798625946045, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0373, + "step": 10350 + }, + { + "epoch": 5.8, + "grad_norm": 2.1980483531951904, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0346, + "step": 10375 + }, + { + "epoch": 5.82, + "grad_norm": 1.9728219509124756, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0371, + "step": 10400 + }, + { + "epoch": 5.83, + "grad_norm": 1.9018830060958862, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0342, + "step": 10425 + }, + { + "epoch": 5.84, + "grad_norm": 1.5646787881851196, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0338, + "step": 10450 + }, + { + "epoch": 5.86, + "grad_norm": 1.8304989337921143, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0369, + "step": 10475 + }, + { + "epoch": 5.87, + "grad_norm": 2.231433868408203, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0351, + "step": 10500 + }, + { + "epoch": 5.89, + "grad_norm": 1.581472396850586, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0343, + "step": 10525 + }, + { + "epoch": 5.9, + "grad_norm": 1.9242390394210815, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0343, + "step": 10550 + }, + { + "epoch": 5.91, + "grad_norm": 1.6705659627914429, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0338, + "step": 10575 + }, + { + "epoch": 5.93, + "grad_norm": 1.8718876838684082, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0361, + "step": 10600 + }, + { + "epoch": 5.94, + "grad_norm": 2.0012893676757812, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0358, + "step": 10625 + }, + { + "epoch": 5.96, + "grad_norm": 1.8063054084777832, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0336, + "step": 10650 + }, + { + "epoch": 5.97, + "grad_norm": 2.0780587196350098, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0364, + "step": 10675 + }, + { + "epoch": 5.98, + "grad_norm": 1.8927561044692993, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0339, + "step": 10700 + }, + { + "epoch": 6.0, + "grad_norm": 1.6720718145370483, + "learning_rate": 8.97286432160804e-06, + "loss": 0.033, + "step": 10725 + }, + { + "epoch": 6.01, + "grad_norm": 1.5935943126678467, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0262, + "step": 10750 + }, + { + "epoch": 6.03, + "grad_norm": 1.691170573234558, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0256, + "step": 10775 + }, + { + "epoch": 6.04, + "grad_norm": 1.8816449642181396, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0255, + "step": 10800 + }, + { + "epoch": 6.05, + "grad_norm": 1.4208954572677612, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0249, + "step": 10825 + }, + { + "epoch": 6.07, + "grad_norm": 1.672662377357483, + "learning_rate": 8.960301507537689e-06, + "loss": 0.024, + "step": 10850 + }, + { + "epoch": 6.08, + "grad_norm": 1.7169475555419922, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0241, + "step": 10875 + }, + { + "epoch": 6.1, + "grad_norm": 1.526678442955017, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0256, + "step": 10900 + }, + { + "epoch": 6.11, + "grad_norm": 1.5596081018447876, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0258, + "step": 10925 + }, + { + "epoch": 6.12, + "grad_norm": 2.0523030757904053, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0255, + "step": 10950 + }, + { + "epoch": 6.14, + "grad_norm": 1.6910170316696167, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0239, + "step": 10975 + }, + { + "epoch": 6.15, + "grad_norm": 2.0352160930633545, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0248, + "step": 11000 + }, + { + "epoch": 6.15, + "eval_loss": 0.0970708429813385, + "eval_runtime": 1542.3258, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.973, + "eval_wer": 25.253798221662116, + "step": 11000 + }, + { + "epoch": 6.17, + "grad_norm": 1.617034673690796, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0258, + "step": 11025 + }, + { + "epoch": 6.18, + "grad_norm": 1.3897294998168945, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0255, + "step": 11050 + }, + { + "epoch": 6.19, + "grad_norm": 1.8258416652679443, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0262, + "step": 11075 + }, + { + "epoch": 6.21, + "grad_norm": 1.945985198020935, + "learning_rate": 8.935175879396986e-06, + "loss": 0.027, + "step": 11100 + }, + { + "epoch": 6.22, + "grad_norm": 1.790618896484375, + "learning_rate": 8.932663316582915e-06, + "loss": 0.025, + "step": 11125 + }, + { + "epoch": 6.24, + "grad_norm": 1.7258163690567017, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0252, + "step": 11150 + }, + { + "epoch": 6.25, + "grad_norm": 2.1420340538024902, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0255, + "step": 11175 + }, + { + "epoch": 6.26, + "grad_norm": 1.4598020315170288, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0249, + "step": 11200 + }, + { + "epoch": 6.28, + "grad_norm": 1.576206088066101, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0243, + "step": 11225 + }, + { + "epoch": 6.29, + "grad_norm": 2.0248894691467285, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0252, + "step": 11250 + }, + { + "epoch": 6.31, + "grad_norm": 1.5580759048461914, + "learning_rate": 8.917587939698493e-06, + "loss": 0.026, + "step": 11275 + }, + { + "epoch": 6.32, + "grad_norm": 1.6908998489379883, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0255, + "step": 11300 + }, + { + "epoch": 6.33, + "grad_norm": 1.8284715414047241, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0253, + "step": 11325 + }, + { + "epoch": 6.35, + "grad_norm": 1.3091851472854614, + "learning_rate": 8.910050251256282e-06, + "loss": 0.024, + "step": 11350 + }, + { + "epoch": 6.36, + "grad_norm": 1.5588383674621582, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0251, + "step": 11375 + }, + { + "epoch": 6.38, + "grad_norm": 1.6450953483581543, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0265, + "step": 11400 + }, + { + "epoch": 6.39, + "grad_norm": 1.5461299419403076, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0254, + "step": 11425 + }, + { + "epoch": 6.4, + "grad_norm": 1.8010703325271606, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0272, + "step": 11450 + }, + { + "epoch": 6.42, + "grad_norm": 1.8586952686309814, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0262, + "step": 11475 + }, + { + "epoch": 6.43, + "grad_norm": 1.745080590248108, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0257, + "step": 11500 + }, + { + "epoch": 6.45, + "grad_norm": 1.6464567184448242, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0266, + "step": 11525 + }, + { + "epoch": 6.46, + "grad_norm": 1.5463519096374512, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0256, + "step": 11550 + }, + { + "epoch": 6.47, + "grad_norm": 2.099302291870117, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0257, + "step": 11575 + }, + { + "epoch": 6.49, + "grad_norm": 1.8039714097976685, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0264, + "step": 11600 + }, + { + "epoch": 6.5, + "grad_norm": 1.673970103263855, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0268, + "step": 11625 + }, + { + "epoch": 6.52, + "grad_norm": 1.7410236597061157, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0244, + "step": 11650 + }, + { + "epoch": 6.53, + "grad_norm": 1.5405129194259644, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0259, + "step": 11675 + }, + { + "epoch": 6.54, + "grad_norm": 1.6754980087280273, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0248, + "step": 11700 + }, + { + "epoch": 6.56, + "grad_norm": 1.7075492143630981, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0253, + "step": 11725 + }, + { + "epoch": 6.57, + "grad_norm": 1.7080812454223633, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0256, + "step": 11750 + }, + { + "epoch": 6.59, + "grad_norm": 1.8137493133544922, + "learning_rate": 8.867336683417086e-06, + "loss": 0.027, + "step": 11775 + }, + { + "epoch": 6.6, + "grad_norm": 1.8992966413497925, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0265, + "step": 11800 + }, + { + "epoch": 6.61, + "grad_norm": 1.7295725345611572, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0257, + "step": 11825 + }, + { + "epoch": 6.63, + "grad_norm": 1.847601056098938, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.64, + "grad_norm": 1.9641332626342773, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0263, + "step": 11875 + }, + { + "epoch": 6.66, + "grad_norm": 1.715695858001709, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0246, + "step": 11900 + }, + { + "epoch": 6.67, + "grad_norm": 1.8434436321258545, + "learning_rate": 8.852261306532665e-06, + "loss": 0.027, + "step": 11925 + }, + { + "epoch": 6.68, + "grad_norm": 1.5504242181777954, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0262, + "step": 11950 + }, + { + "epoch": 6.7, + "grad_norm": 1.9810289144515991, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0265, + "step": 11975 + }, + { + "epoch": 6.71, + "grad_norm": 1.6186991930007935, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0265, + "step": 12000 + }, + { + "epoch": 6.71, + "eval_loss": 0.09767824411392212, + "eval_runtime": 1544.1555, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.971, + "eval_wer": 25.407827487222573, + "step": 12000 + }, + { + "epoch": 6.73, + "grad_norm": 1.813325047492981, + "learning_rate": 8.842211055276382e-06, + "loss": 0.027, + "step": 12025 + }, + { + "epoch": 6.74, + "grad_norm": 1.896941065788269, + "learning_rate": 8.839698492462312e-06, + "loss": 0.026, + "step": 12050 + }, + { + "epoch": 6.75, + "grad_norm": 1.9059789180755615, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0259, + "step": 12075 + }, + { + "epoch": 6.77, + "grad_norm": 1.596055269241333, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0257, + "step": 12100 + }, + { + "epoch": 6.78, + "grad_norm": 1.7050496339797974, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0271, + "step": 12125 + }, + { + "epoch": 6.8, + "grad_norm": 1.666527271270752, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0263, + "step": 12150 + }, + { + "epoch": 6.81, + "grad_norm": 1.5924322605133057, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0263, + "step": 12175 + }, + { + "epoch": 6.82, + "grad_norm": 1.512611985206604, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0243, + "step": 12200 + }, + { + "epoch": 6.84, + "grad_norm": 2.3868093490600586, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0261, + "step": 12225 + }, + { + "epoch": 6.85, + "grad_norm": 1.4095665216445923, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0268, + "step": 12250 + }, + { + "epoch": 6.87, + "grad_norm": 1.6239408254623413, + "learning_rate": 8.817085427135679e-06, + "loss": 0.025, + "step": 12275 + }, + { + "epoch": 6.88, + "grad_norm": 1.8920422792434692, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 6.89, + "grad_norm": 1.8745660781860352, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0264, + "step": 12325 + }, + { + "epoch": 6.91, + "grad_norm": 1.9475117921829224, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0272, + "step": 12350 + }, + { + "epoch": 6.92, + "grad_norm": 1.8490169048309326, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0258, + "step": 12375 + }, + { + "epoch": 6.94, + "grad_norm": 1.6835732460021973, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0274, + "step": 12400 + }, + { + "epoch": 6.95, + "grad_norm": 1.7207229137420654, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0269, + "step": 12425 + }, + { + "epoch": 6.96, + "grad_norm": 1.711963415145874, + "learning_rate": 8.79959798994975e-06, + "loss": 0.025, + "step": 12450 + }, + { + "epoch": 6.98, + "grad_norm": 2.1076231002807617, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0269, + "step": 12475 + }, + { + "epoch": 6.99, + "grad_norm": 1.554739236831665, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0257, + "step": 12500 + }, + { + "epoch": 7.01, + "grad_norm": 1.307066559791565, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0231, + "step": 12525 + }, + { + "epoch": 7.02, + "grad_norm": 1.4923175573349, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0184, + "step": 12550 + }, + { + "epoch": 7.03, + "grad_norm": 1.799414038658142, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0184, + "step": 12575 + }, + { + "epoch": 7.05, + "grad_norm": 1.4363048076629639, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0182, + "step": 12600 + }, + { + "epoch": 7.06, + "grad_norm": 1.7587112188339233, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0179, + "step": 12625 + }, + { + "epoch": 7.07, + "grad_norm": 1.457387924194336, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0184, + "step": 12650 + }, + { + "epoch": 7.09, + "grad_norm": 1.6355394124984741, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0186, + "step": 12675 + }, + { + "epoch": 7.1, + "grad_norm": 1.719256043434143, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0175, + "step": 12700 + }, + { + "epoch": 7.12, + "grad_norm": 1.6304877996444702, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0167, + "step": 12725 + }, + { + "epoch": 7.13, + "grad_norm": 1.9032924175262451, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0172, + "step": 12750 + }, + { + "epoch": 7.14, + "grad_norm": 1.4420280456542969, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0173, + "step": 12775 + }, + { + "epoch": 7.16, + "grad_norm": 1.5082676410675049, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0185, + "step": 12800 + }, + { + "epoch": 7.17, + "grad_norm": 1.89511239528656, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0176, + "step": 12825 + }, + { + "epoch": 7.19, + "grad_norm": 1.8311001062393188, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0182, + "step": 12850 + }, + { + "epoch": 7.2, + "grad_norm": 1.8964591026306152, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0179, + "step": 12875 + }, + { + "epoch": 7.21, + "grad_norm": 1.7208322286605835, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.23, + "grad_norm": 1.1779571771621704, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0178, + "step": 12925 + }, + { + "epoch": 7.24, + "grad_norm": 1.6696969270706177, + "learning_rate": 8.749346733668343e-06, + "loss": 0.018, + "step": 12950 + }, + { + "epoch": 7.26, + "grad_norm": 1.5363738536834717, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0174, + "step": 12975 + }, + { + "epoch": 7.27, + "grad_norm": 1.5757735967636108, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0173, + "step": 13000 + }, + { + "epoch": 7.27, + "eval_loss": 0.10644286870956421, + "eval_runtime": 1250.7357, + "eval_samples_per_second": 1.199, + "eval_steps_per_second": 1.199, + "eval_wer": 25.56185675278303, + "step": 13000 + }, + { + "epoch": 7.28, + "grad_norm": 1.2004117965698242, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0177, + "step": 13025 + }, + { + "epoch": 7.3, + "grad_norm": 1.7819678783416748, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0187, + "step": 13050 + }, + { + "epoch": 7.31, + "grad_norm": 1.9206719398498535, + "learning_rate": 8.736783919597991e-06, + "loss": 0.018, + "step": 13075 + }, + { + "epoch": 7.33, + "grad_norm": 1.931118130683899, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0185, + "step": 13100 + }, + { + "epoch": 7.34, + "grad_norm": 1.9506186246871948, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0177, + "step": 13125 + }, + { + "epoch": 7.35, + "grad_norm": 2.123161554336548, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0188, + "step": 13150 + }, + { + "epoch": 7.37, + "grad_norm": 1.6503745317459106, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0195, + "step": 13175 + }, + { + "epoch": 7.38, + "grad_norm": 1.7911384105682373, + "learning_rate": 8.72422110552764e-06, + "loss": 0.018, + "step": 13200 + }, + { + "epoch": 7.4, + "grad_norm": 1.8498948812484741, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0191, + "step": 13225 + }, + { + "epoch": 7.41, + "grad_norm": 1.825381875038147, + "learning_rate": 8.719195979899498e-06, + "loss": 0.018, + "step": 13250 + }, + { + "epoch": 7.42, + "grad_norm": 1.6786212921142578, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0186, + "step": 13275 + }, + { + "epoch": 7.44, + "grad_norm": 1.7535996437072754, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0189, + "step": 13300 + }, + { + "epoch": 7.45, + "grad_norm": 1.445857286453247, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0179, + "step": 13325 + }, + { + "epoch": 7.47, + "grad_norm": 1.8488236665725708, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0197, + "step": 13350 + }, + { + "epoch": 7.48, + "grad_norm": 1.4603967666625977, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0203, + "step": 13375 + }, + { + "epoch": 7.49, + "grad_norm": 1.525683045387268, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0183, + "step": 13400 + }, + { + "epoch": 7.51, + "grad_norm": 1.6381289958953857, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0197, + "step": 13425 + }, + { + "epoch": 7.52, + "grad_norm": 1.4716131687164307, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0196, + "step": 13450 + }, + { + "epoch": 7.54, + "grad_norm": 1.688878059387207, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0183, + "step": 13475 + }, + { + "epoch": 7.55, + "grad_norm": 1.4268814325332642, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0181, + "step": 13500 + }, + { + "epoch": 7.56, + "grad_norm": 1.8417357206344604, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0191, + "step": 13525 + }, + { + "epoch": 7.58, + "grad_norm": 1.6161320209503174, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0183, + "step": 13550 + }, + { + "epoch": 7.59, + "grad_norm": 1.398176670074463, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0197, + "step": 13575 + }, + { + "epoch": 7.61, + "grad_norm": 1.9973474740982056, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0191, + "step": 13600 + }, + { + "epoch": 7.62, + "grad_norm": 1.3835101127624512, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0181, + "step": 13625 + }, + { + "epoch": 7.63, + "grad_norm": 1.814956545829773, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0188, + "step": 13650 + }, + { + "epoch": 7.65, + "grad_norm": 1.386847734451294, + "learning_rate": 8.676482412060302e-06, + "loss": 0.019, + "step": 13675 + }, + { + "epoch": 7.66, + "grad_norm": 1.6018749475479126, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0183, + "step": 13700 + }, + { + "epoch": 7.68, + "grad_norm": 1.71701180934906, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0206, + "step": 13725 + }, + { + "epoch": 7.69, + "grad_norm": 1.40883207321167, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0192, + "step": 13750 + }, + { + "epoch": 7.7, + "grad_norm": 1.1789219379425049, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0193, + "step": 13775 + }, + { + "epoch": 7.72, + "grad_norm": 1.4395115375518799, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0192, + "step": 13800 + }, + { + "epoch": 7.73, + "grad_norm": 2.011089563369751, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0195, + "step": 13825 + }, + { + "epoch": 7.75, + "grad_norm": 1.7866398096084595, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0193, + "step": 13850 + }, + { + "epoch": 7.76, + "grad_norm": 1.6536067724227905, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0188, + "step": 13875 + }, + { + "epoch": 7.77, + "grad_norm": 1.7321090698242188, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0187, + "step": 13900 + }, + { + "epoch": 7.79, + "grad_norm": 1.5572364330291748, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0193, + "step": 13925 + }, + { + "epoch": 7.8, + "grad_norm": 1.6894944906234741, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0187, + "step": 13950 + }, + { + "epoch": 7.82, + "grad_norm": 1.5226629972457886, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0178, + "step": 13975 + }, + { + "epoch": 7.83, + "grad_norm": 1.7922390699386597, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0195, + "step": 14000 + }, + { + "epoch": 7.83, + "eval_loss": 0.107501320540905, + "eval_runtime": 1255.7901, + "eval_samples_per_second": 1.194, + "eval_steps_per_second": 1.194, + "eval_wer": 26.423020373871037, + "step": 14000 + }, + { + "epoch": 7.84, + "grad_norm": 1.9192249774932861, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0193, + "step": 14025 + }, + { + "epoch": 7.86, + "grad_norm": 1.9778767824172974, + "learning_rate": 8.638793969849247e-06, + "loss": 0.019, + "step": 14050 + }, + { + "epoch": 7.87, + "grad_norm": 1.847006916999817, + "learning_rate": 8.636281407035176e-06, + "loss": 0.019, + "step": 14075 + }, + { + "epoch": 7.89, + "grad_norm": 1.6009154319763184, + "learning_rate": 8.633768844221107e-06, + "loss": 0.019, + "step": 14100 + }, + { + "epoch": 7.9, + "grad_norm": 1.8647724390029907, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0194, + "step": 14125 + }, + { + "epoch": 7.91, + "grad_norm": 1.5168023109436035, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0205, + "step": 14150 + }, + { + "epoch": 7.93, + "grad_norm": 1.575461506843567, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0188, + "step": 14175 + }, + { + "epoch": 7.94, + "grad_norm": 1.7249157428741455, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0187, + "step": 14200 + }, + { + "epoch": 7.96, + "grad_norm": 1.7031468152999878, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0197, + "step": 14225 + }, + { + "epoch": 7.97, + "grad_norm": 1.5277540683746338, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0183, + "step": 14250 + }, + { + "epoch": 7.98, + "grad_norm": 1.483960747718811, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0183, + "step": 14275 + }, + { + "epoch": 8.0, + "grad_norm": 1.9222930669784546, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0192, + "step": 14300 + }, + { + "epoch": 8.01, + "grad_norm": 1.2624781131744385, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0131, + "step": 14325 + }, + { + "epoch": 8.03, + "grad_norm": 1.2885479927062988, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0115, + "step": 14350 + }, + { + "epoch": 8.04, + "grad_norm": 1.4258344173431396, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0126, + "step": 14375 + }, + { + "epoch": 8.05, + "grad_norm": 1.5901182889938354, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0135, + "step": 14400 + }, + { + "epoch": 8.07, + "grad_norm": 1.3778425455093384, + "learning_rate": 8.601105527638192e-06, + "loss": 0.012, + "step": 14425 + }, + { + "epoch": 8.08, + "grad_norm": 1.6377310752868652, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0121, + "step": 14450 + }, + { + "epoch": 8.1, + "grad_norm": 1.2959672212600708, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0117, + "step": 14475 + }, + { + "epoch": 8.11, + "grad_norm": 1.87339448928833, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0139, + "step": 14500 + }, + { + "epoch": 8.12, + "grad_norm": 1.5233855247497559, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0128, + "step": 14525 + }, + { + "epoch": 8.14, + "grad_norm": 1.1843684911727905, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0125, + "step": 14550 + }, + { + "epoch": 8.15, + "grad_norm": 1.488094449043274, + "learning_rate": 8.586130653266332e-06, + "loss": 0.013, + "step": 14575 + }, + { + "epoch": 8.17, + "grad_norm": 1.7795379161834717, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0126, + "step": 14600 + }, + { + "epoch": 8.18, + "grad_norm": 1.287470817565918, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0128, + "step": 14625 + }, + { + "epoch": 8.19, + "grad_norm": 1.567731261253357, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0125, + "step": 14650 + }, + { + "epoch": 8.21, + "grad_norm": 1.5656017065048218, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0129, + "step": 14675 + }, + { + "epoch": 8.22, + "grad_norm": 1.6254287958145142, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0128, + "step": 14700 + }, + { + "epoch": 8.24, + "grad_norm": 1.6085509061813354, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0133, + "step": 14725 + }, + { + "epoch": 8.25, + "grad_norm": 1.7630277872085571, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0139, + "step": 14750 + }, + { + "epoch": 8.26, + "grad_norm": 1.7692389488220215, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0131, + "step": 14775 + }, + { + "epoch": 8.28, + "grad_norm": 1.5621066093444824, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0142, + "step": 14800 + }, + { + "epoch": 8.29, + "grad_norm": 1.638834834098816, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0126, + "step": 14825 + }, + { + "epoch": 8.31, + "grad_norm": 1.44390070438385, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0137, + "step": 14850 + }, + { + "epoch": 8.32, + "grad_norm": 1.1245650053024292, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0135, + "step": 14875 + }, + { + "epoch": 8.33, + "grad_norm": 1.2431079149246216, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0133, + "step": 14900 + }, + { + "epoch": 8.35, + "grad_norm": 1.6766127347946167, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0132, + "step": 14925 + }, + { + "epoch": 8.36, + "grad_norm": 1.9637749195098877, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0137, + "step": 14950 + }, + { + "epoch": 8.38, + "grad_norm": 1.3625088930130005, + "learning_rate": 8.54603015075377e-06, + "loss": 0.0127, + "step": 14975 + }, + { + "epoch": 8.39, + "grad_norm": 1.1293421983718872, + "learning_rate": 8.5435175879397e-06, + "loss": 0.0139, + "step": 15000 + }, + { + "epoch": 8.39, + "eval_loss": 0.11623761057853699, + "eval_runtime": 1248.4909, + "eval_samples_per_second": 1.201, + "eval_steps_per_second": 1.201, + "eval_wer": 25.16978225862914, + "step": 15000 + }, + { + "epoch": 8.4, + "grad_norm": 1.4315195083618164, + "learning_rate": 8.54100502512563e-06, + "loss": 0.0139, + "step": 15025 + }, + { + "epoch": 8.42, + "grad_norm": 1.5422781705856323, + "learning_rate": 8.538492462311558e-06, + "loss": 0.0144, + "step": 15050 + }, + { + "epoch": 8.43, + "grad_norm": 1.5740288496017456, + "learning_rate": 8.535979899497489e-06, + "loss": 0.0148, + "step": 15075 + }, + { + "epoch": 8.45, + "grad_norm": 1.6422525644302368, + "learning_rate": 8.533467336683418e-06, + "loss": 0.0133, + "step": 15100 + }, + { + "epoch": 8.46, + "grad_norm": 1.862137794494629, + "learning_rate": 8.530954773869347e-06, + "loss": 0.0124, + "step": 15125 + }, + { + "epoch": 8.47, + "grad_norm": 1.6580989360809326, + "learning_rate": 8.528442211055277e-06, + "loss": 0.0135, + "step": 15150 + }, + { + "epoch": 8.49, + "grad_norm": 1.8225072622299194, + "learning_rate": 8.525929648241206e-06, + "loss": 0.0135, + "step": 15175 + }, + { + "epoch": 8.5, + "grad_norm": 1.4441477060317993, + "learning_rate": 8.523417085427135e-06, + "loss": 0.0127, + "step": 15200 + }, + { + "epoch": 8.52, + "grad_norm": 1.7101575136184692, + "learning_rate": 8.520904522613066e-06, + "loss": 0.0139, + "step": 15225 + }, + { + "epoch": 8.53, + "grad_norm": 1.7326629161834717, + "learning_rate": 8.518391959798996e-06, + "loss": 0.0132, + "step": 15250 + }, + { + "epoch": 8.54, + "grad_norm": 1.2983744144439697, + "learning_rate": 8.515879396984925e-06, + "loss": 0.0135, + "step": 15275 + }, + { + "epoch": 8.56, + "grad_norm": 1.4976806640625, + "learning_rate": 8.513366834170856e-06, + "loss": 0.0137, + "step": 15300 + }, + { + "epoch": 8.57, + "grad_norm": 1.4674962759017944, + "learning_rate": 8.510854271356784e-06, + "loss": 0.0133, + "step": 15325 + }, + { + "epoch": 8.59, + "grad_norm": 1.4622610807418823, + "learning_rate": 8.508341708542715e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.6, + "grad_norm": 1.3362663984298706, + "learning_rate": 8.505829145728644e-06, + "loss": 0.0136, + "step": 15375 + }, + { + "epoch": 8.61, + "grad_norm": 1.571964144706726, + "learning_rate": 8.503316582914573e-06, + "loss": 0.0137, + "step": 15400 + }, + { + "epoch": 8.63, + "grad_norm": 1.281298279762268, + "learning_rate": 8.500804020100504e-06, + "loss": 0.013, + "step": 15425 + }, + { + "epoch": 8.64, + "grad_norm": 1.6459954977035522, + "learning_rate": 8.498291457286432e-06, + "loss": 0.0139, + "step": 15450 + }, + { + "epoch": 8.65, + "grad_norm": 1.8181471824645996, + "learning_rate": 8.495778894472363e-06, + "loss": 0.0141, + "step": 15475 + }, + { + "epoch": 8.67, + "grad_norm": 1.6314300298690796, + "learning_rate": 8.493266331658292e-06, + "loss": 0.0134, + "step": 15500 + }, + { + "epoch": 8.68, + "grad_norm": 1.6022205352783203, + "learning_rate": 8.490753768844222e-06, + "loss": 0.0122, + "step": 15525 + }, + { + "epoch": 8.7, + "grad_norm": 2.033066511154175, + "learning_rate": 8.488241206030151e-06, + "loss": 0.0137, + "step": 15550 + }, + { + "epoch": 8.71, + "grad_norm": 1.8259072303771973, + "learning_rate": 8.485728643216082e-06, + "loss": 0.0134, + "step": 15575 + }, + { + "epoch": 8.72, + "grad_norm": 1.478550672531128, + "learning_rate": 8.48321608040201e-06, + "loss": 0.0144, + "step": 15600 + }, + { + "epoch": 8.74, + "grad_norm": 1.3908416032791138, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0138, + "step": 15625 + }, + { + "epoch": 8.75, + "grad_norm": 1.4211598634719849, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0144, + "step": 15650 + }, + { + "epoch": 8.77, + "grad_norm": 1.4250924587249756, + "learning_rate": 8.4756783919598e-06, + "loss": 0.0136, + "step": 15675 + }, + { + "epoch": 8.78, + "grad_norm": 1.8028937578201294, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0136, + "step": 15700 + }, + { + "epoch": 8.79, + "grad_norm": 1.6728827953338623, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0134, + "step": 15725 + }, + { + "epoch": 8.81, + "grad_norm": 1.2716115713119507, + "learning_rate": 8.468140703517589e-06, + "loss": 0.0135, + "step": 15750 + }, + { + "epoch": 8.82, + "grad_norm": 1.5330822467803955, + "learning_rate": 8.465628140703518e-06, + "loss": 0.014, + "step": 15775 + }, + { + "epoch": 8.84, + "grad_norm": 1.5610933303833008, + "learning_rate": 8.463115577889448e-06, + "loss": 0.0146, + "step": 15800 + }, + { + "epoch": 8.85, + "grad_norm": 1.4767587184906006, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0142, + "step": 15825 + }, + { + "epoch": 8.86, + "grad_norm": 1.5992600917816162, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0136, + "step": 15850 + }, + { + "epoch": 8.88, + "grad_norm": 1.625845193862915, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0135, + "step": 15875 + }, + { + "epoch": 8.89, + "grad_norm": 1.4823307991027832, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0143, + "step": 15900 + }, + { + "epoch": 8.91, + "grad_norm": 1.5383321046829224, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0139, + "step": 15925 + }, + { + "epoch": 8.92, + "grad_norm": 1.2531863451004028, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0134, + "step": 15950 + }, + { + "epoch": 8.93, + "grad_norm": 1.6139166355133057, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0139, + "step": 15975 + }, + { + "epoch": 8.95, + "grad_norm": 1.510171890258789, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0134, + "step": 16000 + }, + { + "epoch": 8.95, + "eval_loss": 0.12022976577281952, + "eval_runtime": 1263.3302, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 26.01694321921165, + "step": 16000 + }, + { + "epoch": 8.96, + "grad_norm": 2.0292775630950928, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0137, + "step": 16025 + }, + { + "epoch": 8.98, + "grad_norm": 2.210850954055786, + "learning_rate": 8.437989949748744e-06, + "loss": 0.014, + "step": 16050 + }, + { + "epoch": 8.99, + "grad_norm": 2.1284592151641846, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0138, + "step": 16075 + }, + { + "epoch": 9.0, + "grad_norm": 1.2880257368087769, + "learning_rate": 8.432964824120605e-06, + "loss": 0.012, + "step": 16100 + }, + { + "epoch": 9.02, + "grad_norm": 1.1273249387741089, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0085, + "step": 16125 + }, + { + "epoch": 9.03, + "grad_norm": 0.9629200100898743, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0087, + "step": 16150 + }, + { + "epoch": 9.05, + "grad_norm": 1.1696195602416992, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0095, + "step": 16175 + }, + { + "epoch": 9.06, + "grad_norm": 1.1937648057937622, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0088, + "step": 16200 + }, + { + "epoch": 9.07, + "grad_norm": 1.186058759689331, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0088, + "step": 16225 + }, + { + "epoch": 9.09, + "grad_norm": 1.2673548460006714, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0093, + "step": 16250 + }, + { + "epoch": 9.1, + "grad_norm": 1.0217103958129883, + "learning_rate": 8.415376884422112e-06, + "loss": 0.009, + "step": 16275 + }, + { + "epoch": 9.12, + "grad_norm": 1.6276463270187378, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0094, + "step": 16300 + }, + { + "epoch": 9.13, + "grad_norm": 0.9528993964195251, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0086, + "step": 16325 + }, + { + "epoch": 9.14, + "grad_norm": 1.4897090196609497, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0096, + "step": 16350 + }, + { + "epoch": 9.16, + "grad_norm": 1.4035155773162842, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0095, + "step": 16375 + }, + { + "epoch": 9.17, + "grad_norm": 1.117077112197876, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0084, + "step": 16400 + }, + { + "epoch": 9.19, + "grad_norm": 1.4381709098815918, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0094, + "step": 16425 + }, + { + "epoch": 9.2, + "grad_norm": 2.295844078063965, + "learning_rate": 8.397788944723619e-06, + "loss": 0.011, + "step": 16450 + }, + { + "epoch": 9.21, + "grad_norm": 1.4901695251464844, + "learning_rate": 8.395276381909548e-06, + "loss": 0.0104, + "step": 16475 + }, + { + "epoch": 9.23, + "grad_norm": 2.0876200199127197, + "learning_rate": 8.392763819095479e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.24, + "grad_norm": 1.1527096033096313, + "learning_rate": 8.390251256281408e-06, + "loss": 0.0089, + "step": 16525 + }, + { + "epoch": 9.26, + "grad_norm": 1.299315094947815, + "learning_rate": 8.387738693467338e-06, + "loss": 0.01, + "step": 16550 + }, + { + "epoch": 9.27, + "grad_norm": 1.179197072982788, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0084, + "step": 16575 + }, + { + "epoch": 9.28, + "grad_norm": 1.4978837966918945, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0103, + "step": 16600 + }, + { + "epoch": 9.3, + "grad_norm": 1.2782052755355835, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0092, + "step": 16625 + }, + { + "epoch": 9.31, + "grad_norm": 1.43800687789917, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0099, + "step": 16650 + }, + { + "epoch": 9.33, + "grad_norm": 1.925214171409607, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0097, + "step": 16675 + }, + { + "epoch": 9.34, + "grad_norm": 1.757615089416504, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0095, + "step": 16700 + }, + { + "epoch": 9.35, + "grad_norm": 1.2906619310379028, + "learning_rate": 8.370150753768845e-06, + "loss": 0.01, + "step": 16725 + }, + { + "epoch": 9.37, + "grad_norm": 1.38942289352417, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0097, + "step": 16750 + }, + { + "epoch": 9.38, + "grad_norm": 1.7287613153457642, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0098, + "step": 16775 + }, + { + "epoch": 9.4, + "grad_norm": 1.5018854141235352, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0097, + "step": 16800 + }, + { + "epoch": 9.41, + "grad_norm": 1.5454444885253906, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0105, + "step": 16825 + }, + { + "epoch": 9.42, + "grad_norm": 1.5580748319625854, + "learning_rate": 8.357587939698493e-06, + "loss": 0.0096, + "step": 16850 + }, + { + "epoch": 9.44, + "grad_norm": 1.2850528955459595, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0098, + "step": 16875 + }, + { + "epoch": 9.45, + "grad_norm": 1.6075043678283691, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0106, + "step": 16900 + }, + { + "epoch": 9.47, + "grad_norm": 1.6207281351089478, + "learning_rate": 8.350050251256282e-06, + "loss": 0.01, + "step": 16925 + }, + { + "epoch": 9.48, + "grad_norm": 1.1454306840896606, + "learning_rate": 8.347537688442212e-06, + "loss": 0.0097, + "step": 16950 + }, + { + "epoch": 9.49, + "grad_norm": 1.3012179136276245, + "learning_rate": 8.345025125628141e-06, + "loss": 0.01, + "step": 16975 + }, + { + "epoch": 9.51, + "grad_norm": 1.6009056568145752, + "learning_rate": 8.34251256281407e-06, + "loss": 0.01, + "step": 17000 + }, + { + "epoch": 9.51, + "eval_loss": 0.12827400863170624, + "eval_runtime": 1269.7762, + "eval_samples_per_second": 1.181, + "eval_steps_per_second": 1.181, + "eval_wer": 25.981936567947912, + "step": 17000 + }, + { + "epoch": 9.52, + "grad_norm": 1.3005088567733765, + "learning_rate": 8.34e-06, + "loss": 0.0094, + "step": 17025 + }, + { + "epoch": 9.54, + "grad_norm": 1.4232419729232788, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0099, + "step": 17050 + }, + { + "epoch": 9.55, + "grad_norm": 1.7481540441513062, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0093, + "step": 17075 + }, + { + "epoch": 9.56, + "grad_norm": 1.8427752256393433, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0106, + "step": 17100 + }, + { + "epoch": 9.58, + "grad_norm": 1.2959924936294556, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0096, + "step": 17125 + }, + { + "epoch": 9.59, + "grad_norm": 1.1869925260543823, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0105, + "step": 17150 + }, + { + "epoch": 9.61, + "grad_norm": 1.8451534509658813, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0098, + "step": 17175 + }, + { + "epoch": 9.62, + "grad_norm": 1.6278190612792969, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0095, + "step": 17200 + }, + { + "epoch": 9.63, + "grad_norm": 1.4719972610473633, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0095, + "step": 17225 + }, + { + "epoch": 9.65, + "grad_norm": 1.4860060214996338, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0095, + "step": 17250 + }, + { + "epoch": 9.66, + "grad_norm": 1.251642107963562, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0113, + "step": 17275 + }, + { + "epoch": 9.68, + "grad_norm": 1.6547269821166992, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0102, + "step": 17300 + }, + { + "epoch": 9.69, + "grad_norm": 1.5958502292633057, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0104, + "step": 17325 + }, + { + "epoch": 9.7, + "grad_norm": 1.3772475719451904, + "learning_rate": 8.307336683417086e-06, + "loss": 0.01, + "step": 17350 + }, + { + "epoch": 9.72, + "grad_norm": 1.5301170349121094, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0109, + "step": 17375 + }, + { + "epoch": 9.73, + "grad_norm": 1.4869940280914307, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0105, + "step": 17400 + }, + { + "epoch": 9.75, + "grad_norm": 1.6237093210220337, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0105, + "step": 17425 + }, + { + "epoch": 9.76, + "grad_norm": 1.461899757385254, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0102, + "step": 17450 + }, + { + "epoch": 9.77, + "grad_norm": 2.14113450050354, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0103, + "step": 17475 + }, + { + "epoch": 9.79, + "grad_norm": 1.672645926475525, + "learning_rate": 8.292261306532664e-06, + "loss": 0.01, + "step": 17500 + }, + { + "epoch": 9.8, + "grad_norm": 1.068429946899414, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0094, + "step": 17525 + }, + { + "epoch": 9.82, + "grad_norm": 1.8176536560058594, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0107, + "step": 17550 + }, + { + "epoch": 9.83, + "grad_norm": 1.489044189453125, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0102, + "step": 17575 + }, + { + "epoch": 9.84, + "grad_norm": 1.374991536140442, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0104, + "step": 17600 + }, + { + "epoch": 9.86, + "grad_norm": 1.9951503276824951, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0101, + "step": 17625 + }, + { + "epoch": 9.87, + "grad_norm": 1.7742674350738525, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0108, + "step": 17650 + }, + { + "epoch": 9.89, + "grad_norm": 1.54373300075531, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0108, + "step": 17675 + }, + { + "epoch": 9.9, + "grad_norm": 1.442535400390625, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0098, + "step": 17700 + }, + { + "epoch": 9.91, + "grad_norm": 1.58523428440094, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0104, + "step": 17725 + }, + { + "epoch": 9.93, + "grad_norm": 1.551440954208374, + "learning_rate": 8.26713567839196e-06, + "loss": 0.0101, + "step": 17750 + }, + { + "epoch": 9.94, + "grad_norm": 1.3735955953598022, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0101, + "step": 17775 + }, + { + "epoch": 9.96, + "grad_norm": 1.8585009574890137, + "learning_rate": 8.26211055276382e-06, + "loss": 0.0103, + "step": 17800 + }, + { + "epoch": 9.97, + "grad_norm": 1.6317896842956543, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0097, + "step": 17825 + }, + { + "epoch": 9.98, + "grad_norm": 1.6716476678848267, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0103, + "step": 17850 + }, + { + "epoch": 10.0, + "grad_norm": 1.3141593933105469, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0097, + "step": 17875 + }, + { + "epoch": 10.01, + "grad_norm": 1.5377665758132935, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0073, + "step": 17900 + }, + { + "epoch": 10.03, + "grad_norm": 1.3977289199829102, + "learning_rate": 8.249547738693467e-06, + "loss": 0.0064, + "step": 17925 + }, + { + "epoch": 10.04, + "grad_norm": 1.2745766639709473, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0064, + "step": 17950 + }, + { + "epoch": 10.05, + "grad_norm": 1.3880548477172852, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0067, + "step": 17975 + }, + { + "epoch": 10.07, + "grad_norm": 1.0573828220367432, + "learning_rate": 8.242010050251257e-06, + "loss": 0.007, + "step": 18000 + }, + { + "epoch": 10.07, + "eval_loss": 0.13322582840919495, + "eval_runtime": 1266.9247, + "eval_samples_per_second": 1.184, + "eval_steps_per_second": 1.184, + "eval_wer": 26.13596583350837, + "step": 18000 + }, + { + "epoch": 10.08, + "grad_norm": 0.9800087213516235, + "learning_rate": 8.239497487437186e-06, + "loss": 0.0067, + "step": 18025 + }, + { + "epoch": 10.1, + "grad_norm": 1.3136003017425537, + "learning_rate": 8.236984924623116e-06, + "loss": 0.0072, + "step": 18050 + }, + { + "epoch": 10.11, + "grad_norm": 2.4294538497924805, + "learning_rate": 8.234472361809047e-06, + "loss": 0.0071, + "step": 18075 + }, + { + "epoch": 10.12, + "grad_norm": 1.492241382598877, + "learning_rate": 8.231959798994976e-06, + "loss": 0.0067, + "step": 18100 + }, + { + "epoch": 10.14, + "grad_norm": 1.0879470109939575, + "learning_rate": 8.229447236180905e-06, + "loss": 0.0065, + "step": 18125 + }, + { + "epoch": 10.15, + "grad_norm": 0.8511327505111694, + "learning_rate": 8.226934673366835e-06, + "loss": 0.0068, + "step": 18150 + }, + { + "epoch": 10.16, + "grad_norm": 1.3865950107574463, + "learning_rate": 8.224422110552764e-06, + "loss": 0.007, + "step": 18175 + }, + { + "epoch": 10.18, + "grad_norm": 1.5761487483978271, + "learning_rate": 8.221909547738695e-06, + "loss": 0.0065, + "step": 18200 + }, + { + "epoch": 10.19, + "grad_norm": 1.1414053440093994, + "learning_rate": 8.219396984924624e-06, + "loss": 0.007, + "step": 18225 + }, + { + "epoch": 10.21, + "grad_norm": 1.3173131942749023, + "learning_rate": 8.216984924623116e-06, + "loss": 0.0066, + "step": 18250 + }, + { + "epoch": 10.22, + "grad_norm": 1.83867609500885, + "learning_rate": 8.214472361809047e-06, + "loss": 0.0074, + "step": 18275 + }, + { + "epoch": 10.23, + "grad_norm": 0.9537453651428223, + "learning_rate": 8.211959798994974e-06, + "loss": 0.006, + "step": 18300 + }, + { + "epoch": 10.25, + "grad_norm": 1.2373839616775513, + "learning_rate": 8.209447236180905e-06, + "loss": 0.0073, + "step": 18325 + }, + { + "epoch": 10.26, + "grad_norm": 1.6300586462020874, + "learning_rate": 8.206934673366835e-06, + "loss": 0.0069, + "step": 18350 + }, + { + "epoch": 10.28, + "grad_norm": 0.9293125867843628, + "learning_rate": 8.204422110552764e-06, + "loss": 0.0079, + "step": 18375 + }, + { + "epoch": 10.29, + "grad_norm": 1.9881733655929565, + "learning_rate": 8.201909547738695e-06, + "loss": 0.0071, + "step": 18400 + }, + { + "epoch": 10.3, + "grad_norm": 1.0632404088974, + "learning_rate": 8.199396984924623e-06, + "loss": 0.0068, + "step": 18425 + }, + { + "epoch": 10.32, + "grad_norm": 1.6890215873718262, + "learning_rate": 8.196884422110554e-06, + "loss": 0.007, + "step": 18450 + }, + { + "epoch": 10.33, + "grad_norm": 1.7216650247573853, + "learning_rate": 8.194371859296483e-06, + "loss": 0.0069, + "step": 18475 + }, + { + "epoch": 10.35, + "grad_norm": 1.3629086017608643, + "learning_rate": 8.191859296482412e-06, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 10.36, + "grad_norm": 1.6645286083221436, + "learning_rate": 8.189346733668342e-06, + "loss": 0.0081, + "step": 18525 + }, + { + "epoch": 10.37, + "grad_norm": 1.1869726181030273, + "learning_rate": 8.186834170854273e-06, + "loss": 0.0078, + "step": 18550 + }, + { + "epoch": 10.39, + "grad_norm": 1.2777962684631348, + "learning_rate": 8.184321608040202e-06, + "loss": 0.0073, + "step": 18575 + }, + { + "epoch": 10.4, + "grad_norm": 1.689989686012268, + "learning_rate": 8.181809045226131e-06, + "loss": 0.0074, + "step": 18600 + }, + { + "epoch": 10.42, + "grad_norm": 1.9431265592575073, + "learning_rate": 8.17929648241206e-06, + "loss": 0.0074, + "step": 18625 + }, + { + "epoch": 10.43, + "grad_norm": 1.4865610599517822, + "learning_rate": 8.17678391959799e-06, + "loss": 0.007, + "step": 18650 + }, + { + "epoch": 10.44, + "grad_norm": 1.4271529912948608, + "learning_rate": 8.174271356783921e-06, + "loss": 0.0077, + "step": 18675 + }, + { + "epoch": 10.46, + "grad_norm": 1.168408751487732, + "learning_rate": 8.171758793969849e-06, + "loss": 0.0073, + "step": 18700 + }, + { + "epoch": 10.47, + "grad_norm": 1.581976056098938, + "learning_rate": 8.16924623115578e-06, + "loss": 0.0079, + "step": 18725 + }, + { + "epoch": 10.49, + "grad_norm": 1.1006019115447998, + "learning_rate": 8.166733668341709e-06, + "loss": 0.0071, + "step": 18750 + }, + { + "epoch": 10.5, + "grad_norm": 1.4304567575454712, + "learning_rate": 8.164221105527638e-06, + "loss": 0.0075, + "step": 18775 + }, + { + "epoch": 10.51, + "grad_norm": 1.6005663871765137, + "learning_rate": 8.16170854271357e-06, + "loss": 0.007, + "step": 18800 + }, + { + "epoch": 10.53, + "grad_norm": 1.4893625974655151, + "learning_rate": 8.159195979899499e-06, + "loss": 0.0066, + "step": 18825 + }, + { + "epoch": 10.54, + "grad_norm": 1.077237844467163, + "learning_rate": 8.156683417085428e-06, + "loss": 0.007, + "step": 18850 + }, + { + "epoch": 10.56, + "grad_norm": 1.4600884914398193, + "learning_rate": 8.154170854271357e-06, + "loss": 0.0078, + "step": 18875 + }, + { + "epoch": 10.57, + "grad_norm": 1.8610810041427612, + "learning_rate": 8.151658291457287e-06, + "loss": 0.0083, + "step": 18900 + }, + { + "epoch": 10.58, + "grad_norm": 1.4527398347854614, + "learning_rate": 8.149145728643216e-06, + "loss": 0.0079, + "step": 18925 + }, + { + "epoch": 10.6, + "grad_norm": 1.2474983930587769, + "learning_rate": 8.146633165829147e-06, + "loss": 0.0075, + "step": 18950 + }, + { + "epoch": 10.61, + "grad_norm": 1.0266172885894775, + "learning_rate": 8.144120603015076e-06, + "loss": 0.0071, + "step": 18975 + }, + { + "epoch": 10.63, + "grad_norm": 1.6183582544326782, + "learning_rate": 8.141608040201006e-06, + "loss": 0.0073, + "step": 19000 + }, + { + "epoch": 10.63, + "eval_loss": 0.13868772983551025, + "eval_runtime": 1263.8628, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 25.90492193516768, + "step": 19000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 56, + "save_steps": 1000, + "total_flos": 5.91367500988416e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +}