{ "best_global_step": 35000, "best_metric": 19.39343494771888, "best_model_checkpoint": "./checkpoint-35000", "epoch": 530.9734513274336, "eval_steps": 5000, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22123893805309736, "grad_norm": 18.602331161499023, "learning_rate": 1.575e-06, "loss": 2.5106, "step": 25 }, { "epoch": 0.4424778761061947, "grad_norm": 6.637160778045654, "learning_rate": 3.4499999999999996e-06, "loss": 1.894, "step": 50 }, { "epoch": 0.6637168141592921, "grad_norm": 4.3603386878967285, "learning_rate": 5.324999999999999e-06, "loss": 1.333, "step": 75 }, { "epoch": 0.8849557522123894, "grad_norm": 3.5584959983825684, "learning_rate": 7.2e-06, "loss": 1.0488, "step": 100 }, { "epoch": 1.1061946902654867, "grad_norm": 3.270585775375366, "learning_rate": 9.074999999999999e-06, "loss": 0.8673, "step": 125 }, { "epoch": 1.3274336283185841, "grad_norm": 3.515446424484253, "learning_rate": 1.0949999999999998e-05, "loss": 0.7608, "step": 150 }, { "epoch": 1.5486725663716814, "grad_norm": 3.224313735961914, "learning_rate": 1.2825e-05, "loss": 0.669, "step": 175 }, { "epoch": 1.7699115044247788, "grad_norm": 2.9522249698638916, "learning_rate": 1.47e-05, "loss": 0.6172, "step": 200 }, { "epoch": 1.991150442477876, "grad_norm": 3.174180030822754, "learning_rate": 1.6575e-05, "loss": 0.5646, "step": 225 }, { "epoch": 2.2123893805309733, "grad_norm": 2.7301571369171143, "learning_rate": 1.8449999999999998e-05, "loss": 0.4921, "step": 250 }, { "epoch": 2.433628318584071, "grad_norm": 2.5883657932281494, "learning_rate": 2.0325e-05, "loss": 0.4612, "step": 275 }, { "epoch": 2.6548672566371683, "grad_norm": 2.662222146987915, "learning_rate": 2.2199999999999998e-05, "loss": 0.4392, "step": 300 }, { "epoch": 2.8761061946902657, "grad_norm": 2.629359006881714, "learning_rate": 2.4075e-05, "loss": 0.4163, "step": 325 }, { "epoch": 3.0973451327433628, "grad_norm": 2.235975742340088, "learning_rate": 2.5949999999999997e-05, "loss": 0.379, "step": 350 }, { "epoch": 3.3185840707964602, "grad_norm": 2.537764072418213, "learning_rate": 2.7825e-05, "loss": 0.3347, "step": 375 }, { "epoch": 3.5398230088495577, "grad_norm": 2.3638172149658203, "learning_rate": 2.97e-05, "loss": 0.3229, "step": 400 }, { "epoch": 3.7610619469026547, "grad_norm": 2.4351673126220703, "learning_rate": 3.1574999999999995e-05, "loss": 0.3181, "step": 425 }, { "epoch": 3.982300884955752, "grad_norm": 2.6229543685913086, "learning_rate": 3.345e-05, "loss": 0.3088, "step": 450 }, { "epoch": 4.20353982300885, "grad_norm": 2.1918017864227295, "learning_rate": 3.5325e-05, "loss": 0.2459, "step": 475 }, { "epoch": 4.424778761061947, "grad_norm": 2.113893508911133, "learning_rate": 3.7199999999999996e-05, "loss": 0.2377, "step": 500 }, { "epoch": 4.646017699115045, "grad_norm": 2.422183036804199, "learning_rate": 3.7492085427135675e-05, "loss": 0.234, "step": 525 }, { "epoch": 4.867256637168142, "grad_norm": 2.2018935680389404, "learning_rate": 3.748266331658291e-05, "loss": 0.2391, "step": 550 }, { "epoch": 5.088495575221239, "grad_norm": 2.1604204177856445, "learning_rate": 3.747324120603015e-05, "loss": 0.2067, "step": 575 }, { "epoch": 5.3097345132743365, "grad_norm": 1.893356442451477, "learning_rate": 3.746381909547738e-05, "loss": 0.1696, "step": 600 }, { "epoch": 5.530973451327434, "grad_norm": 1.976858377456665, "learning_rate": 3.745439698492462e-05, "loss": 0.1703, "step": 625 }, { "epoch": 5.752212389380531, "grad_norm": 1.8846240043640137, "learning_rate": 3.744497487437186e-05, "loss": 0.1692, "step": 650 }, { "epoch": 5.9734513274336285, "grad_norm": 1.917733073234558, "learning_rate": 3.743555276381909e-05, "loss": 0.1731, "step": 675 }, { "epoch": 6.1946902654867255, "grad_norm": 1.4693013429641724, "learning_rate": 3.7426130653266327e-05, "loss": 0.1224, "step": 700 }, { "epoch": 6.415929203539823, "grad_norm": 1.6126384735107422, "learning_rate": 3.7416708542713565e-05, "loss": 0.1193, "step": 725 }, { "epoch": 6.6371681415929205, "grad_norm": 1.708425760269165, "learning_rate": 3.74072864321608e-05, "loss": 0.1205, "step": 750 }, { "epoch": 6.8584070796460175, "grad_norm": 1.7630101442337036, "learning_rate": 3.7397864321608034e-05, "loss": 0.1235, "step": 775 }, { "epoch": 7.079646017699115, "grad_norm": 1.4447585344314575, "learning_rate": 3.738844221105527e-05, "loss": 0.1077, "step": 800 }, { "epoch": 7.300884955752212, "grad_norm": 1.567300796508789, "learning_rate": 3.737902010050251e-05, "loss": 0.0795, "step": 825 }, { "epoch": 7.522123893805309, "grad_norm": 1.526865839958191, "learning_rate": 3.736959798994975e-05, "loss": 0.0826, "step": 850 }, { "epoch": 7.743362831858407, "grad_norm": 1.5067248344421387, "learning_rate": 3.7360175879396985e-05, "loss": 0.0843, "step": 875 }, { "epoch": 7.964601769911504, "grad_norm": 1.5940072536468506, "learning_rate": 3.7350753768844216e-05, "loss": 0.085, "step": 900 }, { "epoch": 8.185840707964601, "grad_norm": 1.1414343118667603, "learning_rate": 3.7341331658291454e-05, "loss": 0.0571, "step": 925 }, { "epoch": 8.4070796460177, "grad_norm": 1.603317141532898, "learning_rate": 3.733190954773869e-05, "loss": 0.0548, "step": 950 }, { "epoch": 8.628318584070797, "grad_norm": 1.5459048748016357, "learning_rate": 3.732248743718593e-05, "loss": 0.0569, "step": 975 }, { "epoch": 8.849557522123893, "grad_norm": 1.5160927772521973, "learning_rate": 3.731306532663316e-05, "loss": 0.0575, "step": 1000 }, { "epoch": 9.070796460176991, "grad_norm": 0.8897690176963806, "learning_rate": 3.73036432160804e-05, "loss": 0.0522, "step": 1025 }, { "epoch": 9.29203539823009, "grad_norm": 1.1123886108398438, "learning_rate": 3.729422110552763e-05, "loss": 0.0356, "step": 1050 }, { "epoch": 9.513274336283185, "grad_norm": 1.3723399639129639, "learning_rate": 3.728479899497487e-05, "loss": 0.0369, "step": 1075 }, { "epoch": 9.734513274336283, "grad_norm": 1.3542860746383667, "learning_rate": 3.7275376884422106e-05, "loss": 0.0376, "step": 1100 }, { "epoch": 9.955752212389381, "grad_norm": 1.3826254606246948, "learning_rate": 3.7265954773869344e-05, "loss": 0.0402, "step": 1125 }, { "epoch": 10.176991150442477, "grad_norm": 0.8760184645652771, "learning_rate": 3.725653266331658e-05, "loss": 0.0266, "step": 1150 }, { "epoch": 10.398230088495575, "grad_norm": 0.7905477285385132, "learning_rate": 3.724711055276381e-05, "loss": 0.0229, "step": 1175 }, { "epoch": 10.619469026548673, "grad_norm": 0.9804538488388062, "learning_rate": 3.723768844221105e-05, "loss": 0.0246, "step": 1200 }, { "epoch": 10.84070796460177, "grad_norm": 1.0528316497802734, "learning_rate": 3.722826633165829e-05, "loss": 0.0257, "step": 1225 }, { "epoch": 11.061946902654867, "grad_norm": 0.6683358550071716, "learning_rate": 3.721884422110553e-05, "loss": 0.0239, "step": 1250 }, { "epoch": 11.283185840707965, "grad_norm": 0.7000510096549988, "learning_rate": 3.7209422110552765e-05, "loss": 0.0161, "step": 1275 }, { "epoch": 11.504424778761061, "grad_norm": 0.6889680027961731, "learning_rate": 3.7199999999999996e-05, "loss": 0.0154, "step": 1300 }, { "epoch": 11.725663716814159, "grad_norm": 1.1626912355422974, "learning_rate": 3.7190577889447234e-05, "loss": 0.0164, "step": 1325 }, { "epoch": 11.946902654867257, "grad_norm": 1.0828447341918945, "learning_rate": 3.7181155778894465e-05, "loss": 0.018, "step": 1350 }, { "epoch": 12.168141592920353, "grad_norm": 0.37422680854797363, "learning_rate": 3.71717336683417e-05, "loss": 0.0131, "step": 1375 }, { "epoch": 12.389380530973451, "grad_norm": 0.4810941517353058, "learning_rate": 3.716231155778894e-05, "loss": 0.0108, "step": 1400 }, { "epoch": 12.610619469026549, "grad_norm": 0.5290389060974121, "learning_rate": 3.715288944723618e-05, "loss": 0.011, "step": 1425 }, { "epoch": 12.831858407079647, "grad_norm": 0.599158763885498, "learning_rate": 3.714346733668341e-05, "loss": 0.0113, "step": 1450 }, { "epoch": 13.053097345132743, "grad_norm": 0.5375319719314575, "learning_rate": 3.713404522613065e-05, "loss": 0.0111, "step": 1475 }, { "epoch": 13.274336283185841, "grad_norm": 0.6644614338874817, "learning_rate": 3.7124623115577886e-05, "loss": 0.0093, "step": 1500 }, { "epoch": 13.495575221238939, "grad_norm": 0.3936254382133484, "learning_rate": 3.7115201005025124e-05, "loss": 0.0084, "step": 1525 }, { "epoch": 13.716814159292035, "grad_norm": 0.5639329552650452, "learning_rate": 3.710577889447236e-05, "loss": 0.0083, "step": 1550 }, { "epoch": 13.938053097345133, "grad_norm": 0.45128333568573, "learning_rate": 3.709635678391959e-05, "loss": 0.0087, "step": 1575 }, { "epoch": 14.15929203539823, "grad_norm": 0.4120185673236847, "learning_rate": 3.708693467336683e-05, "loss": 0.0083, "step": 1600 }, { "epoch": 14.380530973451327, "grad_norm": 0.6860498189926147, "learning_rate": 3.707751256281407e-05, "loss": 0.0077, "step": 1625 }, { "epoch": 14.601769911504425, "grad_norm": 0.4518347680568695, "learning_rate": 3.706809045226131e-05, "loss": 0.0079, "step": 1650 }, { "epoch": 14.823008849557523, "grad_norm": 0.7469002604484558, "learning_rate": 3.705866834170854e-05, "loss": 0.0086, "step": 1675 }, { "epoch": 15.044247787610619, "grad_norm": 0.6766924858093262, "learning_rate": 3.7049246231155776e-05, "loss": 0.0093, "step": 1700 }, { "epoch": 15.265486725663717, "grad_norm": 0.4621752202510834, "learning_rate": 3.7039824120603014e-05, "loss": 0.0079, "step": 1725 }, { "epoch": 15.486725663716815, "grad_norm": 0.9068520069122314, "learning_rate": 3.7030402010050245e-05, "loss": 0.0091, "step": 1750 }, { "epoch": 15.70796460176991, "grad_norm": 0.5266802906990051, "learning_rate": 3.702097989949748e-05, "loss": 0.0094, "step": 1775 }, { "epoch": 15.929203539823009, "grad_norm": 3.0936825275421143, "learning_rate": 3.701155778894472e-05, "loss": 0.0091, "step": 1800 }, { "epoch": 16.150442477876105, "grad_norm": 0.7230461239814758, "learning_rate": 3.700213567839196e-05, "loss": 0.0079, "step": 1825 }, { "epoch": 16.371681415929203, "grad_norm": 1.2678554058074951, "learning_rate": 3.699271356783919e-05, "loss": 0.0075, "step": 1850 }, { "epoch": 16.5929203539823, "grad_norm": 0.8378119468688965, "learning_rate": 3.698329145728643e-05, "loss": 0.0087, "step": 1875 }, { "epoch": 16.8141592920354, "grad_norm": 0.694353461265564, "learning_rate": 3.6973869346733666e-05, "loss": 0.0092, "step": 1900 }, { "epoch": 17.035398230088497, "grad_norm": 0.8645338416099548, "learning_rate": 3.6964447236180904e-05, "loss": 0.0087, "step": 1925 }, { "epoch": 17.256637168141594, "grad_norm": 1.070916771888733, "learning_rate": 3.695502512562814e-05, "loss": 0.0069, "step": 1950 }, { "epoch": 17.47787610619469, "grad_norm": 0.557037353515625, "learning_rate": 3.694560301507537e-05, "loss": 0.0075, "step": 1975 }, { "epoch": 17.699115044247787, "grad_norm": 0.972762405872345, "learning_rate": 3.693618090452261e-05, "loss": 0.0078, "step": 2000 }, { "epoch": 17.920353982300885, "grad_norm": 0.9033729434013367, "learning_rate": 3.692675879396984e-05, "loss": 0.0078, "step": 2025 }, { "epoch": 18.141592920353983, "grad_norm": 0.4926462471485138, "learning_rate": 3.691733668341708e-05, "loss": 0.0064, "step": 2050 }, { "epoch": 18.36283185840708, "grad_norm": 0.3346686363220215, "learning_rate": 3.690791457286432e-05, "loss": 0.0054, "step": 2075 }, { "epoch": 18.58407079646018, "grad_norm": 0.44990572333335876, "learning_rate": 3.6898492462311556e-05, "loss": 0.0053, "step": 2100 }, { "epoch": 18.805309734513273, "grad_norm": 0.58111172914505, "learning_rate": 3.6889070351758794e-05, "loss": 0.0055, "step": 2125 }, { "epoch": 19.02654867256637, "grad_norm": 0.5601857304573059, "learning_rate": 3.6879648241206025e-05, "loss": 0.005, "step": 2150 }, { "epoch": 19.24778761061947, "grad_norm": 0.4468419849872589, "learning_rate": 3.687022613065326e-05, "loss": 0.0045, "step": 2175 }, { "epoch": 19.469026548672566, "grad_norm": 0.49860286712646484, "learning_rate": 3.68608040201005e-05, "loss": 0.0045, "step": 2200 }, { "epoch": 19.690265486725664, "grad_norm": 0.3553190529346466, "learning_rate": 3.685138190954774e-05, "loss": 0.0043, "step": 2225 }, { "epoch": 19.911504424778762, "grad_norm": 0.5199891328811646, "learning_rate": 3.684195979899497e-05, "loss": 0.0041, "step": 2250 }, { "epoch": 20.13274336283186, "grad_norm": 0.1921943724155426, "learning_rate": 3.683253768844221e-05, "loss": 0.0033, "step": 2275 }, { "epoch": 20.353982300884955, "grad_norm": 0.10891763120889664, "learning_rate": 3.682311557788944e-05, "loss": 0.0028, "step": 2300 }, { "epoch": 20.575221238938052, "grad_norm": 0.09283024072647095, "learning_rate": 3.6813693467336683e-05, "loss": 0.0024, "step": 2325 }, { "epoch": 20.79646017699115, "grad_norm": 0.5227077603340149, "learning_rate": 3.680427135678392e-05, "loss": 0.0025, "step": 2350 }, { "epoch": 21.01769911504425, "grad_norm": 0.22678300738334656, "learning_rate": 3.679484924623115e-05, "loss": 0.0028, "step": 2375 }, { "epoch": 21.238938053097346, "grad_norm": 0.08032209426164627, "learning_rate": 3.678542713567839e-05, "loss": 0.0021, "step": 2400 }, { "epoch": 21.460176991150444, "grad_norm": 0.07821410894393921, "learning_rate": 3.677600502512562e-05, "loss": 0.002, "step": 2425 }, { "epoch": 21.68141592920354, "grad_norm": 0.1442423015832901, "learning_rate": 3.676658291457286e-05, "loss": 0.0022, "step": 2450 }, { "epoch": 21.902654867256636, "grad_norm": 0.08893956243991852, "learning_rate": 3.67571608040201e-05, "loss": 0.0021, "step": 2475 }, { "epoch": 22.123893805309734, "grad_norm": 0.06235270947217941, "learning_rate": 3.6747738693467335e-05, "loss": 0.0018, "step": 2500 }, { "epoch": 22.345132743362832, "grad_norm": 0.06658627092838287, "learning_rate": 3.6738316582914567e-05, "loss": 0.0018, "step": 2525 }, { "epoch": 22.56637168141593, "grad_norm": 0.07976372539997101, "learning_rate": 3.6728894472361804e-05, "loss": 0.0016, "step": 2550 }, { "epoch": 22.787610619469028, "grad_norm": 0.1210562139749527, "learning_rate": 3.671947236180904e-05, "loss": 0.0018, "step": 2575 }, { "epoch": 23.008849557522122, "grad_norm": 0.07866708189249039, "learning_rate": 3.671005025125628e-05, "loss": 0.002, "step": 2600 }, { "epoch": 23.23008849557522, "grad_norm": 0.5405462980270386, "learning_rate": 3.670062814070352e-05, "loss": 0.0026, "step": 2625 }, { "epoch": 23.451327433628318, "grad_norm": 0.11391867697238922, "learning_rate": 3.669120603015075e-05, "loss": 0.0023, "step": 2650 }, { "epoch": 23.672566371681416, "grad_norm": 0.22629842162132263, "learning_rate": 3.668178391959799e-05, "loss": 0.0025, "step": 2675 }, { "epoch": 23.893805309734514, "grad_norm": 0.40358221530914307, "learning_rate": 3.667236180904522e-05, "loss": 0.0037, "step": 2700 }, { "epoch": 24.115044247787612, "grad_norm": 0.36762353777885437, "learning_rate": 3.6662939698492456e-05, "loss": 0.0048, "step": 2725 }, { "epoch": 24.336283185840706, "grad_norm": 0.7483735084533691, "learning_rate": 3.6653517587939694e-05, "loss": 0.0059, "step": 2750 }, { "epoch": 24.557522123893804, "grad_norm": 1.2625534534454346, "learning_rate": 3.664409547738693e-05, "loss": 0.0089, "step": 2775 }, { "epoch": 24.778761061946902, "grad_norm": 1.3285272121429443, "learning_rate": 3.663467336683417e-05, "loss": 0.0104, "step": 2800 }, { "epoch": 25.0, "grad_norm": 2.0200207233428955, "learning_rate": 3.66252512562814e-05, "loss": 0.0113, "step": 2825 }, { "epoch": 25.221238938053098, "grad_norm": 0.8350592851638794, "learning_rate": 3.661582914572864e-05, "loss": 0.0094, "step": 2850 }, { "epoch": 25.442477876106196, "grad_norm": 0.8716378808021545, "learning_rate": 3.660640703517588e-05, "loss": 0.0106, "step": 2875 }, { "epoch": 25.663716814159294, "grad_norm": 1.0014169216156006, "learning_rate": 3.6596984924623115e-05, "loss": 0.0107, "step": 2900 }, { "epoch": 25.884955752212388, "grad_norm": 1.0365616083145142, "learning_rate": 3.6587562814070346e-05, "loss": 0.0107, "step": 2925 }, { "epoch": 26.106194690265486, "grad_norm": 0.8492879867553711, "learning_rate": 3.6578140703517584e-05, "loss": 0.0098, "step": 2950 }, { "epoch": 26.327433628318584, "grad_norm": 1.3080741167068481, "learning_rate": 3.656871859296482e-05, "loss": 0.0085, "step": 2975 }, { "epoch": 26.548672566371682, "grad_norm": 0.7727700471878052, "learning_rate": 3.655929648241205e-05, "loss": 0.0083, "step": 3000 }, { "epoch": 26.76991150442478, "grad_norm": 0.9564117193222046, "learning_rate": 3.65498743718593e-05, "loss": 0.0084, "step": 3025 }, { "epoch": 26.991150442477878, "grad_norm": 1.2931952476501465, "learning_rate": 3.654045226130653e-05, "loss": 0.0081, "step": 3050 }, { "epoch": 27.212389380530972, "grad_norm": 0.7459219694137573, "learning_rate": 3.653103015075377e-05, "loss": 0.0062, "step": 3075 }, { "epoch": 27.43362831858407, "grad_norm": 0.7966432571411133, "learning_rate": 3.6521608040201e-05, "loss": 0.006, "step": 3100 }, { "epoch": 27.654867256637168, "grad_norm": 0.5758165717124939, "learning_rate": 3.6512185929648236e-05, "loss": 0.0054, "step": 3125 }, { "epoch": 27.876106194690266, "grad_norm": 0.7811841368675232, "learning_rate": 3.6502763819095474e-05, "loss": 0.0052, "step": 3150 }, { "epoch": 28.097345132743364, "grad_norm": 0.5312969088554382, "learning_rate": 3.649334170854271e-05, "loss": 0.0045, "step": 3175 }, { "epoch": 28.31858407079646, "grad_norm": 0.6065810918807983, "learning_rate": 3.648391959798995e-05, "loss": 0.0035, "step": 3200 }, { "epoch": 28.539823008849556, "grad_norm": 0.5350639224052429, "learning_rate": 3.647449748743718e-05, "loss": 0.0038, "step": 3225 }, { "epoch": 28.761061946902654, "grad_norm": 0.7335090637207031, "learning_rate": 3.646507537688442e-05, "loss": 0.005, "step": 3250 }, { "epoch": 28.98230088495575, "grad_norm": 0.9164904356002808, "learning_rate": 3.645565326633166e-05, "loss": 0.0046, "step": 3275 }, { "epoch": 29.20353982300885, "grad_norm": 0.6394228935241699, "learning_rate": 3.6446231155778895e-05, "loss": 0.0043, "step": 3300 }, { "epoch": 29.424778761061948, "grad_norm": 0.4392627775669098, "learning_rate": 3.6436809045226126e-05, "loss": 0.0037, "step": 3325 }, { "epoch": 29.646017699115045, "grad_norm": 0.5359008312225342, "learning_rate": 3.6427386934673364e-05, "loss": 0.0036, "step": 3350 }, { "epoch": 29.86725663716814, "grad_norm": 0.36960330605506897, "learning_rate": 3.64179648241206e-05, "loss": 0.0038, "step": 3375 }, { "epoch": 30.088495575221238, "grad_norm": 0.40550148487091064, "learning_rate": 3.640854271356783e-05, "loss": 0.0029, "step": 3400 }, { "epoch": 30.309734513274336, "grad_norm": 0.7572898268699646, "learning_rate": 3.639912060301507e-05, "loss": 0.0029, "step": 3425 }, { "epoch": 30.530973451327434, "grad_norm": 0.49364155530929565, "learning_rate": 3.638969849246231e-05, "loss": 0.0029, "step": 3450 }, { "epoch": 30.75221238938053, "grad_norm": 0.2951892614364624, "learning_rate": 3.638027638190955e-05, "loss": 0.0026, "step": 3475 }, { "epoch": 30.97345132743363, "grad_norm": 0.23061247169971466, "learning_rate": 3.637085427135678e-05, "loss": 0.0029, "step": 3500 }, { "epoch": 31.194690265486727, "grad_norm": 0.4704987406730652, "learning_rate": 3.6361432160804016e-05, "loss": 0.0023, "step": 3525 }, { "epoch": 31.41592920353982, "grad_norm": 0.8864805698394775, "learning_rate": 3.6352010050251254e-05, "loss": 0.0021, "step": 3550 }, { "epoch": 31.63716814159292, "grad_norm": 0.1839492917060852, "learning_rate": 3.634258793969849e-05, "loss": 0.0018, "step": 3575 }, { "epoch": 31.858407079646017, "grad_norm": 0.33682090044021606, "learning_rate": 3.633316582914573e-05, "loss": 0.0018, "step": 3600 }, { "epoch": 32.07964601769911, "grad_norm": 0.22735558450222015, "learning_rate": 3.632374371859296e-05, "loss": 0.0017, "step": 3625 }, { "epoch": 32.30088495575221, "grad_norm": 0.37455394864082336, "learning_rate": 3.63143216080402e-05, "loss": 0.0013, "step": 3650 }, { "epoch": 32.52212389380531, "grad_norm": 0.21050739288330078, "learning_rate": 3.630489949748743e-05, "loss": 0.0015, "step": 3675 }, { "epoch": 32.743362831858406, "grad_norm": 0.7870774269104004, "learning_rate": 3.629547738693467e-05, "loss": 0.0016, "step": 3700 }, { "epoch": 32.9646017699115, "grad_norm": 0.2250039130449295, "learning_rate": 3.6286055276381906e-05, "loss": 0.0022, "step": 3725 }, { "epoch": 33.1858407079646, "grad_norm": 0.5498101115226746, "learning_rate": 3.6276633165829144e-05, "loss": 0.0027, "step": 3750 }, { "epoch": 33.4070796460177, "grad_norm": 0.2101869136095047, "learning_rate": 3.6267211055276375e-05, "loss": 0.0027, "step": 3775 }, { "epoch": 33.6283185840708, "grad_norm": 0.5264481902122498, "learning_rate": 3.625778894472361e-05, "loss": 0.003, "step": 3800 }, { "epoch": 33.849557522123895, "grad_norm": 0.6650252342224121, "learning_rate": 3.624836683417085e-05, "loss": 0.0031, "step": 3825 }, { "epoch": 34.07079646017699, "grad_norm": 0.5468911528587341, "learning_rate": 3.623894472361809e-05, "loss": 0.0042, "step": 3850 }, { "epoch": 34.29203539823009, "grad_norm": 0.5998144745826721, "learning_rate": 3.6229522613065326e-05, "loss": 0.0041, "step": 3875 }, { "epoch": 34.51327433628319, "grad_norm": 0.3813066780567169, "learning_rate": 3.622010050251256e-05, "loss": 0.0042, "step": 3900 }, { "epoch": 34.73451327433628, "grad_norm": 0.4529206454753876, "learning_rate": 3.6210678391959796e-05, "loss": 0.0038, "step": 3925 }, { "epoch": 34.95575221238938, "grad_norm": 0.9884107112884521, "learning_rate": 3.6201256281407033e-05, "loss": 0.0043, "step": 3950 }, { "epoch": 35.176991150442475, "grad_norm": 0.8120687007904053, "learning_rate": 3.619183417085427e-05, "loss": 0.0045, "step": 3975 }, { "epoch": 35.39823008849557, "grad_norm": 0.9935774207115173, "learning_rate": 3.618241206030151e-05, "loss": 0.004, "step": 4000 }, { "epoch": 35.61946902654867, "grad_norm": 0.8607205152511597, "learning_rate": 3.617298994974874e-05, "loss": 0.005, "step": 4025 }, { "epoch": 35.84070796460177, "grad_norm": 0.8314578533172607, "learning_rate": 3.616356783919598e-05, "loss": 0.0059, "step": 4050 }, { "epoch": 36.06194690265487, "grad_norm": 0.599431037902832, "learning_rate": 3.615414572864321e-05, "loss": 0.0065, "step": 4075 }, { "epoch": 36.283185840707965, "grad_norm": 0.4331784248352051, "learning_rate": 3.614472361809045e-05, "loss": 0.0054, "step": 4100 }, { "epoch": 36.50442477876106, "grad_norm": 0.6591492295265198, "learning_rate": 3.6135301507537685e-05, "loss": 0.0051, "step": 4125 }, { "epoch": 36.72566371681416, "grad_norm": 0.7147579193115234, "learning_rate": 3.612587939698492e-05, "loss": 0.0047, "step": 4150 }, { "epoch": 36.94690265486726, "grad_norm": 1.9876089096069336, "learning_rate": 3.6116457286432155e-05, "loss": 0.0056, "step": 4175 }, { "epoch": 37.16814159292036, "grad_norm": 1.0397142171859741, "learning_rate": 3.610703517587939e-05, "loss": 0.0048, "step": 4200 }, { "epoch": 37.389380530973455, "grad_norm": 0.6235687732696533, "learning_rate": 3.609761306532663e-05, "loss": 0.0039, "step": 4225 }, { "epoch": 37.610619469026545, "grad_norm": 0.6543871164321899, "learning_rate": 3.608819095477387e-05, "loss": 0.0042, "step": 4250 }, { "epoch": 37.83185840707964, "grad_norm": 0.4313727915287018, "learning_rate": 3.6078768844221106e-05, "loss": 0.0044, "step": 4275 }, { "epoch": 38.05309734513274, "grad_norm": 0.21667367219924927, "learning_rate": 3.606934673366834e-05, "loss": 0.0039, "step": 4300 }, { "epoch": 38.27433628318584, "grad_norm": 0.6250361800193787, "learning_rate": 3.6059924623115575e-05, "loss": 0.0031, "step": 4325 }, { "epoch": 38.49557522123894, "grad_norm": 0.4341956377029419, "learning_rate": 3.6050502512562806e-05, "loss": 0.0028, "step": 4350 }, { "epoch": 38.716814159292035, "grad_norm": 0.850063145160675, "learning_rate": 3.6041080402010044e-05, "loss": 0.003, "step": 4375 }, { "epoch": 38.93805309734513, "grad_norm": 0.5022916197776794, "learning_rate": 3.603165829145728e-05, "loss": 0.0031, "step": 4400 }, { "epoch": 39.15929203539823, "grad_norm": 0.36580532789230347, "learning_rate": 3.602223618090452e-05, "loss": 0.0025, "step": 4425 }, { "epoch": 39.38053097345133, "grad_norm": 0.5397484302520752, "learning_rate": 3.601281407035176e-05, "loss": 0.0018, "step": 4450 }, { "epoch": 39.60176991150443, "grad_norm": 0.09317639470100403, "learning_rate": 3.600339195979899e-05, "loss": 0.0015, "step": 4475 }, { "epoch": 39.823008849557525, "grad_norm": 0.4195622205734253, "learning_rate": 3.599396984924623e-05, "loss": 0.0014, "step": 4500 }, { "epoch": 40.04424778761062, "grad_norm": 0.330092191696167, "learning_rate": 3.5984547738693465e-05, "loss": 0.0018, "step": 4525 }, { "epoch": 40.26548672566372, "grad_norm": 0.09767819195985794, "learning_rate": 3.59751256281407e-05, "loss": 0.0018, "step": 4550 }, { "epoch": 40.48672566371681, "grad_norm": 0.12107487767934799, "learning_rate": 3.5965703517587934e-05, "loss": 0.0015, "step": 4575 }, { "epoch": 40.70796460176991, "grad_norm": 0.14181815087795258, "learning_rate": 3.595628140703517e-05, "loss": 0.0011, "step": 4600 }, { "epoch": 40.92920353982301, "grad_norm": 0.39292359352111816, "learning_rate": 3.594685929648241e-05, "loss": 0.0012, "step": 4625 }, { "epoch": 41.150442477876105, "grad_norm": 0.06408234685659409, "learning_rate": 3.593743718592965e-05, "loss": 0.001, "step": 4650 }, { "epoch": 41.3716814159292, "grad_norm": 0.13170376420021057, "learning_rate": 3.5928015075376886e-05, "loss": 0.0009, "step": 4675 }, { "epoch": 41.5929203539823, "grad_norm": 0.19049133360385895, "learning_rate": 3.591859296482412e-05, "loss": 0.0008, "step": 4700 }, { "epoch": 41.8141592920354, "grad_norm": 0.09958677738904953, "learning_rate": 3.5909170854271355e-05, "loss": 0.001, "step": 4725 }, { "epoch": 42.0353982300885, "grad_norm": 0.03725467249751091, "learning_rate": 3.5899748743718586e-05, "loss": 0.0009, "step": 4750 }, { "epoch": 42.256637168141594, "grad_norm": 0.06322778016328812, "learning_rate": 3.5890326633165824e-05, "loss": 0.0009, "step": 4775 }, { "epoch": 42.47787610619469, "grad_norm": 0.15316827595233917, "learning_rate": 3.588090452261306e-05, "loss": 0.0009, "step": 4800 }, { "epoch": 42.69911504424779, "grad_norm": 0.3181770145893097, "learning_rate": 3.58714824120603e-05, "loss": 0.0011, "step": 4825 }, { "epoch": 42.92035398230089, "grad_norm": 0.10112958401441574, "learning_rate": 3.586206030150754e-05, "loss": 0.001, "step": 4850 }, { "epoch": 43.14159292035398, "grad_norm": 0.36567315459251404, "learning_rate": 3.585263819095477e-05, "loss": 0.0012, "step": 4875 }, { "epoch": 43.36283185840708, "grad_norm": 0.21454451978206635, "learning_rate": 3.584321608040201e-05, "loss": 0.001, "step": 4900 }, { "epoch": 43.584070796460175, "grad_norm": 0.5421210527420044, "learning_rate": 3.5833793969849245e-05, "loss": 0.0011, "step": 4925 }, { "epoch": 43.80530973451327, "grad_norm": 0.058048658072948456, "learning_rate": 3.582437185929648e-05, "loss": 0.0011, "step": 4950 }, { "epoch": 44.02654867256637, "grad_norm": 0.12616457045078278, "learning_rate": 3.5814949748743714e-05, "loss": 0.001, "step": 4975 }, { "epoch": 44.24778761061947, "grad_norm": 0.3239384591579437, "learning_rate": 3.580552763819095e-05, "loss": 0.0014, "step": 5000 }, { "epoch": 44.24778761061947, "eval_loss": 0.46391189098358154, "eval_runtime": 68.6114, "eval_samples_per_second": 209.63, "eval_steps_per_second": 1.647, "eval_wer": 21.205500355476946, "step": 5000 }, { "epoch": 44.469026548672566, "grad_norm": 0.36909669637680054, "learning_rate": 3.579610552763818e-05, "loss": 0.0015, "step": 5025 }, { "epoch": 44.690265486725664, "grad_norm": 0.600792646408081, "learning_rate": 3.578668341708542e-05, "loss": 0.0014, "step": 5050 }, { "epoch": 44.91150442477876, "grad_norm": 0.25420069694519043, "learning_rate": 3.577726130653266e-05, "loss": 0.0017, "step": 5075 }, { "epoch": 45.13274336283186, "grad_norm": 0.49910032749176025, "learning_rate": 3.57678391959799e-05, "loss": 0.0016, "step": 5100 }, { "epoch": 45.35398230088496, "grad_norm": 0.30805960297584534, "learning_rate": 3.5758417085427135e-05, "loss": 0.0022, "step": 5125 }, { "epoch": 45.575221238938056, "grad_norm": 0.29786399006843567, "learning_rate": 3.5748994974874366e-05, "loss": 0.0027, "step": 5150 }, { "epoch": 45.796460176991154, "grad_norm": 0.36274364590644836, "learning_rate": 3.5739572864321604e-05, "loss": 0.0028, "step": 5175 }, { "epoch": 46.017699115044245, "grad_norm": 0.5863615274429321, "learning_rate": 3.573015075376884e-05, "loss": 0.0033, "step": 5200 }, { "epoch": 46.23893805309734, "grad_norm": 0.8882954716682434, "learning_rate": 3.572072864321608e-05, "loss": 0.0043, "step": 5225 }, { "epoch": 46.46017699115044, "grad_norm": 1.1716095209121704, "learning_rate": 3.571130653266332e-05, "loss": 0.0046, "step": 5250 }, { "epoch": 46.68141592920354, "grad_norm": 1.162951946258545, "learning_rate": 3.570188442211055e-05, "loss": 0.0057, "step": 5275 }, { "epoch": 46.902654867256636, "grad_norm": 0.9007552862167358, "learning_rate": 3.569246231155779e-05, "loss": 0.0061, "step": 5300 }, { "epoch": 47.123893805309734, "grad_norm": 1.213646411895752, "learning_rate": 3.568304020100502e-05, "loss": 0.0065, "step": 5325 }, { "epoch": 47.34513274336283, "grad_norm": 0.7590187788009644, "learning_rate": 3.567361809045226e-05, "loss": 0.0061, "step": 5350 }, { "epoch": 47.56637168141593, "grad_norm": 0.9213088750839233, "learning_rate": 3.5664195979899494e-05, "loss": 0.0059, "step": 5375 }, { "epoch": 47.78761061946903, "grad_norm": 1.436537265777588, "learning_rate": 3.565477386934673e-05, "loss": 0.0063, "step": 5400 }, { "epoch": 48.008849557522126, "grad_norm": 0.5588847398757935, "learning_rate": 3.564535175879396e-05, "loss": 0.0065, "step": 5425 }, { "epoch": 48.230088495575224, "grad_norm": 0.49504005908966064, "learning_rate": 3.56359296482412e-05, "loss": 0.0042, "step": 5450 }, { "epoch": 48.45132743362832, "grad_norm": 0.35268068313598633, "learning_rate": 3.562650753768844e-05, "loss": 0.0043, "step": 5475 }, { "epoch": 48.67256637168141, "grad_norm": 0.44546326994895935, "learning_rate": 3.5617085427135677e-05, "loss": 0.0045, "step": 5500 }, { "epoch": 48.89380530973451, "grad_norm": 1.1131490468978882, "learning_rate": 3.5607663316582914e-05, "loss": 0.0049, "step": 5525 }, { "epoch": 49.11504424778761, "grad_norm": 0.5831943154335022, "learning_rate": 3.5598241206030146e-05, "loss": 0.0041, "step": 5550 }, { "epoch": 49.336283185840706, "grad_norm": 0.4020663797855377, "learning_rate": 3.5588819095477384e-05, "loss": 0.0035, "step": 5575 }, { "epoch": 49.557522123893804, "grad_norm": 0.3665708005428314, "learning_rate": 3.557939698492462e-05, "loss": 0.0034, "step": 5600 }, { "epoch": 49.7787610619469, "grad_norm": 0.4677286446094513, "learning_rate": 3.556997487437186e-05, "loss": 0.0033, "step": 5625 }, { "epoch": 50.0, "grad_norm": 0.4547424018383026, "learning_rate": 3.556055276381909e-05, "loss": 0.0034, "step": 5650 }, { "epoch": 50.2212389380531, "grad_norm": 0.1864093542098999, "learning_rate": 3.555113065326633e-05, "loss": 0.0024, "step": 5675 }, { "epoch": 50.442477876106196, "grad_norm": 0.26309406757354736, "learning_rate": 3.5541708542713566e-05, "loss": 0.002, "step": 5700 }, { "epoch": 50.663716814159294, "grad_norm": 0.26417773962020874, "learning_rate": 3.55322864321608e-05, "loss": 0.002, "step": 5725 }, { "epoch": 50.88495575221239, "grad_norm": 0.3043419420719147, "learning_rate": 3.5522864321608035e-05, "loss": 0.0018, "step": 5750 }, { "epoch": 51.10619469026549, "grad_norm": 0.14515939354896545, "learning_rate": 3.5513442211055273e-05, "loss": 0.0016, "step": 5775 }, { "epoch": 51.32743362831859, "grad_norm": 0.07325629889965057, "learning_rate": 3.550402010050251e-05, "loss": 0.0012, "step": 5800 }, { "epoch": 51.54867256637168, "grad_norm": 0.1604243516921997, "learning_rate": 3.549459798994974e-05, "loss": 0.0012, "step": 5825 }, { "epoch": 51.769911504424776, "grad_norm": 0.3486507534980774, "learning_rate": 3.548517587939698e-05, "loss": 0.0016, "step": 5850 }, { "epoch": 51.991150442477874, "grad_norm": 0.11894264072179794, "learning_rate": 3.547575376884422e-05, "loss": 0.0013, "step": 5875 }, { "epoch": 52.21238938053097, "grad_norm": 0.12770044803619385, "learning_rate": 3.5466331658291456e-05, "loss": 0.0009, "step": 5900 }, { "epoch": 52.43362831858407, "grad_norm": 0.15084324777126312, "learning_rate": 3.5456909547738694e-05, "loss": 0.0011, "step": 5925 }, { "epoch": 52.65486725663717, "grad_norm": 0.4647946059703827, "learning_rate": 3.5447487437185925e-05, "loss": 0.0011, "step": 5950 }, { "epoch": 52.876106194690266, "grad_norm": 0.31086570024490356, "learning_rate": 3.543806532663316e-05, "loss": 0.0012, "step": 5975 }, { "epoch": 53.097345132743364, "grad_norm": 0.0742727667093277, "learning_rate": 3.5428643216080394e-05, "loss": 0.0011, "step": 6000 }, { "epoch": 53.31858407079646, "grad_norm": 0.08821789920330048, "learning_rate": 3.541922110552763e-05, "loss": 0.0009, "step": 6025 }, { "epoch": 53.53982300884956, "grad_norm": 0.13363653421401978, "learning_rate": 3.540979899497487e-05, "loss": 0.0008, "step": 6050 }, { "epoch": 53.76106194690266, "grad_norm": 0.12411464005708694, "learning_rate": 3.540037688442211e-05, "loss": 0.0008, "step": 6075 }, { "epoch": 53.982300884955755, "grad_norm": 0.21393641829490662, "learning_rate": 3.5390954773869346e-05, "loss": 0.0008, "step": 6100 }, { "epoch": 54.203539823008846, "grad_norm": 0.022086799144744873, "learning_rate": 3.538153266331658e-05, "loss": 0.0005, "step": 6125 }, { "epoch": 54.424778761061944, "grad_norm": 0.09155420958995819, "learning_rate": 3.5372110552763815e-05, "loss": 0.0006, "step": 6150 }, { "epoch": 54.64601769911504, "grad_norm": 0.057288557291030884, "learning_rate": 3.536268844221105e-05, "loss": 0.0006, "step": 6175 }, { "epoch": 54.86725663716814, "grad_norm": 0.08642381429672241, "learning_rate": 3.535326633165829e-05, "loss": 0.0006, "step": 6200 }, { "epoch": 55.08849557522124, "grad_norm": 0.20698335766792297, "learning_rate": 3.534384422110552e-05, "loss": 0.0006, "step": 6225 }, { "epoch": 55.309734513274336, "grad_norm": 0.027225693687796593, "learning_rate": 3.533442211055276e-05, "loss": 0.0004, "step": 6250 }, { "epoch": 55.530973451327434, "grad_norm": 0.06026812642812729, "learning_rate": 3.5325e-05, "loss": 0.0006, "step": 6275 }, { "epoch": 55.75221238938053, "grad_norm": 0.6306746602058411, "learning_rate": 3.5315577889447236e-05, "loss": 0.0005, "step": 6300 }, { "epoch": 55.97345132743363, "grad_norm": 0.02361147478222847, "learning_rate": 3.5306155778894474e-05, "loss": 0.0004, "step": 6325 }, { "epoch": 56.19469026548673, "grad_norm": 0.014253910630941391, "learning_rate": 3.5296733668341705e-05, "loss": 0.0004, "step": 6350 }, { "epoch": 56.415929203539825, "grad_norm": 0.02029568701982498, "learning_rate": 3.528731155778894e-05, "loss": 0.0004, "step": 6375 }, { "epoch": 56.63716814159292, "grad_norm": 0.09712693095207214, "learning_rate": 3.5277889447236174e-05, "loss": 0.0003, "step": 6400 }, { "epoch": 56.85840707964602, "grad_norm": 0.010721324943006039, "learning_rate": 3.526846733668341e-05, "loss": 0.0003, "step": 6425 }, { "epoch": 57.07964601769911, "grad_norm": 0.008517120964825153, "learning_rate": 3.525904522613065e-05, "loss": 0.0003, "step": 6450 }, { "epoch": 57.30088495575221, "grad_norm": 0.0076059615239501, "learning_rate": 3.524962311557789e-05, "loss": 0.0003, "step": 6475 }, { "epoch": 57.52212389380531, "grad_norm": 0.008236125111579895, "learning_rate": 3.5240201005025126e-05, "loss": 0.0003, "step": 6500 }, { "epoch": 57.743362831858406, "grad_norm": 0.010843484662473202, "learning_rate": 3.523077889447236e-05, "loss": 0.0003, "step": 6525 }, { "epoch": 57.9646017699115, "grad_norm": 0.008303100243210793, "learning_rate": 3.5221356783919595e-05, "loss": 0.0003, "step": 6550 }, { "epoch": 58.1858407079646, "grad_norm": 0.01410588063299656, "learning_rate": 3.521193467336683e-05, "loss": 0.0002, "step": 6575 }, { "epoch": 58.4070796460177, "grad_norm": 0.0070640332996845245, "learning_rate": 3.520251256281407e-05, "loss": 0.0002, "step": 6600 }, { "epoch": 58.6283185840708, "grad_norm": 0.006724391598254442, "learning_rate": 3.51930904522613e-05, "loss": 0.0002, "step": 6625 }, { "epoch": 58.849557522123895, "grad_norm": 0.0069237216375768185, "learning_rate": 3.518366834170854e-05, "loss": 0.0002, "step": 6650 }, { "epoch": 59.07079646017699, "grad_norm": 0.0060720546171069145, "learning_rate": 3.517424623115577e-05, "loss": 0.0002, "step": 6675 }, { "epoch": 59.29203539823009, "grad_norm": 0.006104297935962677, "learning_rate": 3.516482412060301e-05, "loss": 0.0002, "step": 6700 }, { "epoch": 59.51327433628319, "grad_norm": 0.006220530718564987, "learning_rate": 3.515540201005025e-05, "loss": 0.0002, "step": 6725 }, { "epoch": 59.73451327433628, "grad_norm": 0.006241916678845882, "learning_rate": 3.5145979899497485e-05, "loss": 0.0002, "step": 6750 }, { "epoch": 59.95575221238938, "grad_norm": 0.0060973213985562325, "learning_rate": 3.513655778894472e-05, "loss": 0.0002, "step": 6775 }, { "epoch": 60.176991150442475, "grad_norm": 0.0053083752281963825, "learning_rate": 3.5127135678391954e-05, "loss": 0.0002, "step": 6800 }, { "epoch": 60.39823008849557, "grad_norm": 0.005671331658959389, "learning_rate": 3.511771356783919e-05, "loss": 0.0002, "step": 6825 }, { "epoch": 60.61946902654867, "grad_norm": 0.005219395738095045, "learning_rate": 3.510829145728643e-05, "loss": 0.0002, "step": 6850 }, { "epoch": 60.84070796460177, "grad_norm": 0.005818720906972885, "learning_rate": 3.509886934673367e-05, "loss": 0.0002, "step": 6875 }, { "epoch": 61.06194690265487, "grad_norm": 0.004716409370303154, "learning_rate": 3.50894472361809e-05, "loss": 0.0002, "step": 6900 }, { "epoch": 61.283185840707965, "grad_norm": 0.004982034210115671, "learning_rate": 3.508002512562814e-05, "loss": 0.0002, "step": 6925 }, { "epoch": 61.50442477876106, "grad_norm": 0.005218575708568096, "learning_rate": 3.5070603015075375e-05, "loss": 0.0002, "step": 6950 }, { "epoch": 61.72566371681416, "grad_norm": 0.005393555853515863, "learning_rate": 3.506118090452261e-05, "loss": 0.0002, "step": 6975 }, { "epoch": 61.94690265486726, "grad_norm": 0.004949633497744799, "learning_rate": 3.505175879396985e-05, "loss": 0.0002, "step": 7000 }, { "epoch": 62.16814159292036, "grad_norm": 0.004518670029938221, "learning_rate": 3.504233668341708e-05, "loss": 0.0002, "step": 7025 }, { "epoch": 62.389380530973455, "grad_norm": 0.004960978403687477, "learning_rate": 3.503291457286432e-05, "loss": 0.0002, "step": 7050 }, { "epoch": 62.610619469026545, "grad_norm": 0.004929111339151859, "learning_rate": 3.502349246231155e-05, "loss": 0.0002, "step": 7075 }, { "epoch": 62.83185840707964, "grad_norm": 0.0046343617141246796, "learning_rate": 3.501407035175879e-05, "loss": 0.0002, "step": 7100 }, { "epoch": 63.05309734513274, "grad_norm": 0.004272716119885445, "learning_rate": 3.5004648241206027e-05, "loss": 0.0002, "step": 7125 }, { "epoch": 63.27433628318584, "grad_norm": 0.004529283381998539, "learning_rate": 3.4995226130653265e-05, "loss": 0.0001, "step": 7150 }, { "epoch": 63.49557522123894, "grad_norm": 0.004303035791963339, "learning_rate": 3.49858040201005e-05, "loss": 0.0002, "step": 7175 }, { "epoch": 63.716814159292035, "grad_norm": 0.004358489532023668, "learning_rate": 3.4976381909547734e-05, "loss": 0.0001, "step": 7200 }, { "epoch": 63.93805309734513, "grad_norm": 0.004464657045900822, "learning_rate": 3.496695979899497e-05, "loss": 0.0001, "step": 7225 }, { "epoch": 64.15929203539822, "grad_norm": 0.003989487886428833, "learning_rate": 3.495753768844221e-05, "loss": 0.0001, "step": 7250 }, { "epoch": 64.38053097345133, "grad_norm": 0.003923088312149048, "learning_rate": 3.494811557788945e-05, "loss": 0.0001, "step": 7275 }, { "epoch": 64.60176991150442, "grad_norm": 0.003915212582796812, "learning_rate": 3.493869346733668e-05, "loss": 0.0001, "step": 7300 }, { "epoch": 64.82300884955752, "grad_norm": 0.004168102517724037, "learning_rate": 3.4929271356783916e-05, "loss": 0.0002, "step": 7325 }, { "epoch": 65.04424778761062, "grad_norm": 0.003908833023160696, "learning_rate": 3.4919849246231154e-05, "loss": 0.0001, "step": 7350 }, { "epoch": 65.26548672566372, "grad_norm": 0.003935889806598425, "learning_rate": 3.4910427135678386e-05, "loss": 0.0001, "step": 7375 }, { "epoch": 65.48672566371681, "grad_norm": 0.0037951054982841015, "learning_rate": 3.4901005025125623e-05, "loss": 0.0001, "step": 7400 }, { "epoch": 65.70796460176992, "grad_norm": 0.004108997993171215, "learning_rate": 3.489158291457286e-05, "loss": 0.0001, "step": 7425 }, { "epoch": 65.929203539823, "grad_norm": 0.0039563304744660854, "learning_rate": 3.48821608040201e-05, "loss": 0.0001, "step": 7450 }, { "epoch": 66.15044247787611, "grad_norm": 0.0036372211761772633, "learning_rate": 3.487273869346733e-05, "loss": 0.0001, "step": 7475 }, { "epoch": 66.3716814159292, "grad_norm": 0.00331837753765285, "learning_rate": 3.486331658291457e-05, "loss": 0.0001, "step": 7500 }, { "epoch": 66.59292035398231, "grad_norm": 0.003832829650491476, "learning_rate": 3.4853894472361806e-05, "loss": 0.0001, "step": 7525 }, { "epoch": 66.8141592920354, "grad_norm": 0.003613576525822282, "learning_rate": 3.4844472361809044e-05, "loss": 0.0001, "step": 7550 }, { "epoch": 67.03539823008849, "grad_norm": 0.0030760851223021746, "learning_rate": 3.483505025125628e-05, "loss": 0.0001, "step": 7575 }, { "epoch": 67.2566371681416, "grad_norm": 0.003599856048822403, "learning_rate": 3.482562814070351e-05, "loss": 0.0001, "step": 7600 }, { "epoch": 67.47787610619469, "grad_norm": 0.0034639365039765835, "learning_rate": 3.481620603015075e-05, "loss": 0.0001, "step": 7625 }, { "epoch": 67.69911504424779, "grad_norm": 0.0034027874935418367, "learning_rate": 3.480678391959799e-05, "loss": 0.0001, "step": 7650 }, { "epoch": 67.92035398230088, "grad_norm": 0.003278808668255806, "learning_rate": 3.479736180904523e-05, "loss": 0.0001, "step": 7675 }, { "epoch": 68.14159292035399, "grad_norm": 0.0034000594168901443, "learning_rate": 3.478793969849246e-05, "loss": 0.0001, "step": 7700 }, { "epoch": 68.36283185840708, "grad_norm": 0.0032303871121257544, "learning_rate": 3.4778517587939696e-05, "loss": 0.0001, "step": 7725 }, { "epoch": 68.58407079646018, "grad_norm": 0.003303369740024209, "learning_rate": 3.476909547738693e-05, "loss": 0.0001, "step": 7750 }, { "epoch": 68.80530973451327, "grad_norm": 0.0032171180937439203, "learning_rate": 3.4759673366834165e-05, "loss": 0.0001, "step": 7775 }, { "epoch": 69.02654867256638, "grad_norm": 0.0030202167108654976, "learning_rate": 3.47502512562814e-05, "loss": 0.0001, "step": 7800 }, { "epoch": 69.24778761061947, "grad_norm": 0.0030858637765049934, "learning_rate": 3.474082914572864e-05, "loss": 0.0001, "step": 7825 }, { "epoch": 69.46902654867256, "grad_norm": 0.0030148138757795095, "learning_rate": 3.473140703517588e-05, "loss": 0.0001, "step": 7850 }, { "epoch": 69.69026548672566, "grad_norm": 0.003023445839062333, "learning_rate": 3.472198492462311e-05, "loss": 0.0001, "step": 7875 }, { "epoch": 69.91150442477876, "grad_norm": 0.003226812928915024, "learning_rate": 3.471256281407035e-05, "loss": 0.0001, "step": 7900 }, { "epoch": 70.13274336283186, "grad_norm": 0.002880989108234644, "learning_rate": 3.4703140703517586e-05, "loss": 0.0001, "step": 7925 }, { "epoch": 70.35398230088495, "grad_norm": 0.002949036657810211, "learning_rate": 3.4693718592964824e-05, "loss": 0.0001, "step": 7950 }, { "epoch": 70.57522123893806, "grad_norm": 0.0029469975270330906, "learning_rate": 3.468429648241206e-05, "loss": 0.0001, "step": 7975 }, { "epoch": 70.79646017699115, "grad_norm": 0.0027570202946662903, "learning_rate": 3.467487437185929e-05, "loss": 0.0001, "step": 8000 }, { "epoch": 71.01769911504425, "grad_norm": 0.002688049105927348, "learning_rate": 3.466545226130653e-05, "loss": 0.0001, "step": 8025 }, { "epoch": 71.23893805309734, "grad_norm": 0.0028269037138670683, "learning_rate": 3.465603015075376e-05, "loss": 0.0001, "step": 8050 }, { "epoch": 71.46017699115045, "grad_norm": 0.0026765763759613037, "learning_rate": 3.4646608040201e-05, "loss": 0.0001, "step": 8075 }, { "epoch": 71.68141592920354, "grad_norm": 0.0025807430502027273, "learning_rate": 3.463718592964824e-05, "loss": 0.0001, "step": 8100 }, { "epoch": 71.90265486725664, "grad_norm": 0.002572764875367284, "learning_rate": 3.4627763819095476e-05, "loss": 0.0001, "step": 8125 }, { "epoch": 72.12389380530973, "grad_norm": 0.00260861124843359, "learning_rate": 3.461834170854271e-05, "loss": 0.0001, "step": 8150 }, { "epoch": 72.34513274336283, "grad_norm": 0.0025257074739784002, "learning_rate": 3.4608919597989945e-05, "loss": 0.0001, "step": 8175 }, { "epoch": 72.56637168141593, "grad_norm": 0.002665309701114893, "learning_rate": 3.459949748743718e-05, "loss": 0.0001, "step": 8200 }, { "epoch": 72.78761061946902, "grad_norm": 0.0025875826831907034, "learning_rate": 3.459007537688442e-05, "loss": 0.0001, "step": 8225 }, { "epoch": 73.00884955752213, "grad_norm": 0.0023846894036978483, "learning_rate": 3.458065326633166e-05, "loss": 0.0001, "step": 8250 }, { "epoch": 73.23008849557522, "grad_norm": 0.0023821492213755846, "learning_rate": 3.457123115577889e-05, "loss": 0.0001, "step": 8275 }, { "epoch": 73.45132743362832, "grad_norm": 0.0025490194093436003, "learning_rate": 3.456180904522613e-05, "loss": 0.0001, "step": 8300 }, { "epoch": 73.67256637168141, "grad_norm": 0.002556287217885256, "learning_rate": 3.455238693467336e-05, "loss": 0.0001, "step": 8325 }, { "epoch": 73.89380530973452, "grad_norm": 0.0023395647294819355, "learning_rate": 3.45429648241206e-05, "loss": 0.0001, "step": 8350 }, { "epoch": 74.11504424778761, "grad_norm": 0.0022825044579803944, "learning_rate": 3.4533542713567835e-05, "loss": 0.0001, "step": 8375 }, { "epoch": 74.33628318584071, "grad_norm": 0.0024044134188443422, "learning_rate": 3.452412060301507e-05, "loss": 0.0001, "step": 8400 }, { "epoch": 74.5575221238938, "grad_norm": 0.002278772182762623, "learning_rate": 3.451469849246231e-05, "loss": 0.0001, "step": 8425 }, { "epoch": 74.77876106194691, "grad_norm": 0.0023314072750508785, "learning_rate": 3.450527638190954e-05, "loss": 0.0001, "step": 8450 }, { "epoch": 75.0, "grad_norm": 0.0037646088749170303, "learning_rate": 3.449585427135678e-05, "loss": 0.0001, "step": 8475 }, { "epoch": 75.22123893805309, "grad_norm": 0.0021475611720234156, "learning_rate": 3.448643216080402e-05, "loss": 0.0001, "step": 8500 }, { "epoch": 75.4424778761062, "grad_norm": 0.0023158651310950518, "learning_rate": 3.4477010050251256e-05, "loss": 0.0001, "step": 8525 }, { "epoch": 75.66371681415929, "grad_norm": 0.002321685431525111, "learning_rate": 3.446758793969849e-05, "loss": 0.0001, "step": 8550 }, { "epoch": 75.88495575221239, "grad_norm": 0.0023148921318352222, "learning_rate": 3.4458165829145725e-05, "loss": 0.0001, "step": 8575 }, { "epoch": 76.10619469026548, "grad_norm": 0.0022297685500234365, "learning_rate": 3.444874371859296e-05, "loss": 0.0001, "step": 8600 }, { "epoch": 76.32743362831859, "grad_norm": 0.002155529335141182, "learning_rate": 3.44393216080402e-05, "loss": 0.0001, "step": 8625 }, { "epoch": 76.54867256637168, "grad_norm": 0.002168163890019059, "learning_rate": 3.442989949748744e-05, "loss": 0.0001, "step": 8650 }, { "epoch": 76.76991150442478, "grad_norm": 0.0021967354696244, "learning_rate": 3.442047738693467e-05, "loss": 0.0001, "step": 8675 }, { "epoch": 76.99115044247787, "grad_norm": 0.0020751056727021933, "learning_rate": 3.441105527638191e-05, "loss": 0.0001, "step": 8700 }, { "epoch": 77.21238938053098, "grad_norm": 0.002004786627367139, "learning_rate": 3.440163316582914e-05, "loss": 0.0001, "step": 8725 }, { "epoch": 77.43362831858407, "grad_norm": 0.002037131693214178, "learning_rate": 3.439221105527638e-05, "loss": 0.0001, "step": 8750 }, { "epoch": 77.65486725663717, "grad_norm": 0.0020701480098068714, "learning_rate": 3.4382788944723615e-05, "loss": 0.0001, "step": 8775 }, { "epoch": 77.87610619469027, "grad_norm": 0.001957944128662348, "learning_rate": 3.437336683417085e-05, "loss": 0.0001, "step": 8800 }, { "epoch": 78.09734513274336, "grad_norm": 0.0019169041188433766, "learning_rate": 3.436394472361809e-05, "loss": 0.0001, "step": 8825 }, { "epoch": 78.31858407079646, "grad_norm": 0.0018474205862730742, "learning_rate": 3.435452261306532e-05, "loss": 0.0001, "step": 8850 }, { "epoch": 78.53982300884955, "grad_norm": 0.001842922531068325, "learning_rate": 3.434510050251256e-05, "loss": 0.0001, "step": 8875 }, { "epoch": 78.76106194690266, "grad_norm": 0.0019322725711390376, "learning_rate": 3.43356783919598e-05, "loss": 0.0001, "step": 8900 }, { "epoch": 78.98230088495575, "grad_norm": 0.0019978296477347612, "learning_rate": 3.4326256281407035e-05, "loss": 0.0001, "step": 8925 }, { "epoch": 79.20353982300885, "grad_norm": 0.0019446579972282052, "learning_rate": 3.4316834170854266e-05, "loss": 0.0001, "step": 8950 }, { "epoch": 79.42477876106194, "grad_norm": 0.0017224609619006515, "learning_rate": 3.4307412060301504e-05, "loss": 0.0001, "step": 8975 }, { "epoch": 79.64601769911505, "grad_norm": 0.0018867019098252058, "learning_rate": 3.4297989949748736e-05, "loss": 0.0001, "step": 9000 }, { "epoch": 79.86725663716814, "grad_norm": 0.001859160140156746, "learning_rate": 3.4288567839195974e-05, "loss": 0.0001, "step": 9025 }, { "epoch": 80.08849557522124, "grad_norm": 0.0017475001513957977, "learning_rate": 3.427914572864321e-05, "loss": 0.0001, "step": 9050 }, { "epoch": 80.30973451327434, "grad_norm": 0.001730918069370091, "learning_rate": 3.426972361809045e-05, "loss": 0.0001, "step": 9075 }, { "epoch": 80.53097345132744, "grad_norm": 0.001767538022249937, "learning_rate": 3.426030150753769e-05, "loss": 0.0001, "step": 9100 }, { "epoch": 80.75221238938053, "grad_norm": 0.0017332116840407252, "learning_rate": 3.425087939698492e-05, "loss": 0.0001, "step": 9125 }, { "epoch": 80.97345132743362, "grad_norm": 0.0017095796065405011, "learning_rate": 3.4241457286432156e-05, "loss": 0.0001, "step": 9150 }, { "epoch": 81.19469026548673, "grad_norm": 0.0016707316972315311, "learning_rate": 3.4232035175879394e-05, "loss": 0.0001, "step": 9175 }, { "epoch": 81.41592920353982, "grad_norm": 0.0016317686531692743, "learning_rate": 3.422261306532663e-05, "loss": 0.0001, "step": 9200 }, { "epoch": 81.63716814159292, "grad_norm": 0.0016070872079581022, "learning_rate": 3.421319095477387e-05, "loss": 0.0001, "step": 9225 }, { "epoch": 81.85840707964601, "grad_norm": 0.0017378904158249497, "learning_rate": 3.42037688442211e-05, "loss": 0.0001, "step": 9250 }, { "epoch": 82.07964601769912, "grad_norm": 0.0015088396612554789, "learning_rate": 3.419434673366834e-05, "loss": 0.0001, "step": 9275 }, { "epoch": 82.30088495575221, "grad_norm": 0.001627083751372993, "learning_rate": 3.418492462311558e-05, "loss": 0.0001, "step": 9300 }, { "epoch": 82.52212389380531, "grad_norm": 0.0014865185366943479, "learning_rate": 3.4175502512562815e-05, "loss": 0.0001, "step": 9325 }, { "epoch": 82.7433628318584, "grad_norm": 0.0015754458727315068, "learning_rate": 3.4166080402010046e-05, "loss": 0.0001, "step": 9350 }, { "epoch": 82.96460176991151, "grad_norm": 0.001605819328688085, "learning_rate": 3.4156658291457284e-05, "loss": 0.0001, "step": 9375 }, { "epoch": 83.1858407079646, "grad_norm": 0.0015235698083415627, "learning_rate": 3.4147236180904515e-05, "loss": 0.0001, "step": 9400 }, { "epoch": 83.40707964601769, "grad_norm": 0.0014782625949010253, "learning_rate": 3.413781407035175e-05, "loss": 0.0001, "step": 9425 }, { "epoch": 83.6283185840708, "grad_norm": 0.001562912599183619, "learning_rate": 3.412839195979899e-05, "loss": 0.0001, "step": 9450 }, { "epoch": 83.84955752212389, "grad_norm": 0.0014572669751942158, "learning_rate": 3.411896984924623e-05, "loss": 0.0001, "step": 9475 }, { "epoch": 84.070796460177, "grad_norm": 0.001398448715917766, "learning_rate": 3.410954773869347e-05, "loss": 0.0001, "step": 9500 }, { "epoch": 84.29203539823008, "grad_norm": 0.0014940325636416674, "learning_rate": 3.41001256281407e-05, "loss": 0.0001, "step": 9525 }, { "epoch": 84.51327433628319, "grad_norm": 0.0013887282693758607, "learning_rate": 3.4090703517587936e-05, "loss": 0.0001, "step": 9550 }, { "epoch": 84.73451327433628, "grad_norm": 0.0014121175045147538, "learning_rate": 3.4081281407035174e-05, "loss": 0.0001, "step": 9575 }, { "epoch": 84.95575221238938, "grad_norm": 0.001450953190214932, "learning_rate": 3.407185929648241e-05, "loss": 0.0001, "step": 9600 }, { "epoch": 85.17699115044248, "grad_norm": 0.001311933621764183, "learning_rate": 3.406243718592964e-05, "loss": 0.0, "step": 9625 }, { "epoch": 85.39823008849558, "grad_norm": 0.0013559252256527543, "learning_rate": 3.405301507537688e-05, "loss": 0.0, "step": 9650 }, { "epoch": 85.61946902654867, "grad_norm": 0.0013929266715422273, "learning_rate": 3.404359296482412e-05, "loss": 0.0, "step": 9675 }, { "epoch": 85.84070796460178, "grad_norm": 0.0013806700007990003, "learning_rate": 3.403417085427135e-05, "loss": 0.0, "step": 9700 }, { "epoch": 86.06194690265487, "grad_norm": 0.00128337147179991, "learning_rate": 3.402474874371859e-05, "loss": 0.0, "step": 9725 }, { "epoch": 86.28318584070796, "grad_norm": 0.0012245335383340716, "learning_rate": 3.4015326633165826e-05, "loss": 0.0, "step": 9750 }, { "epoch": 86.50442477876106, "grad_norm": 0.001322528230957687, "learning_rate": 3.4005904522613064e-05, "loss": 0.0, "step": 9775 }, { "epoch": 86.72566371681415, "grad_norm": 0.001261829980649054, "learning_rate": 3.3996482412060295e-05, "loss": 0.0, "step": 9800 }, { "epoch": 86.94690265486726, "grad_norm": 0.0013101190561428666, "learning_rate": 3.398706030150753e-05, "loss": 0.0, "step": 9825 }, { "epoch": 87.16814159292035, "grad_norm": 0.0012159369653090835, "learning_rate": 3.397763819095477e-05, "loss": 0.0, "step": 9850 }, { "epoch": 87.38938053097345, "grad_norm": 0.0012463531456887722, "learning_rate": 3.396821608040201e-05, "loss": 0.0, "step": 9875 }, { "epoch": 87.61061946902655, "grad_norm": 0.001236763666383922, "learning_rate": 3.395879396984925e-05, "loss": 0.0, "step": 9900 }, { "epoch": 87.83185840707965, "grad_norm": 0.0012331437319517136, "learning_rate": 3.394937185929648e-05, "loss": 0.0, "step": 9925 }, { "epoch": 88.05309734513274, "grad_norm": 0.0011590432841330767, "learning_rate": 3.3939949748743716e-05, "loss": 0.0, "step": 9950 }, { "epoch": 88.27433628318585, "grad_norm": 0.001161050284281373, "learning_rate": 3.3930527638190954e-05, "loss": 0.0, "step": 9975 }, { "epoch": 88.49557522123894, "grad_norm": 0.0011522416025400162, "learning_rate": 3.392110552763819e-05, "loss": 0.0, "step": 10000 }, { "epoch": 88.49557522123894, "eval_loss": 0.5179793834686279, "eval_runtime": 66.8437, "eval_samples_per_second": 215.174, "eval_steps_per_second": 1.691, "eval_wer": 20.250047685931783, "step": 10000 }, { "epoch": 88.71681415929204, "grad_norm": 0.0012261044466868043, "learning_rate": 3.391168341708542e-05, "loss": 0.0, "step": 10025 }, { "epoch": 88.93805309734513, "grad_norm": 0.001178042497485876, "learning_rate": 3.390226130653266e-05, "loss": 0.0, "step": 10050 }, { "epoch": 89.15929203539822, "grad_norm": 0.001106926123611629, "learning_rate": 3.38928391959799e-05, "loss": 0.0, "step": 10075 }, { "epoch": 89.38053097345133, "grad_norm": 0.001169239287264645, "learning_rate": 3.388341708542713e-05, "loss": 0.0, "step": 10100 }, { "epoch": 89.60176991150442, "grad_norm": 0.001035262132063508, "learning_rate": 3.387399497487437e-05, "loss": 0.0, "step": 10125 }, { "epoch": 89.82300884955752, "grad_norm": 0.001087056822143495, "learning_rate": 3.3864572864321606e-05, "loss": 0.0, "step": 10150 }, { "epoch": 90.04424778761062, "grad_norm": 0.0009958508890122175, "learning_rate": 3.3855150753768844e-05, "loss": 0.0, "step": 10175 }, { "epoch": 90.26548672566372, "grad_norm": 0.001075174193829298, "learning_rate": 3.3845728643216075e-05, "loss": 0.0, "step": 10200 }, { "epoch": 90.48672566371681, "grad_norm": 0.0010605278657749295, "learning_rate": 3.383630653266331e-05, "loss": 0.0, "step": 10225 }, { "epoch": 90.70796460176992, "grad_norm": 0.0010611022589728236, "learning_rate": 3.382688442211055e-05, "loss": 0.0, "step": 10250 }, { "epoch": 90.929203539823, "grad_norm": 0.0010499174240976572, "learning_rate": 3.381746231155779e-05, "loss": 0.0, "step": 10275 }, { "epoch": 91.15044247787611, "grad_norm": 0.000977793475612998, "learning_rate": 3.3808040201005026e-05, "loss": 0.0, "step": 10300 }, { "epoch": 91.3716814159292, "grad_norm": 0.0010217357194051147, "learning_rate": 3.379861809045226e-05, "loss": 0.0, "step": 10325 }, { "epoch": 91.59292035398231, "grad_norm": 0.0009978745365515351, "learning_rate": 3.3789195979899496e-05, "loss": 0.0, "step": 10350 }, { "epoch": 91.8141592920354, "grad_norm": 0.0010214103385806084, "learning_rate": 3.377977386934673e-05, "loss": 0.0, "step": 10375 }, { "epoch": 92.03539823008849, "grad_norm": 0.0009538016747683287, "learning_rate": 3.3770351758793965e-05, "loss": 0.0, "step": 10400 }, { "epoch": 92.2566371681416, "grad_norm": 0.0009079010342247784, "learning_rate": 3.37609296482412e-05, "loss": 0.0, "step": 10425 }, { "epoch": 92.47787610619469, "grad_norm": 0.0009828143520280719, "learning_rate": 3.375150753768844e-05, "loss": 0.0, "step": 10450 }, { "epoch": 92.69911504424779, "grad_norm": 0.0009621867211535573, "learning_rate": 3.374208542713568e-05, "loss": 0.0, "step": 10475 }, { "epoch": 92.92035398230088, "grad_norm": 0.00097340636420995, "learning_rate": 3.373266331658291e-05, "loss": 0.0, "step": 10500 }, { "epoch": 93.14159292035399, "grad_norm": 0.0008971706847660244, "learning_rate": 3.372324120603015e-05, "loss": 0.0, "step": 10525 }, { "epoch": 93.36283185840708, "grad_norm": 0.0009454868268221617, "learning_rate": 3.3713819095477385e-05, "loss": 0.0, "step": 10550 }, { "epoch": 93.58407079646018, "grad_norm": 0.0009352303459309042, "learning_rate": 3.370439698492462e-05, "loss": 0.0, "step": 10575 }, { "epoch": 93.80530973451327, "grad_norm": 0.0009195778984576464, "learning_rate": 3.3694974874371854e-05, "loss": 0.0, "step": 10600 }, { "epoch": 94.02654867256638, "grad_norm": 0.0008464140701107681, "learning_rate": 3.368555276381909e-05, "loss": 0.0, "step": 10625 }, { "epoch": 94.24778761061947, "grad_norm": 0.0008656398858875036, "learning_rate": 3.3676130653266324e-05, "loss": 0.0, "step": 10650 }, { "epoch": 94.46902654867256, "grad_norm": 0.0008653774857521057, "learning_rate": 3.366670854271357e-05, "loss": 0.0, "step": 10675 }, { "epoch": 94.69026548672566, "grad_norm": 0.0008719101897440851, "learning_rate": 3.3657286432160806e-05, "loss": 0.0, "step": 10700 }, { "epoch": 94.91150442477876, "grad_norm": 0.0009018222917802632, "learning_rate": 3.364786432160804e-05, "loss": 0.0, "step": 10725 }, { "epoch": 95.13274336283186, "grad_norm": 0.0008259322494268417, "learning_rate": 3.3638442211055275e-05, "loss": 0.0, "step": 10750 }, { "epoch": 95.35398230088495, "grad_norm": 0.0008178823045454919, "learning_rate": 3.3629020100502506e-05, "loss": 0.0, "step": 10775 }, { "epoch": 95.57522123893806, "grad_norm": 0.0007829046226106584, "learning_rate": 3.3619597989949744e-05, "loss": 0.0, "step": 10800 }, { "epoch": 95.79646017699115, "grad_norm": 0.0008351016440428793, "learning_rate": 3.361017587939698e-05, "loss": 0.0, "step": 10825 }, { "epoch": 96.01769911504425, "grad_norm": 0.0007857868331484497, "learning_rate": 3.360075376884422e-05, "loss": 0.0, "step": 10850 }, { "epoch": 96.23893805309734, "grad_norm": 0.0007762198802083731, "learning_rate": 3.359133165829145e-05, "loss": 0.0, "step": 10875 }, { "epoch": 96.46017699115045, "grad_norm": 0.0007983743562363088, "learning_rate": 3.358190954773869e-05, "loss": 0.0, "step": 10900 }, { "epoch": 96.68141592920354, "grad_norm": 0.0008089557522907853, "learning_rate": 3.357248743718593e-05, "loss": 0.0, "step": 10925 }, { "epoch": 96.90265486725664, "grad_norm": 0.0007960131042636931, "learning_rate": 3.3563065326633165e-05, "loss": 0.0, "step": 10950 }, { "epoch": 97.12389380530973, "grad_norm": 0.0007273138617165387, "learning_rate": 3.35536432160804e-05, "loss": 0.0, "step": 10975 }, { "epoch": 97.34513274336283, "grad_norm": 0.0007552773458883166, "learning_rate": 3.3544221105527634e-05, "loss": 0.0, "step": 11000 }, { "epoch": 97.56637168141593, "grad_norm": 0.000760455324780196, "learning_rate": 3.353479899497487e-05, "loss": 0.0, "step": 11025 }, { "epoch": 97.78761061946902, "grad_norm": 0.0007642640848644078, "learning_rate": 3.35253768844221e-05, "loss": 0.0, "step": 11050 }, { "epoch": 98.00884955752213, "grad_norm": 0.0007067148690111935, "learning_rate": 3.351595477386934e-05, "loss": 0.0, "step": 11075 }, { "epoch": 98.23008849557522, "grad_norm": 0.0007193459314294159, "learning_rate": 3.350653266331658e-05, "loss": 0.0, "step": 11100 }, { "epoch": 98.45132743362832, "grad_norm": 0.0007070624269545078, "learning_rate": 3.349711055276382e-05, "loss": 0.0, "step": 11125 }, { "epoch": 98.67256637168141, "grad_norm": 0.0007255867240019143, "learning_rate": 3.3487688442211055e-05, "loss": 0.0, "step": 11150 }, { "epoch": 98.89380530973452, "grad_norm": 0.0007341677555814385, "learning_rate": 3.3478266331658286e-05, "loss": 0.0, "step": 11175 }, { "epoch": 99.11504424778761, "grad_norm": 0.0006701911916024983, "learning_rate": 3.3468844221105524e-05, "loss": 0.0, "step": 11200 }, { "epoch": 99.33628318584071, "grad_norm": 0.0006373999640345573, "learning_rate": 3.345942211055276e-05, "loss": 0.0, "step": 11225 }, { "epoch": 99.5575221238938, "grad_norm": 0.0006620438653044403, "learning_rate": 3.345e-05, "loss": 0.0, "step": 11250 }, { "epoch": 99.77876106194691, "grad_norm": 0.0006552780396305025, "learning_rate": 3.344057788944723e-05, "loss": 0.0, "step": 11275 }, { "epoch": 100.0, "grad_norm": 0.0011742340866476297, "learning_rate": 3.343115577889447e-05, "loss": 0.0, "step": 11300 }, { "epoch": 100.22123893805309, "grad_norm": 0.0006294625345617533, "learning_rate": 3.342173366834171e-05, "loss": 0.0, "step": 11325 }, { "epoch": 100.4424778761062, "grad_norm": 0.0006384712760336697, "learning_rate": 3.341231155778894e-05, "loss": 0.0, "step": 11350 }, { "epoch": 100.66371681415929, "grad_norm": 0.0006269979639910161, "learning_rate": 3.3402889447236176e-05, "loss": 0.0, "step": 11375 }, { "epoch": 100.88495575221239, "grad_norm": 0.00066130340564996, "learning_rate": 3.3393467336683414e-05, "loss": 0.0, "step": 11400 }, { "epoch": 101.10619469026548, "grad_norm": 0.0006095270509831607, "learning_rate": 3.338404522613065e-05, "loss": 0.0, "step": 11425 }, { "epoch": 101.32743362831859, "grad_norm": 0.0006224742392078042, "learning_rate": 3.337462311557788e-05, "loss": 0.0, "step": 11450 }, { "epoch": 101.54867256637168, "grad_norm": 0.00061755848582834, "learning_rate": 3.336520100502512e-05, "loss": 0.0, "step": 11475 }, { "epoch": 101.76991150442478, "grad_norm": 0.0006215705652721226, "learning_rate": 3.335577889447236e-05, "loss": 0.0, "step": 11500 }, { "epoch": 101.99115044247787, "grad_norm": 0.0006332016782835126, "learning_rate": 3.33463567839196e-05, "loss": 0.0, "step": 11525 }, { "epoch": 102.21238938053098, "grad_norm": 0.0005780520150437951, "learning_rate": 3.3336934673366835e-05, "loss": 0.0, "step": 11550 }, { "epoch": 102.43362831858407, "grad_norm": 0.0006018158746883273, "learning_rate": 3.3327512562814066e-05, "loss": 0.0, "step": 11575 }, { "epoch": 102.65486725663717, "grad_norm": 0.0005683922790922225, "learning_rate": 3.3318090452261304e-05, "loss": 0.0, "step": 11600 }, { "epoch": 102.87610619469027, "grad_norm": 0.0005828657886013389, "learning_rate": 3.330866834170854e-05, "loss": 0.0, "step": 11625 }, { "epoch": 103.09734513274336, "grad_norm": 0.0005268717068247497, "learning_rate": 3.329924623115578e-05, "loss": 0.0, "step": 11650 }, { "epoch": 103.31858407079646, "grad_norm": 0.0005713331047445536, "learning_rate": 3.328982412060301e-05, "loss": 0.0, "step": 11675 }, { "epoch": 103.53982300884955, "grad_norm": 0.0005595171824097633, "learning_rate": 3.328040201005025e-05, "loss": 0.0, "step": 11700 }, { "epoch": 103.76106194690266, "grad_norm": 0.0005593955283984542, "learning_rate": 3.327097989949749e-05, "loss": 0.0, "step": 11725 }, { "epoch": 103.98230088495575, "grad_norm": 0.0005398447392508388, "learning_rate": 3.326155778894472e-05, "loss": 0.0, "step": 11750 }, { "epoch": 104.20353982300885, "grad_norm": 0.0005087525933049619, "learning_rate": 3.3252135678391956e-05, "loss": 0.0, "step": 11775 }, { "epoch": 104.42477876106194, "grad_norm": 0.00054331892170012, "learning_rate": 3.3242713567839194e-05, "loss": 0.0, "step": 11800 }, { "epoch": 104.64601769911505, "grad_norm": 0.0005263140774331987, "learning_rate": 3.323329145728643e-05, "loss": 0.0, "step": 11825 }, { "epoch": 104.86725663716814, "grad_norm": 0.0005101055721752346, "learning_rate": 3.322386934673366e-05, "loss": 0.0, "step": 11850 }, { "epoch": 105.08849557522124, "grad_norm": 0.0004926638794131577, "learning_rate": 3.32144472361809e-05, "loss": 0.0, "step": 11875 }, { "epoch": 105.30973451327434, "grad_norm": 0.0004924276145175099, "learning_rate": 3.320502512562814e-05, "loss": 0.0, "step": 11900 }, { "epoch": 105.53097345132744, "grad_norm": 0.0005014511407352984, "learning_rate": 3.3195603015075377e-05, "loss": 0.0, "step": 11925 }, { "epoch": 105.75221238938053, "grad_norm": 0.00048014672938734293, "learning_rate": 3.3186180904522614e-05, "loss": 0.0, "step": 11950 }, { "epoch": 105.97345132743362, "grad_norm": 0.0004981139209121466, "learning_rate": 3.3176758793969846e-05, "loss": 0.0, "step": 11975 }, { "epoch": 106.19469026548673, "grad_norm": 0.00046380216372199357, "learning_rate": 3.3167336683417084e-05, "loss": 0.0, "step": 12000 }, { "epoch": 106.41592920353982, "grad_norm": 0.00047072675079107285, "learning_rate": 3.3157914572864315e-05, "loss": 0.0, "step": 12025 }, { "epoch": 106.63716814159292, "grad_norm": 0.0004854618455283344, "learning_rate": 3.314849246231155e-05, "loss": 0.0, "step": 12050 }, { "epoch": 106.85840707964601, "grad_norm": 0.0004931954317726195, "learning_rate": 3.313907035175879e-05, "loss": 0.0, "step": 12075 }, { "epoch": 107.07964601769912, "grad_norm": 0.0004412159905768931, "learning_rate": 3.312964824120603e-05, "loss": 0.0, "step": 12100 }, { "epoch": 107.30088495575221, "grad_norm": 0.0004534273175522685, "learning_rate": 3.312022613065326e-05, "loss": 0.0, "step": 12125 }, { "epoch": 107.52212389380531, "grad_norm": 0.00044499558862298727, "learning_rate": 3.31108040201005e-05, "loss": 0.0, "step": 12150 }, { "epoch": 107.7433628318584, "grad_norm": 0.00046239711809903383, "learning_rate": 3.3101381909547735e-05, "loss": 0.0, "step": 12175 }, { "epoch": 107.96460176991151, "grad_norm": 0.00044727171189151704, "learning_rate": 3.309195979899497e-05, "loss": 0.0, "step": 12200 }, { "epoch": 108.1858407079646, "grad_norm": 0.00042742956429719925, "learning_rate": 3.308253768844221e-05, "loss": 0.0, "step": 12225 }, { "epoch": 108.40707964601769, "grad_norm": 0.0004419190518092364, "learning_rate": 3.307311557788944e-05, "loss": 0.0, "step": 12250 }, { "epoch": 108.6283185840708, "grad_norm": 0.0004380150348879397, "learning_rate": 3.306369346733668e-05, "loss": 0.0, "step": 12275 }, { "epoch": 108.84955752212389, "grad_norm": 0.0004308440547902137, "learning_rate": 3.305427135678392e-05, "loss": 0.0, "step": 12300 }, { "epoch": 109.070796460177, "grad_norm": 0.00039512827061116695, "learning_rate": 3.3044849246231156e-05, "loss": 0.0, "step": 12325 }, { "epoch": 109.29203539823008, "grad_norm": 0.00042166540515609086, "learning_rate": 3.303542713567839e-05, "loss": 0.0, "step": 12350 }, { "epoch": 109.51327433628319, "grad_norm": 0.00041787291411310434, "learning_rate": 3.3026005025125625e-05, "loss": 0.0, "step": 12375 }, { "epoch": 109.73451327433628, "grad_norm": 0.0004270370409358293, "learning_rate": 3.301658291457286e-05, "loss": 0.0, "step": 12400 }, { "epoch": 109.95575221238938, "grad_norm": 0.00043235401972196996, "learning_rate": 3.3007160804020094e-05, "loss": 0.0, "step": 12425 }, { "epoch": 110.17699115044248, "grad_norm": 0.0003742171684280038, "learning_rate": 3.299773869346733e-05, "loss": 0.0, "step": 12450 }, { "epoch": 110.39823008849558, "grad_norm": 0.0003894402470905334, "learning_rate": 3.298831658291457e-05, "loss": 0.0, "step": 12475 }, { "epoch": 110.61946902654867, "grad_norm": 0.00039075451786629856, "learning_rate": 3.297889447236181e-05, "loss": 0.0, "step": 12500 }, { "epoch": 110.84070796460178, "grad_norm": 0.0003838959673885256, "learning_rate": 3.296947236180904e-05, "loss": 0.0, "step": 12525 }, { "epoch": 111.06194690265487, "grad_norm": 0.00034599396167322993, "learning_rate": 3.296005025125628e-05, "loss": 0.0, "step": 12550 }, { "epoch": 111.28318584070796, "grad_norm": 0.00038058572681620717, "learning_rate": 3.2950628140703515e-05, "loss": 0.0, "step": 12575 }, { "epoch": 111.50442477876106, "grad_norm": 0.0003626858233474195, "learning_rate": 3.294120603015075e-05, "loss": 0.0, "step": 12600 }, { "epoch": 111.72566371681415, "grad_norm": 0.00035919720539823174, "learning_rate": 3.293178391959799e-05, "loss": 0.0, "step": 12625 }, { "epoch": 111.94690265486726, "grad_norm": 0.00037452601827681065, "learning_rate": 3.292236180904522e-05, "loss": 0.0, "step": 12650 }, { "epoch": 112.16814159292035, "grad_norm": 0.00036667121457867324, "learning_rate": 3.291293969849246e-05, "loss": 0.0, "step": 12675 }, { "epoch": 112.38938053097345, "grad_norm": 0.0003419696004129946, "learning_rate": 3.290351758793969e-05, "loss": 0.0, "step": 12700 }, { "epoch": 112.61061946902655, "grad_norm": 0.0003534434945322573, "learning_rate": 3.289409547738693e-05, "loss": 0.0, "step": 12725 }, { "epoch": 112.83185840707965, "grad_norm": 0.0003585779049899429, "learning_rate": 3.288467336683417e-05, "loss": 0.0, "step": 12750 }, { "epoch": 113.05309734513274, "grad_norm": 0.0003136063169222325, "learning_rate": 3.2875251256281405e-05, "loss": 0.0, "step": 12775 }, { "epoch": 113.27433628318585, "grad_norm": 0.00031190679874271154, "learning_rate": 3.286582914572864e-05, "loss": 0.0, "step": 12800 }, { "epoch": 113.49557522123894, "grad_norm": 0.00032804999500513077, "learning_rate": 3.2856407035175874e-05, "loss": 0.0, "step": 12825 }, { "epoch": 113.71681415929204, "grad_norm": 0.00033855991205200553, "learning_rate": 3.284698492462311e-05, "loss": 0.0, "step": 12850 }, { "epoch": 113.93805309734513, "grad_norm": 0.0003366032033227384, "learning_rate": 3.283756281407035e-05, "loss": 0.0, "step": 12875 }, { "epoch": 114.15929203539822, "grad_norm": 0.00031767829204909503, "learning_rate": 3.282814070351759e-05, "loss": 0.0, "step": 12900 }, { "epoch": 114.38053097345133, "grad_norm": 0.0003249355941079557, "learning_rate": 3.281871859296482e-05, "loss": 0.0, "step": 12925 }, { "epoch": 114.60176991150442, "grad_norm": 0.0003131679550278932, "learning_rate": 3.280929648241206e-05, "loss": 0.0, "step": 12950 }, { "epoch": 114.82300884955752, "grad_norm": 0.0003141721535939723, "learning_rate": 3.2799874371859295e-05, "loss": 0.0, "step": 12975 }, { "epoch": 115.04424778761062, "grad_norm": 0.00030246900860220194, "learning_rate": 3.279045226130653e-05, "loss": 0.0, "step": 13000 }, { "epoch": 115.26548672566372, "grad_norm": 0.0002990305074490607, "learning_rate": 3.278103015075377e-05, "loss": 0.0, "step": 13025 }, { "epoch": 115.48672566371681, "grad_norm": 0.0003000983560923487, "learning_rate": 3.2771608040201e-05, "loss": 0.0, "step": 13050 }, { "epoch": 115.70796460176992, "grad_norm": 0.0003029140061698854, "learning_rate": 3.276218592964824e-05, "loss": 0.0, "step": 13075 }, { "epoch": 115.929203539823, "grad_norm": 0.00029890044243074954, "learning_rate": 3.275276381909547e-05, "loss": 0.0, "step": 13100 }, { "epoch": 116.15044247787611, "grad_norm": 0.0002794157771859318, "learning_rate": 3.274334170854271e-05, "loss": 0.0, "step": 13125 }, { "epoch": 116.3716814159292, "grad_norm": 0.00027819868410006166, "learning_rate": 3.273391959798995e-05, "loss": 0.0, "step": 13150 }, { "epoch": 116.59292035398231, "grad_norm": 0.0002822064852807671, "learning_rate": 3.2724497487437185e-05, "loss": 0.0, "step": 13175 }, { "epoch": 116.8141592920354, "grad_norm": 0.000300791347399354, "learning_rate": 3.271507537688442e-05, "loss": 0.0, "step": 13200 }, { "epoch": 117.03539823008849, "grad_norm": 0.0002685796935111284, "learning_rate": 3.2705653266331654e-05, "loss": 0.0, "step": 13225 }, { "epoch": 117.2566371681416, "grad_norm": 0.00027848154422827065, "learning_rate": 3.269623115577889e-05, "loss": 0.0, "step": 13250 }, { "epoch": 117.47787610619469, "grad_norm": 0.0002695005969144404, "learning_rate": 3.268680904522613e-05, "loss": 0.0, "step": 13275 }, { "epoch": 117.69911504424779, "grad_norm": 0.0002717023598961532, "learning_rate": 3.267738693467337e-05, "loss": 0.0, "step": 13300 }, { "epoch": 117.92035398230088, "grad_norm": 0.00027723584207706153, "learning_rate": 3.26679648241206e-05, "loss": 0.0, "step": 13325 }, { "epoch": 118.14159292035399, "grad_norm": 0.00025636443751864135, "learning_rate": 3.265854271356784e-05, "loss": 0.0, "step": 13350 }, { "epoch": 118.36283185840708, "grad_norm": 0.0002531832433305681, "learning_rate": 3.264912060301507e-05, "loss": 0.0, "step": 13375 }, { "epoch": 118.58407079646018, "grad_norm": 0.00025198841467499733, "learning_rate": 3.2639698492462306e-05, "loss": 0.0, "step": 13400 }, { "epoch": 118.80530973451327, "grad_norm": 0.00025299336994066834, "learning_rate": 3.2630276381909544e-05, "loss": 0.0, "step": 13425 }, { "epoch": 119.02654867256638, "grad_norm": 0.00022913687280379236, "learning_rate": 3.262085427135678e-05, "loss": 0.0, "step": 13450 }, { "epoch": 119.24778761061947, "grad_norm": 0.00023393111769109964, "learning_rate": 3.261143216080402e-05, "loss": 0.0, "step": 13475 }, { "epoch": 119.46902654867256, "grad_norm": 0.0002453260822221637, "learning_rate": 3.260201005025125e-05, "loss": 0.0, "step": 13500 }, { "epoch": 119.69026548672566, "grad_norm": 0.00026405462995171547, "learning_rate": 3.259258793969849e-05, "loss": 0.0, "step": 13525 }, { "epoch": 119.91150442477876, "grad_norm": 0.0002473360800649971, "learning_rate": 3.2583165829145727e-05, "loss": 0.0, "step": 13550 }, { "epoch": 120.13274336283186, "grad_norm": 0.00022574650938622653, "learning_rate": 3.2573743718592964e-05, "loss": 0.0, "step": 13575 }, { "epoch": 120.35398230088495, "grad_norm": 0.00022817269200459123, "learning_rate": 3.2564321608040196e-05, "loss": 0.0, "step": 13600 }, { "epoch": 120.57522123893806, "grad_norm": 0.00023194430104922503, "learning_rate": 3.2554899497487434e-05, "loss": 0.0, "step": 13625 }, { "epoch": 120.79646017699115, "grad_norm": 0.00024213413416873664, "learning_rate": 3.254547738693467e-05, "loss": 0.0, "step": 13650 }, { "epoch": 121.01769911504425, "grad_norm": 0.00021600343461614102, "learning_rate": 3.25360552763819e-05, "loss": 0.0, "step": 13675 }, { "epoch": 121.23893805309734, "grad_norm": 0.00021943068713881075, "learning_rate": 3.252663316582915e-05, "loss": 0.0, "step": 13700 }, { "epoch": 121.46017699115045, "grad_norm": 0.00021682149963453412, "learning_rate": 3.251721105527638e-05, "loss": 0.0, "step": 13725 }, { "epoch": 121.68141592920354, "grad_norm": 0.00022443781199399382, "learning_rate": 3.2507788944723616e-05, "loss": 0.0, "step": 13750 }, { "epoch": 121.90265486725664, "grad_norm": 0.00022259342949837446, "learning_rate": 3.249836683417085e-05, "loss": 0.0, "step": 13775 }, { "epoch": 122.12389380530973, "grad_norm": 0.00020813688752241433, "learning_rate": 3.2488944723618085e-05, "loss": 0.0, "step": 13800 }, { "epoch": 122.34513274336283, "grad_norm": 0.0002180004375986755, "learning_rate": 3.2479522613065323e-05, "loss": 0.0, "step": 13825 }, { "epoch": 122.56637168141593, "grad_norm": 0.00020301344920881093, "learning_rate": 3.247010050251256e-05, "loss": 0.0, "step": 13850 }, { "epoch": 122.78761061946902, "grad_norm": 0.00021121461759321392, "learning_rate": 3.24606783919598e-05, "loss": 0.0, "step": 13875 }, { "epoch": 123.00884955752213, "grad_norm": 0.0002038378152064979, "learning_rate": 3.245125628140703e-05, "loss": 0.0, "step": 13900 }, { "epoch": 123.23008849557522, "grad_norm": 0.00019846604845952243, "learning_rate": 3.244183417085427e-05, "loss": 0.0, "step": 13925 }, { "epoch": 123.45132743362832, "grad_norm": 0.00019904010696336627, "learning_rate": 3.2432412060301506e-05, "loss": 0.0, "step": 13950 }, { "epoch": 123.67256637168141, "grad_norm": 0.00019466732919681817, "learning_rate": 3.2422989949748744e-05, "loss": 0.0, "step": 13975 }, { "epoch": 123.89380530973452, "grad_norm": 0.0002037520462181419, "learning_rate": 3.2413567839195975e-05, "loss": 0.0, "step": 14000 }, { "epoch": 124.11504424778761, "grad_norm": 0.00018894982349593192, "learning_rate": 3.240414572864321e-05, "loss": 0.0, "step": 14025 }, { "epoch": 124.33628318584071, "grad_norm": 0.00018865948368329555, "learning_rate": 3.239472361809045e-05, "loss": 0.0, "step": 14050 }, { "epoch": 124.5575221238938, "grad_norm": 0.00018658381304703653, "learning_rate": 3.238530150753768e-05, "loss": 0.0, "step": 14075 }, { "epoch": 124.77876106194691, "grad_norm": 0.00019108246488031, "learning_rate": 3.237587939698492e-05, "loss": 0.0, "step": 14100 }, { "epoch": 125.0, "grad_norm": 0.0003162229259032756, "learning_rate": 3.236645728643216e-05, "loss": 0.0, "step": 14125 }, { "epoch": 125.22123893805309, "grad_norm": 0.00017791100253816694, "learning_rate": 3.2357035175879396e-05, "loss": 0.0, "step": 14150 }, { "epoch": 125.4424778761062, "grad_norm": 0.00017894543998409063, "learning_rate": 3.234761306532663e-05, "loss": 0.0, "step": 14175 }, { "epoch": 125.66371681415929, "grad_norm": 0.00018914676911663264, "learning_rate": 3.2338190954773865e-05, "loss": 0.0, "step": 14200 }, { "epoch": 125.88495575221239, "grad_norm": 0.00017691227549221367, "learning_rate": 3.23287688442211e-05, "loss": 0.0, "step": 14225 }, { "epoch": 126.10619469026548, "grad_norm": 0.00016795202100183815, "learning_rate": 3.231934673366834e-05, "loss": 0.0, "step": 14250 }, { "epoch": 126.32743362831859, "grad_norm": 0.00016660653636790812, "learning_rate": 3.230992462311558e-05, "loss": 0.0, "step": 14275 }, { "epoch": 126.54867256637168, "grad_norm": 0.00017592209042049944, "learning_rate": 3.230050251256281e-05, "loss": 0.0, "step": 14300 }, { "epoch": 126.76991150442478, "grad_norm": 0.0001725174079183489, "learning_rate": 3.229108040201005e-05, "loss": 0.0, "step": 14325 }, { "epoch": 126.99115044247787, "grad_norm": 0.00016334152314811945, "learning_rate": 3.228165829145728e-05, "loss": 0.0, "step": 14350 }, { "epoch": 127.21238938053098, "grad_norm": 0.00016021879855543375, "learning_rate": 3.227223618090452e-05, "loss": 0.0, "step": 14375 }, { "epoch": 127.43362831858407, "grad_norm": 0.00015505387273151428, "learning_rate": 3.2262814070351755e-05, "loss": 0.0, "step": 14400 }, { "epoch": 127.65486725663717, "grad_norm": 0.00015921524027362466, "learning_rate": 3.225339195979899e-05, "loss": 0.0, "step": 14425 }, { "epoch": 127.87610619469027, "grad_norm": 0.0001676641550147906, "learning_rate": 3.224396984924623e-05, "loss": 0.0, "step": 14450 }, { "epoch": 128.09734513274336, "grad_norm": 0.00014578094123862684, "learning_rate": 3.223454773869346e-05, "loss": 0.0, "step": 14475 }, { "epoch": 128.31858407079645, "grad_norm": 0.0001531911693746224, "learning_rate": 3.22251256281407e-05, "loss": 0.0, "step": 14500 }, { "epoch": 128.53982300884957, "grad_norm": 0.00015877175610512495, "learning_rate": 3.221570351758794e-05, "loss": 0.0, "step": 14525 }, { "epoch": 128.76106194690266, "grad_norm": 0.00014913754421286285, "learning_rate": 3.2206281407035176e-05, "loss": 0.0, "step": 14550 }, { "epoch": 128.98230088495575, "grad_norm": 0.0001496290642535314, "learning_rate": 3.219685929648241e-05, "loss": 0.0, "step": 14575 }, { "epoch": 129.20353982300884, "grad_norm": 0.00013924903760198504, "learning_rate": 3.2187437185929645e-05, "loss": 0.0, "step": 14600 }, { "epoch": 129.42477876106196, "grad_norm": 0.0001447364775231108, "learning_rate": 3.217801507537688e-05, "loss": 0.0, "step": 14625 }, { "epoch": 129.64601769911505, "grad_norm": 0.00014924371498636901, "learning_rate": 3.216859296482412e-05, "loss": 0.0, "step": 14650 }, { "epoch": 129.86725663716814, "grad_norm": 0.00014909409219399095, "learning_rate": 3.215917085427136e-05, "loss": 0.0, "step": 14675 }, { "epoch": 130.08849557522123, "grad_norm": 0.00013484338705893606, "learning_rate": 3.214974874371859e-05, "loss": 0.0, "step": 14700 }, { "epoch": 130.30973451327435, "grad_norm": 0.00014204980107024312, "learning_rate": 3.214032663316583e-05, "loss": 0.0, "step": 14725 }, { "epoch": 130.53097345132744, "grad_norm": 0.0001349444210063666, "learning_rate": 3.213090452261306e-05, "loss": 0.0, "step": 14750 }, { "epoch": 130.75221238938053, "grad_norm": 0.0001359093439532444, "learning_rate": 3.21214824120603e-05, "loss": 0.0, "step": 14775 }, { "epoch": 130.97345132743362, "grad_norm": 0.0001366245560348034, "learning_rate": 3.2112060301507535e-05, "loss": 0.0, "step": 14800 }, { "epoch": 131.1946902654867, "grad_norm": 0.00012521161988843232, "learning_rate": 3.210263819095477e-05, "loss": 0.0, "step": 14825 }, { "epoch": 131.41592920353983, "grad_norm": 0.00013026637316215783, "learning_rate": 3.2093216080402004e-05, "loss": 0.0, "step": 14850 }, { "epoch": 131.63716814159292, "grad_norm": 0.00013358989963307977, "learning_rate": 3.208379396984924e-05, "loss": 0.0, "step": 14875 }, { "epoch": 131.858407079646, "grad_norm": 0.00012801903358194977, "learning_rate": 3.207437185929648e-05, "loss": 0.0, "step": 14900 }, { "epoch": 132.0796460176991, "grad_norm": 0.00012315553613007069, "learning_rate": 3.206494974874372e-05, "loss": 0.0, "step": 14925 }, { "epoch": 132.30088495575222, "grad_norm": 0.00012301499373279512, "learning_rate": 3.2055527638190956e-05, "loss": 0.0, "step": 14950 }, { "epoch": 132.52212389380531, "grad_norm": 0.00012160424375906587, "learning_rate": 3.204610552763819e-05, "loss": 0.0, "step": 14975 }, { "epoch": 132.7433628318584, "grad_norm": 0.00012721198436338454, "learning_rate": 3.2036683417085425e-05, "loss": 0.0, "step": 15000 }, { "epoch": 132.7433628318584, "eval_loss": 0.6086333394050598, "eval_runtime": 69.0901, "eval_samples_per_second": 208.177, "eval_steps_per_second": 1.636, "eval_wer": 20.664481783974058, "step": 15000 }, { "epoch": 132.9646017699115, "grad_norm": 0.0001227973261848092, "learning_rate": 3.2027261306532656e-05, "loss": 0.0, "step": 15025 }, { "epoch": 133.18584070796462, "grad_norm": 0.00011745069787139073, "learning_rate": 3.2017839195979894e-05, "loss": 0.0, "step": 15050 }, { "epoch": 133.4070796460177, "grad_norm": 0.0001177309823106043, "learning_rate": 3.200841708542713e-05, "loss": 0.0, "step": 15075 }, { "epoch": 133.6283185840708, "grad_norm": 0.00011846800043713301, "learning_rate": 3.199899497487437e-05, "loss": 0.0, "step": 15100 }, { "epoch": 133.8495575221239, "grad_norm": 0.00011864864791277796, "learning_rate": 3.198957286432161e-05, "loss": 0.0, "step": 15125 }, { "epoch": 134.07079646017698, "grad_norm": 0.00011089773033745587, "learning_rate": 3.198015075376884e-05, "loss": 0.0, "step": 15150 }, { "epoch": 134.2920353982301, "grad_norm": 0.00010421755723655224, "learning_rate": 3.1970728643216077e-05, "loss": 0.0, "step": 15175 }, { "epoch": 134.5132743362832, "grad_norm": 0.00011302108032396063, "learning_rate": 3.1961306532663315e-05, "loss": 0.0, "step": 15200 }, { "epoch": 134.73451327433628, "grad_norm": 0.00010540281073190272, "learning_rate": 3.195188442211055e-05, "loss": 0.0, "step": 15225 }, { "epoch": 134.95575221238937, "grad_norm": 0.00011068591265939176, "learning_rate": 3.1942462311557784e-05, "loss": 0.0, "step": 15250 }, { "epoch": 135.1769911504425, "grad_norm": 0.00010172714974032715, "learning_rate": 3.193304020100502e-05, "loss": 0.0, "step": 15275 }, { "epoch": 135.39823008849558, "grad_norm": 0.00010134557669516653, "learning_rate": 3.192361809045226e-05, "loss": 0.0, "step": 15300 }, { "epoch": 135.61946902654867, "grad_norm": 0.00010302195732947439, "learning_rate": 3.19141959798995e-05, "loss": 0.0, "step": 15325 }, { "epoch": 135.84070796460176, "grad_norm": 0.00010134543117601424, "learning_rate": 3.1904773869346735e-05, "loss": 0.0, "step": 15350 }, { "epoch": 136.06194690265488, "grad_norm": 0.00010209771426161751, "learning_rate": 3.1895351758793966e-05, "loss": 0.0, "step": 15375 }, { "epoch": 136.28318584070797, "grad_norm": 9.434438834432513e-05, "learning_rate": 3.1885929648241204e-05, "loss": 0.0, "step": 15400 }, { "epoch": 136.50442477876106, "grad_norm": 9.907045750878751e-05, "learning_rate": 3.1876507537688436e-05, "loss": 0.0, "step": 15425 }, { "epoch": 136.72566371681415, "grad_norm": 0.0001018818438751623, "learning_rate": 3.1867085427135673e-05, "loss": 0.0, "step": 15450 }, { "epoch": 136.94690265486724, "grad_norm": 9.723913535708562e-05, "learning_rate": 3.185766331658291e-05, "loss": 0.0, "step": 15475 }, { "epoch": 137.16814159292036, "grad_norm": 9.640046482672915e-05, "learning_rate": 3.184824120603015e-05, "loss": 0.0, "step": 15500 }, { "epoch": 137.38938053097345, "grad_norm": 9.776013757800683e-05, "learning_rate": 3.183881909547739e-05, "loss": 0.0, "step": 15525 }, { "epoch": 137.61061946902655, "grad_norm": 9.202082583215088e-05, "learning_rate": 3.182939698492462e-05, "loss": 0.0, "step": 15550 }, { "epoch": 137.83185840707964, "grad_norm": 9.632689761929214e-05, "learning_rate": 3.1819974874371856e-05, "loss": 0.0, "step": 15575 }, { "epoch": 138.05309734513276, "grad_norm": 8.585565228713676e-05, "learning_rate": 3.1810552763819094e-05, "loss": 0.0, "step": 15600 }, { "epoch": 138.27433628318585, "grad_norm": 9.025737381307408e-05, "learning_rate": 3.180113065326633e-05, "loss": 0.0, "step": 15625 }, { "epoch": 138.49557522123894, "grad_norm": 8.881489338818938e-05, "learning_rate": 3.179170854271356e-05, "loss": 0.0, "step": 15650 }, { "epoch": 138.71681415929203, "grad_norm": 8.823377720545977e-05, "learning_rate": 3.17822864321608e-05, "loss": 0.0, "step": 15675 }, { "epoch": 138.93805309734512, "grad_norm": 8.773766603553668e-05, "learning_rate": 3.177286432160804e-05, "loss": 0.0, "step": 15700 }, { "epoch": 139.15929203539824, "grad_norm": 8.402310049859807e-05, "learning_rate": 3.176344221105527e-05, "loss": 0.0, "step": 15725 }, { "epoch": 139.38053097345133, "grad_norm": 8.196994167519733e-05, "learning_rate": 3.175402010050251e-05, "loss": 0.0, "step": 15750 }, { "epoch": 139.60176991150442, "grad_norm": 8.579234417993575e-05, "learning_rate": 3.1744597989949746e-05, "loss": 0.0, "step": 15775 }, { "epoch": 139.8230088495575, "grad_norm": 8.137343684211373e-05, "learning_rate": 3.1735175879396984e-05, "loss": 0.0, "step": 15800 }, { "epoch": 140.04424778761063, "grad_norm": 7.529032154707238e-05, "learning_rate": 3.1725753768844215e-05, "loss": 0.0, "step": 15825 }, { "epoch": 140.26548672566372, "grad_norm": 8.112651266856119e-05, "learning_rate": 3.171633165829145e-05, "loss": 0.0, "step": 15850 }, { "epoch": 140.4867256637168, "grad_norm": 8.533461368642747e-05, "learning_rate": 3.170690954773869e-05, "loss": 0.0, "step": 15875 }, { "epoch": 140.7079646017699, "grad_norm": 8.598016574978828e-05, "learning_rate": 3.169748743718593e-05, "loss": 0.0, "step": 15900 }, { "epoch": 140.92920353982302, "grad_norm": 8.237986912718043e-05, "learning_rate": 3.168806532663317e-05, "loss": 0.0, "step": 15925 }, { "epoch": 141.1504424778761, "grad_norm": 7.660566188860685e-05, "learning_rate": 3.16786432160804e-05, "loss": 0.0, "step": 15950 }, { "epoch": 141.3716814159292, "grad_norm": 7.450656266883016e-05, "learning_rate": 3.1669221105527636e-05, "loss": 0.0, "step": 15975 }, { "epoch": 141.5929203539823, "grad_norm": 7.785335765220225e-05, "learning_rate": 3.1659798994974874e-05, "loss": 0.0, "step": 16000 }, { "epoch": 141.81415929203538, "grad_norm": 8.008073200471699e-05, "learning_rate": 3.165037688442211e-05, "loss": 0.0, "step": 16025 }, { "epoch": 142.0353982300885, "grad_norm": 7.253543299157172e-05, "learning_rate": 3.164095477386934e-05, "loss": 0.0, "step": 16050 }, { "epoch": 142.2566371681416, "grad_norm": 7.08417355781421e-05, "learning_rate": 3.163153266331658e-05, "loss": 0.0, "step": 16075 }, { "epoch": 142.47787610619469, "grad_norm": 7.066688704071566e-05, "learning_rate": 3.162211055276381e-05, "loss": 0.0, "step": 16100 }, { "epoch": 142.69911504424778, "grad_norm": 7.224662840599194e-05, "learning_rate": 3.161268844221105e-05, "loss": 0.0, "step": 16125 }, { "epoch": 142.9203539823009, "grad_norm": 7.187834125943482e-05, "learning_rate": 3.160326633165829e-05, "loss": 0.0, "step": 16150 }, { "epoch": 143.141592920354, "grad_norm": 7.097806519595906e-05, "learning_rate": 3.1593844221105526e-05, "loss": 0.0, "step": 16175 }, { "epoch": 143.36283185840708, "grad_norm": 7.010024273768067e-05, "learning_rate": 3.1584422110552764e-05, "loss": 0.0, "step": 16200 }, { "epoch": 143.58407079646017, "grad_norm": 7.04536068951711e-05, "learning_rate": 3.1574999999999995e-05, "loss": 0.0, "step": 16225 }, { "epoch": 143.8053097345133, "grad_norm": 6.855610990896821e-05, "learning_rate": 3.156557788944723e-05, "loss": 0.0, "step": 16250 }, { "epoch": 144.02654867256638, "grad_norm": 6.240099901333451e-05, "learning_rate": 3.155615577889447e-05, "loss": 0.0, "step": 16275 }, { "epoch": 144.24778761061947, "grad_norm": 6.321370892692357e-05, "learning_rate": 3.154673366834171e-05, "loss": 0.0, "step": 16300 }, { "epoch": 144.46902654867256, "grad_norm": 6.465456681326032e-05, "learning_rate": 3.153731155778895e-05, "loss": 0.0, "step": 16325 }, { "epoch": 144.69026548672565, "grad_norm": 6.488285725936294e-05, "learning_rate": 3.152788944723618e-05, "loss": 0.0, "step": 16350 }, { "epoch": 144.91150442477877, "grad_norm": 6.45591426291503e-05, "learning_rate": 3.1518467336683416e-05, "loss": 0.0, "step": 16375 }, { "epoch": 145.13274336283186, "grad_norm": 5.9746271290350705e-05, "learning_rate": 3.150904522613065e-05, "loss": 0.0, "step": 16400 }, { "epoch": 145.35398230088495, "grad_norm": 6.154268339741975e-05, "learning_rate": 3.1499623115577885e-05, "loss": 0.0, "step": 16425 }, { "epoch": 145.57522123893804, "grad_norm": 5.937482274021022e-05, "learning_rate": 3.149020100502512e-05, "loss": 0.0, "step": 16450 }, { "epoch": 145.79646017699116, "grad_norm": 5.961513306829147e-05, "learning_rate": 3.148077889447236e-05, "loss": 0.0, "step": 16475 }, { "epoch": 146.01769911504425, "grad_norm": 5.549533307203092e-05, "learning_rate": 3.147135678391959e-05, "loss": 0.0, "step": 16500 }, { "epoch": 146.23893805309734, "grad_norm": 5.920678086113185e-05, "learning_rate": 3.146193467336683e-05, "loss": 0.0, "step": 16525 }, { "epoch": 146.46017699115043, "grad_norm": 5.814691030536778e-05, "learning_rate": 3.145251256281407e-05, "loss": 0.0, "step": 16550 }, { "epoch": 146.68141592920355, "grad_norm": 5.88978837186005e-05, "learning_rate": 3.1443090452261306e-05, "loss": 0.0, "step": 16575 }, { "epoch": 146.90265486725664, "grad_norm": 5.73635334149003e-05, "learning_rate": 3.1433668341708544e-05, "loss": 0.0, "step": 16600 }, { "epoch": 147.12389380530973, "grad_norm": 5.8368703321320936e-05, "learning_rate": 3.1424246231155775e-05, "loss": 0.0, "step": 16625 }, { "epoch": 147.34513274336283, "grad_norm": 5.288918691803701e-05, "learning_rate": 3.141482412060301e-05, "loss": 0.0, "step": 16650 }, { "epoch": 147.56637168141592, "grad_norm": 5.464577770908363e-05, "learning_rate": 3.1405402010050244e-05, "loss": 0.0, "step": 16675 }, { "epoch": 147.78761061946904, "grad_norm": 5.554040762945078e-05, "learning_rate": 3.139597989949748e-05, "loss": 0.0, "step": 16700 }, { "epoch": 148.00884955752213, "grad_norm": 5.3267816838342696e-05, "learning_rate": 3.138655778894472e-05, "loss": 0.0, "step": 16725 }, { "epoch": 148.23008849557522, "grad_norm": 5.2804811275564134e-05, "learning_rate": 3.137713567839196e-05, "loss": 0.0, "step": 16750 }, { "epoch": 148.4513274336283, "grad_norm": 5.183510438655503e-05, "learning_rate": 3.1367713567839196e-05, "loss": 0.0, "step": 16775 }, { "epoch": 148.67256637168143, "grad_norm": 5.431536919786595e-05, "learning_rate": 3.135829145728643e-05, "loss": 0.0, "step": 16800 }, { "epoch": 148.89380530973452, "grad_norm": 5.326955943019129e-05, "learning_rate": 3.1348869346733665e-05, "loss": 0.0, "step": 16825 }, { "epoch": 149.1150442477876, "grad_norm": 4.838196036871523e-05, "learning_rate": 3.13394472361809e-05, "loss": 0.0, "step": 16850 }, { "epoch": 149.3362831858407, "grad_norm": 4.792392428498715e-05, "learning_rate": 3.133002512562814e-05, "loss": 0.0, "step": 16875 }, { "epoch": 149.55752212389382, "grad_norm": 4.8764126404421404e-05, "learning_rate": 3.132060301507537e-05, "loss": 0.0, "step": 16900 }, { "epoch": 149.7787610619469, "grad_norm": 4.93328261654824e-05, "learning_rate": 3.131118090452261e-05, "loss": 0.0, "step": 16925 }, { "epoch": 150.0, "grad_norm": 7.915851165307686e-05, "learning_rate": 3.130175879396985e-05, "loss": 0.0, "step": 16950 }, { "epoch": 150.2212389380531, "grad_norm": 4.673408329836093e-05, "learning_rate": 3.1292336683417085e-05, "loss": 0.0, "step": 16975 }, { "epoch": 150.44247787610618, "grad_norm": 4.678339246311225e-05, "learning_rate": 3.128291457286432e-05, "loss": 0.0, "step": 17000 }, { "epoch": 150.6637168141593, "grad_norm": 4.650899063562974e-05, "learning_rate": 3.1273492462311554e-05, "loss": 0.0, "step": 17025 }, { "epoch": 150.8849557522124, "grad_norm": 4.589417949318886e-05, "learning_rate": 3.126407035175879e-05, "loss": 0.0, "step": 17050 }, { "epoch": 151.10619469026548, "grad_norm": 4.2551801016088575e-05, "learning_rate": 3.1254648241206024e-05, "loss": 0.0, "step": 17075 }, { "epoch": 151.32743362831857, "grad_norm": 4.526301199803129e-05, "learning_rate": 3.124522613065326e-05, "loss": 0.0, "step": 17100 }, { "epoch": 151.5486725663717, "grad_norm": 4.664048537961207e-05, "learning_rate": 3.12358040201005e-05, "loss": 0.0, "step": 17125 }, { "epoch": 151.76991150442478, "grad_norm": 4.558287764666602e-05, "learning_rate": 3.122638190954774e-05, "loss": 0.0, "step": 17150 }, { "epoch": 151.99115044247787, "grad_norm": 4.608816743711941e-05, "learning_rate": 3.1216959798994975e-05, "loss": 0.0, "step": 17175 }, { "epoch": 152.21238938053096, "grad_norm": 4.338638245826587e-05, "learning_rate": 3.1207537688442206e-05, "loss": 0.0, "step": 17200 }, { "epoch": 152.43362831858408, "grad_norm": 4.205362347420305e-05, "learning_rate": 3.1198115577889444e-05, "loss": 0.0, "step": 17225 }, { "epoch": 152.65486725663717, "grad_norm": 4.298292697058059e-05, "learning_rate": 3.118869346733668e-05, "loss": 0.0, "step": 17250 }, { "epoch": 152.87610619469027, "grad_norm": 4.426393206813373e-05, "learning_rate": 3.117927135678392e-05, "loss": 0.0, "step": 17275 }, { "epoch": 153.09734513274336, "grad_norm": 4.051177165820263e-05, "learning_rate": 3.116984924623115e-05, "loss": 0.0, "step": 17300 }, { "epoch": 153.31858407079645, "grad_norm": 3.9495775126852095e-05, "learning_rate": 3.116042713567839e-05, "loss": 0.0, "step": 17325 }, { "epoch": 153.53982300884957, "grad_norm": 3.7791323848068714e-05, "learning_rate": 3.115100502512562e-05, "loss": 0.0, "step": 17350 }, { "epoch": 153.76106194690266, "grad_norm": 4.0328661270905286e-05, "learning_rate": 3.114158291457286e-05, "loss": 0.0, "step": 17375 }, { "epoch": 153.98230088495575, "grad_norm": 4.1376479202881455e-05, "learning_rate": 3.1132160804020096e-05, "loss": 0.0, "step": 17400 }, { "epoch": 154.20353982300884, "grad_norm": 3.719543383340351e-05, "learning_rate": 3.1122738693467334e-05, "loss": 0.0, "step": 17425 }, { "epoch": 154.42477876106196, "grad_norm": 3.645156175480224e-05, "learning_rate": 3.111331658291457e-05, "loss": 0.0, "step": 17450 }, { "epoch": 154.64601769911505, "grad_norm": 3.729850868694484e-05, "learning_rate": 3.11038944723618e-05, "loss": 0.0, "step": 17475 }, { "epoch": 154.86725663716814, "grad_norm": 3.678638313431293e-05, "learning_rate": 3.109447236180904e-05, "loss": 0.0, "step": 17500 }, { "epoch": 155.08849557522123, "grad_norm": 3.5775407013716176e-05, "learning_rate": 3.108505025125628e-05, "loss": 0.0, "step": 17525 }, { "epoch": 155.30973451327435, "grad_norm": 3.678599387058057e-05, "learning_rate": 3.107562814070352e-05, "loss": 0.0, "step": 17550 }, { "epoch": 155.53097345132744, "grad_norm": 3.724336784216575e-05, "learning_rate": 3.106620603015075e-05, "loss": 0.0, "step": 17575 }, { "epoch": 155.75221238938053, "grad_norm": 3.682634269353002e-05, "learning_rate": 3.1056783919597986e-05, "loss": 0.0, "step": 17600 }, { "epoch": 155.97345132743362, "grad_norm": 3.666687189252116e-05, "learning_rate": 3.1047361809045224e-05, "loss": 0.0, "step": 17625 }, { "epoch": 156.1946902654867, "grad_norm": 3.3785727282520384e-05, "learning_rate": 3.103793969849246e-05, "loss": 0.0, "step": 17650 }, { "epoch": 156.41592920353983, "grad_norm": 3.3487773180240765e-05, "learning_rate": 3.10285175879397e-05, "loss": 0.0, "step": 17675 }, { "epoch": 156.63716814159292, "grad_norm": 3.4201861126348376e-05, "learning_rate": 3.101909547738693e-05, "loss": 0.0, "step": 17700 }, { "epoch": 156.858407079646, "grad_norm": 3.3890250051626936e-05, "learning_rate": 3.100967336683417e-05, "loss": 0.0, "step": 17725 }, { "epoch": 157.0796460176991, "grad_norm": 3.089765596087091e-05, "learning_rate": 3.10002512562814e-05, "loss": 0.0, "step": 17750 }, { "epoch": 157.30088495575222, "grad_norm": 3.083161936956458e-05, "learning_rate": 3.099082914572864e-05, "loss": 0.0, "step": 17775 }, { "epoch": 157.52212389380531, "grad_norm": 3.158415711368434e-05, "learning_rate": 3.0981407035175876e-05, "loss": 0.0, "step": 17800 }, { "epoch": 157.7433628318584, "grad_norm": 3.3609474485274404e-05, "learning_rate": 3.0971984924623114e-05, "loss": 0.0, "step": 17825 }, { "epoch": 157.9646017699115, "grad_norm": 3.2153588108485565e-05, "learning_rate": 3.096256281407035e-05, "loss": 0.0, "step": 17850 }, { "epoch": 158.18584070796462, "grad_norm": 3.0152530598570593e-05, "learning_rate": 3.095314070351758e-05, "loss": 0.0, "step": 17875 }, { "epoch": 158.4070796460177, "grad_norm": 3.0337871066876687e-05, "learning_rate": 3.094371859296482e-05, "loss": 0.0, "step": 17900 }, { "epoch": 158.6283185840708, "grad_norm": 3.198089325451292e-05, "learning_rate": 3.093429648241206e-05, "loss": 0.0, "step": 17925 }, { "epoch": 158.8495575221239, "grad_norm": 3.1847277568886057e-05, "learning_rate": 3.09248743718593e-05, "loss": 0.0, "step": 17950 }, { "epoch": 159.07079646017698, "grad_norm": 2.714863330766093e-05, "learning_rate": 3.091545226130653e-05, "loss": 0.0, "step": 17975 }, { "epoch": 159.2920353982301, "grad_norm": 2.872884761018213e-05, "learning_rate": 3.0906030150753766e-05, "loss": 0.0, "step": 18000 }, { "epoch": 159.5132743362832, "grad_norm": 2.8787424525944516e-05, "learning_rate": 3.0896608040201004e-05, "loss": 0.0, "step": 18025 }, { "epoch": 159.73451327433628, "grad_norm": 2.976525320264045e-05, "learning_rate": 3.0887185929648235e-05, "loss": 0.0, "step": 18050 }, { "epoch": 159.95575221238937, "grad_norm": 2.9661652661161497e-05, "learning_rate": 3.087776381909547e-05, "loss": 0.0, "step": 18075 }, { "epoch": 160.1769911504425, "grad_norm": 2.7167190637555905e-05, "learning_rate": 3.086834170854271e-05, "loss": 0.0, "step": 18100 }, { "epoch": 160.39823008849558, "grad_norm": 2.735570706136059e-05, "learning_rate": 3.085891959798995e-05, "loss": 0.0, "step": 18125 }, { "epoch": 160.61946902654867, "grad_norm": 2.871399738069158e-05, "learning_rate": 3.084949748743718e-05, "loss": 0.0, "step": 18150 }, { "epoch": 160.84070796460176, "grad_norm": 2.807032797136344e-05, "learning_rate": 3.084007537688442e-05, "loss": 0.0, "step": 18175 }, { "epoch": 161.06194690265488, "grad_norm": 2.5061146516236477e-05, "learning_rate": 3.0830653266331656e-05, "loss": 0.0, "step": 18200 }, { "epoch": 161.28318584070797, "grad_norm": 2.721739474509377e-05, "learning_rate": 3.0821231155778894e-05, "loss": 0.0, "step": 18225 }, { "epoch": 161.50442477876106, "grad_norm": 2.621088788146153e-05, "learning_rate": 3.081180904522613e-05, "loss": 0.0, "step": 18250 }, { "epoch": 161.72566371681415, "grad_norm": 2.7013338694814593e-05, "learning_rate": 3.080238693467336e-05, "loss": 0.0, "step": 18275 }, { "epoch": 161.94690265486724, "grad_norm": 2.6428075216244906e-05, "learning_rate": 3.07929648241206e-05, "loss": 0.0, "step": 18300 }, { "epoch": 162.16814159292036, "grad_norm": 2.4166620278265327e-05, "learning_rate": 3.078354271356784e-05, "loss": 0.0, "step": 18325 }, { "epoch": 162.38938053097345, "grad_norm": 2.4362387193832546e-05, "learning_rate": 3.0774120603015076e-05, "loss": 0.0, "step": 18350 }, { "epoch": 162.61061946902655, "grad_norm": 2.4746426788624376e-05, "learning_rate": 3.076469849246231e-05, "loss": 0.0, "step": 18375 }, { "epoch": 162.83185840707964, "grad_norm": 2.5647173970355652e-05, "learning_rate": 3.0755276381909546e-05, "loss": 0.0, "step": 18400 }, { "epoch": 163.05309734513276, "grad_norm": 2.3502439944422804e-05, "learning_rate": 3.0745854271356783e-05, "loss": 0.0, "step": 18425 }, { "epoch": 163.27433628318585, "grad_norm": 2.3647678972338326e-05, "learning_rate": 3.0736432160804015e-05, "loss": 0.0, "step": 18450 }, { "epoch": 163.49557522123894, "grad_norm": 2.4864777515176684e-05, "learning_rate": 3.072701005025125e-05, "loss": 0.0, "step": 18475 }, { "epoch": 163.71681415929203, "grad_norm": 2.4329518055310473e-05, "learning_rate": 3.071758793969849e-05, "loss": 0.0, "step": 18500 }, { "epoch": 163.93805309734512, "grad_norm": 2.3652986783417873e-05, "learning_rate": 3.070816582914573e-05, "loss": 0.0, "step": 18525 }, { "epoch": 164.15929203539824, "grad_norm": 2.2134157916298136e-05, "learning_rate": 3.069874371859296e-05, "loss": 0.0, "step": 18550 }, { "epoch": 164.38053097345133, "grad_norm": 2.1876283426536247e-05, "learning_rate": 3.06893216080402e-05, "loss": 0.0, "step": 18575 }, { "epoch": 164.60176991150442, "grad_norm": 2.3060023522702977e-05, "learning_rate": 3.0679899497487435e-05, "loss": 0.0, "step": 18600 }, { "epoch": 164.8230088495575, "grad_norm": 2.379834586463403e-05, "learning_rate": 3.067047738693467e-05, "loss": 0.0, "step": 18625 }, { "epoch": 165.04424778761063, "grad_norm": 2.1120597011758946e-05, "learning_rate": 3.066105527638191e-05, "loss": 0.0, "step": 18650 }, { "epoch": 165.26548672566372, "grad_norm": 2.1763020413345657e-05, "learning_rate": 3.065163316582914e-05, "loss": 0.0, "step": 18675 }, { "epoch": 165.4867256637168, "grad_norm": 2.1123496480868198e-05, "learning_rate": 3.064221105527638e-05, "loss": 0.0, "step": 18700 }, { "epoch": 165.7079646017699, "grad_norm": 2.0720726752188057e-05, "learning_rate": 3.063278894472361e-05, "loss": 0.0, "step": 18725 }, { "epoch": 165.92920353982302, "grad_norm": 2.1498253772733733e-05, "learning_rate": 3.062336683417085e-05, "loss": 0.0, "step": 18750 }, { "epoch": 166.1504424778761, "grad_norm": 2.0322175259934738e-05, "learning_rate": 3.061394472361809e-05, "loss": 0.0, "step": 18775 }, { "epoch": 166.3716814159292, "grad_norm": 2.0232437236700207e-05, "learning_rate": 3.0604522613065325e-05, "loss": 0.0, "step": 18800 }, { "epoch": 166.5929203539823, "grad_norm": 2.0109995602979325e-05, "learning_rate": 3.0595100502512556e-05, "loss": 0.0, "step": 18825 }, { "epoch": 166.81415929203538, "grad_norm": 2.020849751716014e-05, "learning_rate": 3.0585678391959794e-05, "loss": 0.0, "step": 18850 }, { "epoch": 167.0353982300885, "grad_norm": 1.8596663721837103e-05, "learning_rate": 3.057625628140703e-05, "loss": 0.0, "step": 18875 }, { "epoch": 167.2566371681416, "grad_norm": 1.8883600205299444e-05, "learning_rate": 3.056683417085427e-05, "loss": 0.0, "step": 18900 }, { "epoch": 167.47787610619469, "grad_norm": 1.9570434233173728e-05, "learning_rate": 3.055741206030151e-05, "loss": 0.0, "step": 18925 }, { "epoch": 167.69911504424778, "grad_norm": 1.954054823727347e-05, "learning_rate": 3.054798994974874e-05, "loss": 0.0, "step": 18950 }, { "epoch": 167.9203539823009, "grad_norm": 1.8896414985647425e-05, "learning_rate": 3.053856783919598e-05, "loss": 0.0, "step": 18975 }, { "epoch": 168.141592920354, "grad_norm": 1.8187245586887002e-05, "learning_rate": 3.052914572864321e-05, "loss": 0.0, "step": 19000 }, { "epoch": 168.36283185840708, "grad_norm": 1.751833588059526e-05, "learning_rate": 3.051972361809045e-05, "loss": 0.0, "step": 19025 }, { "epoch": 168.58407079646017, "grad_norm": 1.9732624423340894e-05, "learning_rate": 3.0510301507537688e-05, "loss": 0.0, "step": 19050 }, { "epoch": 168.8053097345133, "grad_norm": 1.8063959942082874e-05, "learning_rate": 3.0500879396984922e-05, "loss": 0.0, "step": 19075 }, { "epoch": 169.02654867256638, "grad_norm": 1.765260094543919e-05, "learning_rate": 3.049145728643216e-05, "loss": 0.0, "step": 19100 }, { "epoch": 169.24778761061947, "grad_norm": 1.696557410468813e-05, "learning_rate": 3.0482035175879395e-05, "loss": 0.0, "step": 19125 }, { "epoch": 169.46902654867256, "grad_norm": 1.854299807746429e-05, "learning_rate": 3.0472613065326633e-05, "loss": 0.0, "step": 19150 }, { "epoch": 169.69026548672565, "grad_norm": 1.7397653209627606e-05, "learning_rate": 3.0463190954773864e-05, "loss": 0.0, "step": 19175 }, { "epoch": 169.91150442477877, "grad_norm": 1.647380122449249e-05, "learning_rate": 3.0453768844221105e-05, "loss": 0.0, "step": 19200 }, { "epoch": 170.13274336283186, "grad_norm": 1.645602606004104e-05, "learning_rate": 3.0444346733668336e-05, "loss": 0.0, "step": 19225 }, { "epoch": 170.35398230088495, "grad_norm": 1.6597619833191857e-05, "learning_rate": 3.0434924623115574e-05, "loss": 0.0, "step": 19250 }, { "epoch": 170.57522123893804, "grad_norm": 1.669575613050256e-05, "learning_rate": 3.0425502512562812e-05, "loss": 0.0, "step": 19275 }, { "epoch": 170.79646017699116, "grad_norm": 1.669384255365003e-05, "learning_rate": 3.0416080402010047e-05, "loss": 0.0, "step": 19300 }, { "epoch": 171.01769911504425, "grad_norm": 1.5373296264442615e-05, "learning_rate": 3.0406658291457284e-05, "loss": 0.0, "step": 19325 }, { "epoch": 171.23893805309734, "grad_norm": 1.5488858480239287e-05, "learning_rate": 3.039723618090452e-05, "loss": 0.0, "step": 19350 }, { "epoch": 171.46017699115043, "grad_norm": 1.6061492715380155e-05, "learning_rate": 3.0387814070351757e-05, "loss": 0.0, "step": 19375 }, { "epoch": 171.68141592920355, "grad_norm": 1.5882935258559883e-05, "learning_rate": 3.037839195979899e-05, "loss": 0.0, "step": 19400 }, { "epoch": 171.90265486725664, "grad_norm": 1.5720805095043033e-05, "learning_rate": 3.036896984924623e-05, "loss": 0.0, "step": 19425 }, { "epoch": 172.12389380530973, "grad_norm": 1.488530142523814e-05, "learning_rate": 3.0359547738693464e-05, "loss": 0.0, "step": 19450 }, { "epoch": 172.34513274336283, "grad_norm": 1.530918598291464e-05, "learning_rate": 3.0350125628140702e-05, "loss": 0.0, "step": 19475 }, { "epoch": 172.56637168141592, "grad_norm": 1.4933929378457833e-05, "learning_rate": 3.034070351758794e-05, "loss": 0.0, "step": 19500 }, { "epoch": 172.78761061946904, "grad_norm": 1.5128611266845837e-05, "learning_rate": 3.033128140703517e-05, "loss": 0.0, "step": 19525 }, { "epoch": 173.00884955752213, "grad_norm": 1.3560685147240292e-05, "learning_rate": 3.0321859296482412e-05, "loss": 0.0, "step": 19550 }, { "epoch": 173.23008849557522, "grad_norm": 1.3809654774377123e-05, "learning_rate": 3.0312437185929643e-05, "loss": 0.0, "step": 19575 }, { "epoch": 173.4513274336283, "grad_norm": 1.4624785762862302e-05, "learning_rate": 3.030301507537688e-05, "loss": 0.0, "step": 19600 }, { "epoch": 173.67256637168143, "grad_norm": 1.436613001715159e-05, "learning_rate": 3.0293592964824116e-05, "loss": 0.0, "step": 19625 }, { "epoch": 173.89380530973452, "grad_norm": 1.4463673323916737e-05, "learning_rate": 3.0284170854271354e-05, "loss": 0.0, "step": 19650 }, { "epoch": 174.1150442477876, "grad_norm": 1.3433063941192813e-05, "learning_rate": 3.0274748743718592e-05, "loss": 0.0, "step": 19675 }, { "epoch": 174.3362831858407, "grad_norm": 1.342534687864827e-05, "learning_rate": 3.0265326633165826e-05, "loss": 0.0, "step": 19700 }, { "epoch": 174.55752212389382, "grad_norm": 1.4017597095516976e-05, "learning_rate": 3.0255904522613064e-05, "loss": 0.0, "step": 19725 }, { "epoch": 174.7787610619469, "grad_norm": 1.4061187357583549e-05, "learning_rate": 3.02464824120603e-05, "loss": 0.0, "step": 19750 }, { "epoch": 175.0, "grad_norm": 2.3386879547615536e-05, "learning_rate": 3.0237060301507537e-05, "loss": 0.0, "step": 19775 }, { "epoch": 175.2212389380531, "grad_norm": 1.2863692973041907e-05, "learning_rate": 3.022763819095477e-05, "loss": 0.0, "step": 19800 }, { "epoch": 175.44247787610618, "grad_norm": 1.29440750242793e-05, "learning_rate": 3.021821608040201e-05, "loss": 0.0, "step": 19825 }, { "epoch": 175.6637168141593, "grad_norm": 1.24760226754006e-05, "learning_rate": 3.020879396984924e-05, "loss": 0.0, "step": 19850 }, { "epoch": 175.8849557522124, "grad_norm": 1.3329840840015095e-05, "learning_rate": 3.0199371859296478e-05, "loss": 0.0, "step": 19875 }, { "epoch": 176.10619469026548, "grad_norm": 1.242750386154512e-05, "learning_rate": 3.0189949748743716e-05, "loss": 0.0, "step": 19900 }, { "epoch": 176.32743362831857, "grad_norm": 1.2041745321766939e-05, "learning_rate": 3.018052763819095e-05, "loss": 0.0, "step": 19925 }, { "epoch": 176.5486725663717, "grad_norm": 1.1973632354056463e-05, "learning_rate": 3.017110552763819e-05, "loss": 0.0, "step": 19950 }, { "epoch": 176.76991150442478, "grad_norm": 1.260339286091039e-05, "learning_rate": 3.0161683417085423e-05, "loss": 0.0, "step": 19975 }, { "epoch": 176.99115044247787, "grad_norm": 1.2614805200428236e-05, "learning_rate": 3.015226130653266e-05, "loss": 0.0, "step": 20000 }, { "epoch": 176.99115044247787, "eval_loss": 0.7125903367996216, "eval_runtime": 67.5346, "eval_samples_per_second": 212.972, "eval_steps_per_second": 1.673, "eval_wer": 20.97487384903501, "step": 20000 }, { "epoch": 177.21238938053096, "grad_norm": 1.169081951957196e-05, "learning_rate": 3.0142839195979896e-05, "loss": 0.0, "step": 20025 }, { "epoch": 177.43362831858408, "grad_norm": 1.2020368558296468e-05, "learning_rate": 3.0133417085427134e-05, "loss": 0.0, "step": 20050 }, { "epoch": 177.65486725663717, "grad_norm": 1.1246896065131295e-05, "learning_rate": 3.0123994974874368e-05, "loss": 0.0, "step": 20075 }, { "epoch": 177.87610619469027, "grad_norm": 1.1524572073540185e-05, "learning_rate": 3.0114572864321606e-05, "loss": 0.0, "step": 20100 }, { "epoch": 178.09734513274336, "grad_norm": 1.1110500054201111e-05, "learning_rate": 3.0105150753768844e-05, "loss": 0.0, "step": 20125 }, { "epoch": 178.31858407079645, "grad_norm": 1.1541183994268067e-05, "learning_rate": 3.009572864321608e-05, "loss": 0.0, "step": 20150 }, { "epoch": 178.53982300884957, "grad_norm": 1.1776502105931286e-05, "learning_rate": 3.0086306532663316e-05, "loss": 0.0, "step": 20175 }, { "epoch": 178.76106194690266, "grad_norm": 1.1110502782685217e-05, "learning_rate": 3.0076884422110548e-05, "loss": 0.0, "step": 20200 }, { "epoch": 178.98230088495575, "grad_norm": 1.1525436093506869e-05, "learning_rate": 3.0067462311557785e-05, "loss": 0.0, "step": 20225 }, { "epoch": 179.20353982300884, "grad_norm": 1.0934762030956335e-05, "learning_rate": 3.005804020100502e-05, "loss": 0.0, "step": 20250 }, { "epoch": 179.42477876106196, "grad_norm": 1.103356134990463e-05, "learning_rate": 3.0048618090452258e-05, "loss": 0.0, "step": 20275 }, { "epoch": 179.64601769911505, "grad_norm": 1.1192755664524157e-05, "learning_rate": 3.0039195979899496e-05, "loss": 0.0, "step": 20300 }, { "epoch": 179.86725663716814, "grad_norm": 1.0648571333149448e-05, "learning_rate": 3.002977386934673e-05, "loss": 0.0, "step": 20325 }, { "epoch": 180.08849557522123, "grad_norm": 1.015719954011729e-05, "learning_rate": 3.002035175879397e-05, "loss": 0.0, "step": 20350 }, { "epoch": 180.30973451327435, "grad_norm": 1.0093190212501213e-05, "learning_rate": 3.0010929648241203e-05, "loss": 0.0, "step": 20375 }, { "epoch": 180.53097345132744, "grad_norm": 1.0189358363277279e-05, "learning_rate": 3.000150753768844e-05, "loss": 0.0, "step": 20400 }, { "epoch": 180.75221238938053, "grad_norm": 1.0108396963914856e-05, "learning_rate": 2.9992085427135675e-05, "loss": 0.0, "step": 20425 }, { "epoch": 180.97345132743362, "grad_norm": 1.0920614840870257e-05, "learning_rate": 2.9982663316582913e-05, "loss": 0.0, "step": 20450 }, { "epoch": 181.1946902654867, "grad_norm": 9.898592907120474e-06, "learning_rate": 2.9973241206030148e-05, "loss": 0.0, "step": 20475 }, { "epoch": 181.41592920353983, "grad_norm": 9.698942449176684e-06, "learning_rate": 2.9963819095477386e-05, "loss": 0.0, "step": 20500 }, { "epoch": 181.63716814159292, "grad_norm": 1.0033192666014656e-05, "learning_rate": 2.9954396984924624e-05, "loss": 0.0, "step": 20525 }, { "epoch": 181.858407079646, "grad_norm": 1.0161260433960706e-05, "learning_rate": 2.9944974874371855e-05, "loss": 0.0, "step": 20550 }, { "epoch": 182.0796460176991, "grad_norm": 9.562485502101481e-06, "learning_rate": 2.9935552763819093e-05, "loss": 0.0, "step": 20575 }, { "epoch": 182.30088495575222, "grad_norm": 9.279337064072024e-06, "learning_rate": 2.9926130653266327e-05, "loss": 0.0, "step": 20600 }, { "epoch": 182.52212389380531, "grad_norm": 9.350412256026175e-06, "learning_rate": 2.9916708542713565e-05, "loss": 0.0, "step": 20625 }, { "epoch": 182.7433628318584, "grad_norm": 9.279878213419579e-06, "learning_rate": 2.99072864321608e-05, "loss": 0.0, "step": 20650 }, { "epoch": 182.9646017699115, "grad_norm": 9.447763659409247e-06, "learning_rate": 2.9897864321608038e-05, "loss": 0.0, "step": 20675 }, { "epoch": 183.18584070796462, "grad_norm": 8.820994480629452e-06, "learning_rate": 2.9888442211055272e-05, "loss": 0.0, "step": 20700 }, { "epoch": 183.4070796460177, "grad_norm": 9.373869943374302e-06, "learning_rate": 2.987902010050251e-05, "loss": 0.0, "step": 20725 }, { "epoch": 183.6283185840708, "grad_norm": 9.05165416043019e-06, "learning_rate": 2.9869597989949748e-05, "loss": 0.0, "step": 20750 }, { "epoch": 183.8495575221239, "grad_norm": 9.177663741866127e-06, "learning_rate": 2.9860175879396983e-05, "loss": 0.0, "step": 20775 }, { "epoch": 184.07079646017698, "grad_norm": 8.41005294205388e-06, "learning_rate": 2.985075376884422e-05, "loss": 0.0, "step": 20800 }, { "epoch": 184.2920353982301, "grad_norm": 8.519081347913016e-06, "learning_rate": 2.9841331658291455e-05, "loss": 0.0, "step": 20825 }, { "epoch": 184.5132743362832, "grad_norm": 8.532471838407218e-06, "learning_rate": 2.9831909547738693e-05, "loss": 0.0, "step": 20850 }, { "epoch": 184.73451327433628, "grad_norm": 8.494052053720225e-06, "learning_rate": 2.9822487437185924e-05, "loss": 0.0, "step": 20875 }, { "epoch": 184.95575221238937, "grad_norm": 8.600711225881241e-06, "learning_rate": 2.9813065326633162e-05, "loss": 0.0, "step": 20900 }, { "epoch": 185.1769911504425, "grad_norm": 7.96441327111097e-06, "learning_rate": 2.98036432160804e-05, "loss": 0.0, "step": 20925 }, { "epoch": 185.39823008849558, "grad_norm": 7.965331860759761e-06, "learning_rate": 2.9794221105527635e-05, "loss": 0.0, "step": 20950 }, { "epoch": 185.61946902654867, "grad_norm": 8.03299462859286e-06, "learning_rate": 2.9784798994974872e-05, "loss": 0.0, "step": 20975 }, { "epoch": 185.84070796460176, "grad_norm": 8.232344953285065e-06, "learning_rate": 2.9775376884422107e-05, "loss": 0.0, "step": 21000 }, { "epoch": 186.06194690265488, "grad_norm": 8.109732334560249e-06, "learning_rate": 2.9765954773869345e-05, "loss": 0.0, "step": 21025 }, { "epoch": 186.28318584070797, "grad_norm": 7.807308065821417e-06, "learning_rate": 2.975653266331658e-05, "loss": 0.0, "step": 21050 }, { "epoch": 186.50442477876106, "grad_norm": 7.652526619494893e-06, "learning_rate": 2.9747110552763817e-05, "loss": 0.0, "step": 21075 }, { "epoch": 186.72566371681415, "grad_norm": 7.834957614250015e-06, "learning_rate": 2.9737688442211052e-05, "loss": 0.0, "step": 21100 }, { "epoch": 186.94690265486724, "grad_norm": 8.043120033107698e-06, "learning_rate": 2.972826633165829e-05, "loss": 0.0, "step": 21125 }, { "epoch": 187.16814159292036, "grad_norm": 7.521067800553283e-06, "learning_rate": 2.9718844221105528e-05, "loss": 0.0, "step": 21150 }, { "epoch": 187.38938053097345, "grad_norm": 7.46522755434853e-06, "learning_rate": 2.9709422110552762e-05, "loss": 0.0, "step": 21175 }, { "epoch": 187.61061946902655, "grad_norm": 7.82967617851682e-06, "learning_rate": 2.97e-05, "loss": 0.0, "step": 21200 }, { "epoch": 187.83185840707964, "grad_norm": 7.881420060584787e-06, "learning_rate": 2.969057788944723e-05, "loss": 0.0, "step": 21225 }, { "epoch": 188.05309734513276, "grad_norm": 6.979531917750137e-06, "learning_rate": 2.968115577889447e-05, "loss": 0.0, "step": 21250 }, { "epoch": 188.27433628318585, "grad_norm": 7.100856237229891e-06, "learning_rate": 2.9671733668341704e-05, "loss": 0.0, "step": 21275 }, { "epoch": 188.49557522123894, "grad_norm": 7.07072786099161e-06, "learning_rate": 2.9662311557788942e-05, "loss": 0.0, "step": 21300 }, { "epoch": 188.71681415929203, "grad_norm": 7.183413799793925e-06, "learning_rate": 2.9652889447236176e-05, "loss": 0.0, "step": 21325 }, { "epoch": 188.93805309734512, "grad_norm": 7.501171694457298e-06, "learning_rate": 2.9643467336683414e-05, "loss": 0.0, "step": 21350 }, { "epoch": 189.15929203539824, "grad_norm": 6.794829914724687e-06, "learning_rate": 2.9634045226130652e-05, "loss": 0.0, "step": 21375 }, { "epoch": 189.38053097345133, "grad_norm": 6.914126061019488e-06, "learning_rate": 2.9624623115577887e-05, "loss": 0.0, "step": 21400 }, { "epoch": 189.60176991150442, "grad_norm": 6.977436441957252e-06, "learning_rate": 2.9615201005025125e-05, "loss": 0.0, "step": 21425 }, { "epoch": 189.8230088495575, "grad_norm": 7.054320121824276e-06, "learning_rate": 2.960577889447236e-05, "loss": 0.0, "step": 21450 }, { "epoch": 190.04424778761063, "grad_norm": 6.893785666761687e-06, "learning_rate": 2.9596356783919597e-05, "loss": 0.0, "step": 21475 }, { "epoch": 190.26548672566372, "grad_norm": 6.4556861616438255e-06, "learning_rate": 2.958693467336683e-05, "loss": 0.0, "step": 21500 }, { "epoch": 190.4867256637168, "grad_norm": 6.400185156962834e-06, "learning_rate": 2.957751256281407e-05, "loss": 0.0, "step": 21525 }, { "epoch": 190.7079646017699, "grad_norm": 6.802868028898956e-06, "learning_rate": 2.9568090452261308e-05, "loss": 0.0, "step": 21550 }, { "epoch": 190.92920353982302, "grad_norm": 6.753615252819145e-06, "learning_rate": 2.955866834170854e-05, "loss": 0.0, "step": 21575 }, { "epoch": 191.1504424778761, "grad_norm": 6.678891622868832e-06, "learning_rate": 2.9549246231155777e-05, "loss": 0.0, "step": 21600 }, { "epoch": 191.3716814159292, "grad_norm": 6.341920652630506e-06, "learning_rate": 2.953982412060301e-05, "loss": 0.0, "step": 21625 }, { "epoch": 191.5929203539823, "grad_norm": 6.426645541068865e-06, "learning_rate": 2.953040201005025e-05, "loss": 0.0, "step": 21650 }, { "epoch": 191.81415929203538, "grad_norm": 6.5284907577733975e-06, "learning_rate": 2.9520979899497484e-05, "loss": 0.0, "step": 21675 }, { "epoch": 192.0353982300885, "grad_norm": 6.07876881986158e-06, "learning_rate": 2.951155778894472e-05, "loss": 0.0, "step": 21700 }, { "epoch": 192.2566371681416, "grad_norm": 6.064452463760972e-06, "learning_rate": 2.9502135678391956e-05, "loss": 0.0, "step": 21725 }, { "epoch": 192.47787610619469, "grad_norm": 6.213407687027939e-06, "learning_rate": 2.9492713567839194e-05, "loss": 0.0, "step": 21750 }, { "epoch": 192.69911504424778, "grad_norm": 6.2289368543133605e-06, "learning_rate": 2.9483291457286432e-05, "loss": 0.0, "step": 21775 }, { "epoch": 192.9203539823009, "grad_norm": 6.28187217444065e-06, "learning_rate": 2.9473869346733666e-05, "loss": 0.0, "step": 21800 }, { "epoch": 193.141592920354, "grad_norm": 5.68640825804323e-06, "learning_rate": 2.9464447236180904e-05, "loss": 0.0, "step": 21825 }, { "epoch": 193.36283185840708, "grad_norm": 5.809448794025229e-06, "learning_rate": 2.9455025125628136e-05, "loss": 0.0, "step": 21850 }, { "epoch": 193.58407079646017, "grad_norm": 5.8924356380885e-06, "learning_rate": 2.9445603015075377e-05, "loss": 0.0, "step": 21875 }, { "epoch": 193.8053097345133, "grad_norm": 5.827768745803041e-06, "learning_rate": 2.9436180904522608e-05, "loss": 0.0, "step": 21900 }, { "epoch": 194.02654867256638, "grad_norm": 5.8385344345879275e-06, "learning_rate": 2.9426758793969846e-05, "loss": 0.0, "step": 21925 }, { "epoch": 194.24778761061947, "grad_norm": 5.994930688757449e-06, "learning_rate": 2.941733668341708e-05, "loss": 0.0, "step": 21950 }, { "epoch": 194.46902654867256, "grad_norm": 6.0387483244994655e-06, "learning_rate": 2.940791457286432e-05, "loss": 0.0, "step": 21975 }, { "epoch": 194.69026548672565, "grad_norm": 5.504535693034995e-06, "learning_rate": 2.9398492462311556e-05, "loss": 0.0, "step": 22000 }, { "epoch": 194.91150442477877, "grad_norm": 5.963975127087906e-06, "learning_rate": 2.938907035175879e-05, "loss": 0.0, "step": 22025 }, { "epoch": 195.13274336283186, "grad_norm": 5.810727088828571e-06, "learning_rate": 2.937964824120603e-05, "loss": 0.0, "step": 22050 }, { "epoch": 195.35398230088495, "grad_norm": 5.940052687947173e-06, "learning_rate": 2.9370226130653263e-05, "loss": 0.0, "step": 22075 }, { "epoch": 195.57522123893804, "grad_norm": 5.644542397931218e-06, "learning_rate": 2.93608040201005e-05, "loss": 0.0, "step": 22100 }, { "epoch": 195.79646017699116, "grad_norm": 5.661124760081293e-06, "learning_rate": 2.9351381909547736e-05, "loss": 0.0, "step": 22125 }, { "epoch": 196.01769911504425, "grad_norm": 5.248576144367689e-06, "learning_rate": 2.9341959798994974e-05, "loss": 0.0, "step": 22150 }, { "epoch": 196.23893805309734, "grad_norm": 5.558731572818942e-06, "learning_rate": 2.9332537688442205e-05, "loss": 0.0, "step": 22175 }, { "epoch": 196.46017699115043, "grad_norm": 5.507940386451082e-06, "learning_rate": 2.9323115577889443e-05, "loss": 0.0, "step": 22200 }, { "epoch": 196.68141592920355, "grad_norm": 5.653538210026454e-06, "learning_rate": 2.9313693467336684e-05, "loss": 0.0, "step": 22225 }, { "epoch": 196.90265486725664, "grad_norm": 5.606281774817035e-06, "learning_rate": 2.9304271356783915e-05, "loss": 0.0, "step": 22250 }, { "epoch": 197.12389380530973, "grad_norm": 5.342993063095491e-06, "learning_rate": 2.9294849246231153e-05, "loss": 0.0, "step": 22275 }, { "epoch": 197.34513274336283, "grad_norm": 5.398203938966617e-06, "learning_rate": 2.9285427135678388e-05, "loss": 0.0, "step": 22300 }, { "epoch": 197.56637168141592, "grad_norm": 5.345249974197941e-06, "learning_rate": 2.9276005025125626e-05, "loss": 0.0, "step": 22325 }, { "epoch": 197.78761061946904, "grad_norm": 5.383188636187697e-06, "learning_rate": 2.926658291457286e-05, "loss": 0.0, "step": 22350 }, { "epoch": 198.00884955752213, "grad_norm": 5.091599632578436e-06, "learning_rate": 2.9257160804020098e-05, "loss": 0.0, "step": 22375 }, { "epoch": 198.23008849557522, "grad_norm": 5.297221377986716e-06, "learning_rate": 2.9247738693467336e-05, "loss": 0.0, "step": 22400 }, { "epoch": 198.4513274336283, "grad_norm": 5.003206297260476e-06, "learning_rate": 2.923831658291457e-05, "loss": 0.0, "step": 22425 }, { "epoch": 198.67256637168143, "grad_norm": 5.3029434639029205e-06, "learning_rate": 2.922889447236181e-05, "loss": 0.0, "step": 22450 }, { "epoch": 198.89380530973452, "grad_norm": 5.500336101249559e-06, "learning_rate": 2.9219472361809043e-05, "loss": 0.0, "step": 22475 }, { "epoch": 199.1150442477876, "grad_norm": 4.95975291414652e-06, "learning_rate": 2.921005025125628e-05, "loss": 0.0, "step": 22500 }, { "epoch": 199.3362831858407, "grad_norm": 4.85020882479148e-06, "learning_rate": 2.9200628140703512e-05, "loss": 0.0, "step": 22525 }, { "epoch": 199.55752212389382, "grad_norm": 4.918050763080828e-06, "learning_rate": 2.919120603015075e-05, "loss": 0.0, "step": 22550 }, { "epoch": 199.7787610619469, "grad_norm": 5.051259904576e-06, "learning_rate": 2.9181783919597985e-05, "loss": 0.0, "step": 22575 }, { "epoch": 200.0, "grad_norm": 8.257229637820274e-06, "learning_rate": 2.9172361809045223e-05, "loss": 0.0, "step": 22600 }, { "epoch": 200.2212389380531, "grad_norm": 4.758574050356401e-06, "learning_rate": 2.916293969849246e-05, "loss": 0.0, "step": 22625 }, { "epoch": 200.44247787610618, "grad_norm": 4.7863572945061605e-06, "learning_rate": 2.9153517587939695e-05, "loss": 0.0, "step": 22650 }, { "epoch": 200.6637168141593, "grad_norm": 4.952272774971789e-06, "learning_rate": 2.9144095477386933e-05, "loss": 0.0, "step": 22675 }, { "epoch": 200.8849557522124, "grad_norm": 4.912289568892447e-06, "learning_rate": 2.9134673366834167e-05, "loss": 0.0, "step": 22700 }, { "epoch": 201.10619469026548, "grad_norm": 4.576055289362557e-06, "learning_rate": 2.9125251256281405e-05, "loss": 0.0, "step": 22725 }, { "epoch": 201.32743362831857, "grad_norm": 4.65554467155016e-06, "learning_rate": 2.911582914572864e-05, "loss": 0.0, "step": 22750 }, { "epoch": 201.5486725663717, "grad_norm": 4.6550753722840454e-06, "learning_rate": 2.9106407035175878e-05, "loss": 0.0, "step": 22775 }, { "epoch": 201.76991150442478, "grad_norm": 4.791072569787502e-06, "learning_rate": 2.9096984924623112e-05, "loss": 0.0, "step": 22800 }, { "epoch": 201.99115044247787, "grad_norm": 4.805692242371151e-06, "learning_rate": 2.908756281407035e-05, "loss": 0.0, "step": 22825 }, { "epoch": 202.21238938053096, "grad_norm": 4.502639058046043e-06, "learning_rate": 2.9078140703517588e-05, "loss": 0.0, "step": 22850 }, { "epoch": 202.43362831858408, "grad_norm": 4.582840119837783e-06, "learning_rate": 2.906871859296482e-05, "loss": 0.0, "step": 22875 }, { "epoch": 202.65486725663717, "grad_norm": 4.5448850869433954e-06, "learning_rate": 2.9059296482412057e-05, "loss": 0.0, "step": 22900 }, { "epoch": 202.87610619469027, "grad_norm": 4.764942787005566e-06, "learning_rate": 2.9049874371859292e-05, "loss": 0.0, "step": 22925 }, { "epoch": 203.09734513274336, "grad_norm": 4.542527221929049e-06, "learning_rate": 2.904045226130653e-05, "loss": 0.0, "step": 22950 }, { "epoch": 203.31858407079645, "grad_norm": 4.379033271106891e-06, "learning_rate": 2.9031030150753764e-05, "loss": 0.0, "step": 22975 }, { "epoch": 203.53982300884957, "grad_norm": 4.544633156911004e-06, "learning_rate": 2.9021608040201002e-05, "loss": 0.0, "step": 23000 }, { "epoch": 203.76106194690266, "grad_norm": 4.517730758379912e-06, "learning_rate": 2.901218592964824e-05, "loss": 0.0, "step": 23025 }, { "epoch": 203.98230088495575, "grad_norm": 4.727245595859131e-06, "learning_rate": 2.9002763819095475e-05, "loss": 0.0, "step": 23050 }, { "epoch": 204.20353982300884, "grad_norm": 4.11316023019026e-06, "learning_rate": 2.8993341708542713e-05, "loss": 0.0, "step": 23075 }, { "epoch": 204.42477876106196, "grad_norm": 4.371562226879178e-06, "learning_rate": 2.8983919597989947e-05, "loss": 0.0, "step": 23100 }, { "epoch": 204.64601769911505, "grad_norm": 4.291971436032327e-06, "learning_rate": 2.8974497487437185e-05, "loss": 0.0, "step": 23125 }, { "epoch": 204.86725663716814, "grad_norm": 4.477180482354015e-06, "learning_rate": 2.896507537688442e-05, "loss": 0.0, "step": 23150 }, { "epoch": 205.08849557522123, "grad_norm": 4.0163149606087245e-06, "learning_rate": 2.8955653266331658e-05, "loss": 0.0, "step": 23175 }, { "epoch": 205.30973451327435, "grad_norm": 4.058101239934331e-06, "learning_rate": 2.894623115577889e-05, "loss": 0.0, "step": 23200 }, { "epoch": 205.53097345132744, "grad_norm": 4.150575477979146e-06, "learning_rate": 2.8936809045226127e-05, "loss": 0.0, "step": 23225 }, { "epoch": 205.75221238938053, "grad_norm": 4.366595021565445e-06, "learning_rate": 2.8927386934673365e-05, "loss": 0.0, "step": 23250 }, { "epoch": 205.97345132743362, "grad_norm": 4.177671598881716e-06, "learning_rate": 2.89179648241206e-05, "loss": 0.0, "step": 23275 }, { "epoch": 206.1946902654867, "grad_norm": 4.084758529643295e-06, "learning_rate": 2.8908542713567837e-05, "loss": 0.0, "step": 23300 }, { "epoch": 206.41592920353983, "grad_norm": 3.913832188118249e-06, "learning_rate": 2.889912060301507e-05, "loss": 0.0, "step": 23325 }, { "epoch": 206.63716814159292, "grad_norm": 3.9625838326173835e-06, "learning_rate": 2.888969849246231e-05, "loss": 0.0, "step": 23350 }, { "epoch": 206.858407079646, "grad_norm": 4.1443490772508085e-06, "learning_rate": 2.8880276381909544e-05, "loss": 0.0, "step": 23375 }, { "epoch": 207.0796460176991, "grad_norm": 4.025645921501564e-06, "learning_rate": 2.8870854271356782e-05, "loss": 0.0, "step": 23400 }, { "epoch": 207.30088495575222, "grad_norm": 4.0323920984519646e-06, "learning_rate": 2.8861432160804016e-05, "loss": 0.0, "step": 23425 }, { "epoch": 207.52212389380531, "grad_norm": 4.217435161990579e-06, "learning_rate": 2.8852010050251254e-05, "loss": 0.0, "step": 23450 }, { "epoch": 207.7433628318584, "grad_norm": 4.1026014514500275e-06, "learning_rate": 2.8842587939698492e-05, "loss": 0.0, "step": 23475 }, { "epoch": 207.9646017699115, "grad_norm": 4.08825417252956e-06, "learning_rate": 2.8833165829145727e-05, "loss": 0.0, "step": 23500 }, { "epoch": 208.18584070796462, "grad_norm": 3.819317953457357e-06, "learning_rate": 2.8823743718592965e-05, "loss": 0.0, "step": 23525 }, { "epoch": 208.4070796460177, "grad_norm": 3.761010248126695e-06, "learning_rate": 2.8814321608040196e-05, "loss": 0.0, "step": 23550 }, { "epoch": 208.6283185840708, "grad_norm": 3.867386112688109e-06, "learning_rate": 2.8804899497487434e-05, "loss": 0.0, "step": 23575 }, { "epoch": 208.8495575221239, "grad_norm": 4.078326583112357e-06, "learning_rate": 2.879547738693467e-05, "loss": 0.0, "step": 23600 }, { "epoch": 209.07079646017698, "grad_norm": 3.660451966425171e-06, "learning_rate": 2.8786055276381906e-05, "loss": 0.0, "step": 23625 }, { "epoch": 209.2920353982301, "grad_norm": 3.874445610563271e-06, "learning_rate": 2.8776633165829144e-05, "loss": 0.0, "step": 23650 }, { "epoch": 209.5132743362832, "grad_norm": 3.6638944038713817e-06, "learning_rate": 2.876721105527638e-05, "loss": 0.0, "step": 23675 }, { "epoch": 209.73451327433628, "grad_norm": 3.819286575890146e-06, "learning_rate": 2.8757788944723617e-05, "loss": 0.0, "step": 23700 }, { "epoch": 209.95575221238937, "grad_norm": 3.7897311813139822e-06, "learning_rate": 2.874836683417085e-05, "loss": 0.0, "step": 23725 }, { "epoch": 210.1769911504425, "grad_norm": 3.7072175018693088e-06, "learning_rate": 2.873894472361809e-05, "loss": 0.0, "step": 23750 }, { "epoch": 210.39823008849558, "grad_norm": 3.6511382859316655e-06, "learning_rate": 2.8729522613065324e-05, "loss": 0.0, "step": 23775 }, { "epoch": 210.61946902654867, "grad_norm": 3.869078682328109e-06, "learning_rate": 2.872010050251256e-05, "loss": 0.0, "step": 23800 }, { "epoch": 210.84070796460176, "grad_norm": 3.812464228758472e-06, "learning_rate": 2.8710678391959796e-05, "loss": 0.0, "step": 23825 }, { "epoch": 211.06194690265488, "grad_norm": 3.499156491670874e-06, "learning_rate": 2.8701256281407034e-05, "loss": 0.0, "step": 23850 }, { "epoch": 211.28318584070797, "grad_norm": 3.49085166817531e-06, "learning_rate": 2.8691834170854272e-05, "loss": 0.0, "step": 23875 }, { "epoch": 211.50442477876106, "grad_norm": 3.6542332964017987e-06, "learning_rate": 2.8682412060301503e-05, "loss": 0.0, "step": 23900 }, { "epoch": 211.72566371681415, "grad_norm": 3.660668880911544e-06, "learning_rate": 2.867298994974874e-05, "loss": 0.0, "step": 23925 }, { "epoch": 211.94690265486724, "grad_norm": 3.6799556255573407e-06, "learning_rate": 2.8663567839195976e-05, "loss": 0.0, "step": 23950 }, { "epoch": 212.16814159292036, "grad_norm": 3.440866521486896e-06, "learning_rate": 2.8654145728643214e-05, "loss": 0.0, "step": 23975 }, { "epoch": 212.38938053097345, "grad_norm": 3.556890533218393e-06, "learning_rate": 2.8644723618090448e-05, "loss": 0.0, "step": 24000 }, { "epoch": 212.61061946902655, "grad_norm": 3.826519332505995e-06, "learning_rate": 2.8635301507537686e-05, "loss": 0.0, "step": 24025 }, { "epoch": 212.83185840707964, "grad_norm": 4.028297098557232e-06, "learning_rate": 2.862587939698492e-05, "loss": 0.0, "step": 24050 }, { "epoch": 213.05309734513276, "grad_norm": 3.539183580869576e-06, "learning_rate": 2.861645728643216e-05, "loss": 0.0, "step": 24075 }, { "epoch": 213.27433628318585, "grad_norm": 3.6705278034787625e-06, "learning_rate": 2.8607035175879396e-05, "loss": 0.0, "step": 24100 }, { "epoch": 213.49557522123894, "grad_norm": 3.624307737482013e-06, "learning_rate": 2.859761306532663e-05, "loss": 0.0, "step": 24125 }, { "epoch": 213.71681415929203, "grad_norm": 3.972299055021722e-06, "learning_rate": 2.858819095477387e-05, "loss": 0.0, "step": 24150 }, { "epoch": 213.93805309734512, "grad_norm": 3.7415024962683674e-06, "learning_rate": 2.8578768844221103e-05, "loss": 0.0, "step": 24175 }, { "epoch": 214.15929203539824, "grad_norm": 3.774849119508872e-06, "learning_rate": 2.856934673366834e-05, "loss": 0.0, "step": 24200 }, { "epoch": 214.38053097345133, "grad_norm": 3.524487510730978e-06, "learning_rate": 2.8559924623115573e-05, "loss": 0.0, "step": 24225 }, { "epoch": 214.60176991150442, "grad_norm": 3.616952881202451e-06, "learning_rate": 2.855050251256281e-05, "loss": 0.0, "step": 24250 }, { "epoch": 214.8230088495575, "grad_norm": 3.7026677546236897e-06, "learning_rate": 2.854108040201005e-05, "loss": 0.0, "step": 24275 }, { "epoch": 215.04424778761063, "grad_norm": 3.4485769901948515e-06, "learning_rate": 2.8531658291457283e-05, "loss": 0.0, "step": 24300 }, { "epoch": 215.26548672566372, "grad_norm": 3.3579829050722765e-06, "learning_rate": 2.852223618090452e-05, "loss": 0.0, "step": 24325 }, { "epoch": 215.4867256637168, "grad_norm": 3.673024366435129e-06, "learning_rate": 2.8512814070351755e-05, "loss": 0.0, "step": 24350 }, { "epoch": 215.7079646017699, "grad_norm": 3.5174407457816415e-06, "learning_rate": 2.8503391959798993e-05, "loss": 0.0, "step": 24375 }, { "epoch": 215.92920353982302, "grad_norm": 3.7813952076248825e-06, "learning_rate": 2.8493969849246228e-05, "loss": 0.0, "step": 24400 }, { "epoch": 216.1504424778761, "grad_norm": 3.497912530292524e-06, "learning_rate": 2.8484547738693466e-05, "loss": 0.0, "step": 24425 }, { "epoch": 216.3716814159292, "grad_norm": 3.534851430231356e-06, "learning_rate": 2.84751256281407e-05, "loss": 0.0, "step": 24450 }, { "epoch": 216.5929203539823, "grad_norm": 3.4597549074533163e-06, "learning_rate": 2.8465703517587938e-05, "loss": 0.0, "step": 24475 }, { "epoch": 216.81415929203538, "grad_norm": 3.5402974845055724e-06, "learning_rate": 2.8456281407035176e-05, "loss": 0.0, "step": 24500 }, { "epoch": 217.0353982300885, "grad_norm": 3.1519459753326373e-06, "learning_rate": 2.844685929648241e-05, "loss": 0.0, "step": 24525 }, { "epoch": 217.2566371681416, "grad_norm": 3.5098978514724877e-06, "learning_rate": 2.843743718592965e-05, "loss": 0.0, "step": 24550 }, { "epoch": 217.47787610619469, "grad_norm": 3.4379750104562845e-06, "learning_rate": 2.842801507537688e-05, "loss": 0.0, "step": 24575 }, { "epoch": 217.69911504424778, "grad_norm": 3.465434701865888e-06, "learning_rate": 2.8418592964824118e-05, "loss": 0.0, "step": 24600 }, { "epoch": 217.9203539823009, "grad_norm": 3.4902147945103934e-06, "learning_rate": 2.8409170854271352e-05, "loss": 0.0, "step": 24625 }, { "epoch": 218.141592920354, "grad_norm": 3.40101678375504e-06, "learning_rate": 2.839974874371859e-05, "loss": 0.0, "step": 24650 }, { "epoch": 218.36283185840708, "grad_norm": 3.457834054643172e-06, "learning_rate": 2.8390326633165825e-05, "loss": 0.0, "step": 24675 }, { "epoch": 218.58407079646017, "grad_norm": 3.272283038313617e-06, "learning_rate": 2.8381281407035174e-05, "loss": 0.0, "step": 24700 }, { "epoch": 218.8053097345133, "grad_norm": 3.3732633255567634e-06, "learning_rate": 2.837185929648241e-05, "loss": 0.0, "step": 24725 }, { "epoch": 219.02654867256638, "grad_norm": 3.1152435440162662e-06, "learning_rate": 2.8362437185929647e-05, "loss": 0.0, "step": 24750 }, { "epoch": 219.24778761061947, "grad_norm": 2.986584377140389e-06, "learning_rate": 2.8353391959798993e-05, "loss": 0.0, "step": 24775 }, { "epoch": 219.46902654867256, "grad_norm": 2.9653706405952107e-06, "learning_rate": 2.8343969849246228e-05, "loss": 0.0, "step": 24800 }, { "epoch": 219.69026548672565, "grad_norm": 2.9107084174029296e-06, "learning_rate": 2.8334547738693466e-05, "loss": 0.0, "step": 24825 }, { "epoch": 219.91150442477877, "grad_norm": 3.0009534839336993e-06, "learning_rate": 2.83251256281407e-05, "loss": 0.0, "step": 24850 }, { "epoch": 220.13274336283186, "grad_norm": 2.8949837087566266e-06, "learning_rate": 2.8315703517587938e-05, "loss": 0.0, "step": 24875 }, { "epoch": 220.35398230088495, "grad_norm": 2.9404411634459393e-06, "learning_rate": 2.8306281407035176e-05, "loss": 0.0, "step": 24900 }, { "epoch": 220.57522123893804, "grad_norm": 5.873576641082764, "learning_rate": 2.8299497487437182e-05, "loss": 0.0635, "step": 24925 }, { "epoch": 220.79646017699116, "grad_norm": 2.8870577812194824, "learning_rate": 2.8290075376884417e-05, "loss": 0.0802, "step": 24950 }, { "epoch": 221.01769911504425, "grad_norm": 2.590811014175415, "learning_rate": 2.8280653266331655e-05, "loss": 0.0551, "step": 24975 }, { "epoch": 221.23893805309734, "grad_norm": 2.084059000015259, "learning_rate": 2.8271231155778893e-05, "loss": 0.0363, "step": 25000 }, { "epoch": 221.23893805309734, "eval_loss": 0.5292547941207886, "eval_runtime": 71.3615, "eval_samples_per_second": 201.551, "eval_steps_per_second": 1.583, "eval_wer": 25.039449270838755, "step": 25000 }, { "epoch": 221.46017699115043, "grad_norm": 1.8522469997406006, "learning_rate": 2.8261809045226127e-05, "loss": 0.03, "step": 25025 }, { "epoch": 221.68141592920355, "grad_norm": 2.119513988494873, "learning_rate": 2.8252386934673365e-05, "loss": 0.0252, "step": 25050 }, { "epoch": 221.90265486725664, "grad_norm": 1.778257131576538, "learning_rate": 2.82429648241206e-05, "loss": 0.0226, "step": 25075 }, { "epoch": 222.12389380530973, "grad_norm": 1.1784181594848633, "learning_rate": 2.8233542713567838e-05, "loss": 0.0148, "step": 25100 }, { "epoch": 222.34513274336283, "grad_norm": 1.232800841331482, "learning_rate": 2.8224120603015072e-05, "loss": 0.0118, "step": 25125 }, { "epoch": 222.56637168141592, "grad_norm": 1.2326799631118774, "learning_rate": 2.821469849246231e-05, "loss": 0.0126, "step": 25150 }, { "epoch": 222.78761061946904, "grad_norm": 1.2790793180465698, "learning_rate": 2.8205276381909548e-05, "loss": 0.0121, "step": 25175 }, { "epoch": 223.00884955752213, "grad_norm": 1.3625150918960571, "learning_rate": 2.8195854271356782e-05, "loss": 0.0106, "step": 25200 }, { "epoch": 223.23008849557522, "grad_norm": 1.0286155939102173, "learning_rate": 2.818643216080402e-05, "loss": 0.0058, "step": 25225 }, { "epoch": 223.4513274336283, "grad_norm": 1.4569597244262695, "learning_rate": 2.817701005025125e-05, "loss": 0.006, "step": 25250 }, { "epoch": 223.67256637168143, "grad_norm": 0.8602577447891235, "learning_rate": 2.816758793969849e-05, "loss": 0.0062, "step": 25275 }, { "epoch": 223.89380530973452, "grad_norm": 0.6840894222259521, "learning_rate": 2.8158165829145724e-05, "loss": 0.0066, "step": 25300 }, { "epoch": 224.1150442477876, "grad_norm": 0.656204879283905, "learning_rate": 2.8148743718592962e-05, "loss": 0.0049, "step": 25325 }, { "epoch": 224.3362831858407, "grad_norm": 0.7234305739402771, "learning_rate": 2.8139321608040196e-05, "loss": 0.0034, "step": 25350 }, { "epoch": 224.55752212389382, "grad_norm": 0.6161668300628662, "learning_rate": 2.8129899497487434e-05, "loss": 0.0031, "step": 25375 }, { "epoch": 224.7787610619469, "grad_norm": 0.7171826958656311, "learning_rate": 2.8120477386934672e-05, "loss": 0.0032, "step": 25400 }, { "epoch": 225.0, "grad_norm": 1.513700246810913, "learning_rate": 2.8111055276381907e-05, "loss": 0.0032, "step": 25425 }, { "epoch": 225.2212389380531, "grad_norm": 0.7853274941444397, "learning_rate": 2.8101633165829145e-05, "loss": 0.002, "step": 25450 }, { "epoch": 225.44247787610618, "grad_norm": 0.2510075271129608, "learning_rate": 2.809221105527638e-05, "loss": 0.0017, "step": 25475 }, { "epoch": 225.6637168141593, "grad_norm": 0.43564218282699585, "learning_rate": 2.8082788944723617e-05, "loss": 0.0019, "step": 25500 }, { "epoch": 225.8849557522124, "grad_norm": 0.4265574812889099, "learning_rate": 2.8073366834170852e-05, "loss": 0.0021, "step": 25525 }, { "epoch": 226.10619469026548, "grad_norm": 0.49338820576667786, "learning_rate": 2.806394472361809e-05, "loss": 0.0017, "step": 25550 }, { "epoch": 226.32743362831857, "grad_norm": 0.48643386363983154, "learning_rate": 2.805452261306532e-05, "loss": 0.0015, "step": 25575 }, { "epoch": 226.5486725663717, "grad_norm": 0.25217169523239136, "learning_rate": 2.804510050251256e-05, "loss": 0.0011, "step": 25600 }, { "epoch": 226.76991150442478, "grad_norm": 0.33475595712661743, "learning_rate": 2.8035678391959797e-05, "loss": 0.0012, "step": 25625 }, { "epoch": 226.99115044247787, "grad_norm": 0.2797233462333679, "learning_rate": 2.802625628140703e-05, "loss": 0.0014, "step": 25650 }, { "epoch": 227.21238938053096, "grad_norm": 0.07988627254962921, "learning_rate": 2.801683417085427e-05, "loss": 0.0006, "step": 25675 }, { "epoch": 227.43362831858408, "grad_norm": 0.12824037671089172, "learning_rate": 2.8007412060301504e-05, "loss": 0.0008, "step": 25700 }, { "epoch": 227.65486725663717, "grad_norm": 0.6536170244216919, "learning_rate": 2.799798994974874e-05, "loss": 0.0007, "step": 25725 }, { "epoch": 227.87610619469027, "grad_norm": 0.24221020936965942, "learning_rate": 2.7988567839195976e-05, "loss": 0.0008, "step": 25750 }, { "epoch": 228.09734513274336, "grad_norm": 0.8189306855201721, "learning_rate": 2.7979145728643214e-05, "loss": 0.0007, "step": 25775 }, { "epoch": 228.31858407079645, "grad_norm": 0.19710151851177216, "learning_rate": 2.7969723618090452e-05, "loss": 0.0004, "step": 25800 }, { "epoch": 228.53982300884957, "grad_norm": 0.086395762860775, "learning_rate": 2.7960301507537687e-05, "loss": 0.0005, "step": 25825 }, { "epoch": 228.76106194690266, "grad_norm": 0.18860171735286713, "learning_rate": 2.7950879396984925e-05, "loss": 0.0006, "step": 25850 }, { "epoch": 228.98230088495575, "grad_norm": 0.27321094274520874, "learning_rate": 2.794145728643216e-05, "loss": 0.0008, "step": 25875 }, { "epoch": 229.20353982300884, "grad_norm": 0.03148556873202324, "learning_rate": 2.7932035175879397e-05, "loss": 0.0006, "step": 25900 }, { "epoch": 229.42477876106196, "grad_norm": 0.1658611297607422, "learning_rate": 2.7922613065326628e-05, "loss": 0.0003, "step": 25925 }, { "epoch": 229.64601769911505, "grad_norm": 0.07361777126789093, "learning_rate": 2.7913190954773866e-05, "loss": 0.0005, "step": 25950 }, { "epoch": 229.86725663716814, "grad_norm": 0.03050021454691887, "learning_rate": 2.79037688442211e-05, "loss": 0.0003, "step": 25975 }, { "epoch": 230.08849557522123, "grad_norm": 0.41663214564323425, "learning_rate": 2.789434673366834e-05, "loss": 0.0004, "step": 26000 }, { "epoch": 230.30973451327435, "grad_norm": 0.022311236709356308, "learning_rate": 2.7884924623115576e-05, "loss": 0.0002, "step": 26025 }, { "epoch": 230.53097345132744, "grad_norm": 0.04241912066936493, "learning_rate": 2.787550251256281e-05, "loss": 0.0005, "step": 26050 }, { "epoch": 230.75221238938053, "grad_norm": 0.35651203989982605, "learning_rate": 2.786608040201005e-05, "loss": 0.0003, "step": 26075 }, { "epoch": 230.97345132743362, "grad_norm": 0.11513473093509674, "learning_rate": 2.7856658291457283e-05, "loss": 0.0005, "step": 26100 }, { "epoch": 231.1946902654867, "grad_norm": 0.30350300669670105, "learning_rate": 2.784723618090452e-05, "loss": 0.0005, "step": 26125 }, { "epoch": 231.41592920353983, "grad_norm": 0.6337562203407288, "learning_rate": 2.7837814070351756e-05, "loss": 0.0009, "step": 26150 }, { "epoch": 231.63716814159292, "grad_norm": 0.7494382858276367, "learning_rate": 2.7828391959798994e-05, "loss": 0.0009, "step": 26175 }, { "epoch": 231.858407079646, "grad_norm": 0.2127227783203125, "learning_rate": 2.781896984924623e-05, "loss": 0.0006, "step": 26200 }, { "epoch": 232.0796460176991, "grad_norm": 0.2748030424118042, "learning_rate": 2.7809547738693466e-05, "loss": 0.0005, "step": 26225 }, { "epoch": 232.30088495575222, "grad_norm": 0.7703046798706055, "learning_rate": 2.7800125628140704e-05, "loss": 0.0007, "step": 26250 }, { "epoch": 232.52212389380531, "grad_norm": 0.3942156434059143, "learning_rate": 2.7790703517587935e-05, "loss": 0.0008, "step": 26275 }, { "epoch": 232.7433628318584, "grad_norm": 0.18233156204223633, "learning_rate": 2.7781281407035173e-05, "loss": 0.0008, "step": 26300 }, { "epoch": 232.9646017699115, "grad_norm": 0.12765996158123016, "learning_rate": 2.7771859296482408e-05, "loss": 0.0008, "step": 26325 }, { "epoch": 233.18584070796462, "grad_norm": 0.5586936473846436, "learning_rate": 2.7762437185929646e-05, "loss": 0.0006, "step": 26350 }, { "epoch": 233.4070796460177, "grad_norm": 0.14822673797607422, "learning_rate": 2.775301507537688e-05, "loss": 0.0004, "step": 26375 }, { "epoch": 233.6283185840708, "grad_norm": 0.4406028091907501, "learning_rate": 2.7743592964824118e-05, "loss": 0.0004, "step": 26400 }, { "epoch": 233.8495575221239, "grad_norm": 0.3573651611804962, "learning_rate": 2.7734170854271356e-05, "loss": 0.0007, "step": 26425 }, { "epoch": 234.07079646017698, "grad_norm": 0.09157952666282654, "learning_rate": 2.772474874371859e-05, "loss": 0.0005, "step": 26450 }, { "epoch": 234.2920353982301, "grad_norm": 0.2806888222694397, "learning_rate": 2.771532663316583e-05, "loss": 0.0007, "step": 26475 }, { "epoch": 234.5132743362832, "grad_norm": 0.8979818820953369, "learning_rate": 2.7705904522613063e-05, "loss": 0.0014, "step": 26500 }, { "epoch": 234.73451327433628, "grad_norm": 0.416824609041214, "learning_rate": 2.76964824120603e-05, "loss": 0.0013, "step": 26525 }, { "epoch": 234.95575221238937, "grad_norm": 0.5179328322410583, "learning_rate": 2.7687060301507536e-05, "loss": 0.0018, "step": 26550 }, { "epoch": 235.1769911504425, "grad_norm": 0.5296214818954468, "learning_rate": 2.7677638190954774e-05, "loss": 0.0022, "step": 26575 }, { "epoch": 235.39823008849558, "grad_norm": 0.8587225675582886, "learning_rate": 2.7668216080402005e-05, "loss": 0.0023, "step": 26600 }, { "epoch": 235.61946902654867, "grad_norm": 0.6724536418914795, "learning_rate": 2.7658793969849243e-05, "loss": 0.0023, "step": 26625 }, { "epoch": 235.84070796460176, "grad_norm": 0.6511335968971252, "learning_rate": 2.764937185929648e-05, "loss": 0.0024, "step": 26650 }, { "epoch": 236.06194690265488, "grad_norm": 0.36268389225006104, "learning_rate": 2.7639949748743715e-05, "loss": 0.0027, "step": 26675 }, { "epoch": 236.28318584070797, "grad_norm": 0.5200737714767456, "learning_rate": 2.7630527638190953e-05, "loss": 0.0019, "step": 26700 }, { "epoch": 236.50442477876106, "grad_norm": 0.4906392991542816, "learning_rate": 2.7621105527638188e-05, "loss": 0.0018, "step": 26725 }, { "epoch": 236.72566371681415, "grad_norm": 0.6138328313827515, "learning_rate": 2.7611683417085426e-05, "loss": 0.0022, "step": 26750 }, { "epoch": 236.94690265486724, "grad_norm": 0.6424855589866638, "learning_rate": 2.760226130653266e-05, "loss": 0.0022, "step": 26775 }, { "epoch": 237.16814159292036, "grad_norm": 0.4302561283111572, "learning_rate": 2.7592839195979898e-05, "loss": 0.0022, "step": 26800 }, { "epoch": 237.38938053097345, "grad_norm": 0.3582526445388794, "learning_rate": 2.7583417085427133e-05, "loss": 0.0016, "step": 26825 }, { "epoch": 237.61061946902655, "grad_norm": 0.28528761863708496, "learning_rate": 2.757399497487437e-05, "loss": 0.0017, "step": 26850 }, { "epoch": 237.83185840707964, "grad_norm": 0.9956899881362915, "learning_rate": 2.756457286432161e-05, "loss": 0.0023, "step": 26875 }, { "epoch": 238.05309734513276, "grad_norm": 0.7663963437080383, "learning_rate": 2.7555150753768843e-05, "loss": 0.0023, "step": 26900 }, { "epoch": 238.27433628318585, "grad_norm": 0.2790527045726776, "learning_rate": 2.754572864321608e-05, "loss": 0.0017, "step": 26925 }, { "epoch": 238.49557522123894, "grad_norm": 0.4014948010444641, "learning_rate": 2.7536306532663312e-05, "loss": 0.0015, "step": 26950 }, { "epoch": 238.71681415929203, "grad_norm": 0.47131773829460144, "learning_rate": 2.752688442211055e-05, "loss": 0.0017, "step": 26975 }, { "epoch": 238.93805309734512, "grad_norm": 0.5465457439422607, "learning_rate": 2.7517462311557784e-05, "loss": 0.0022, "step": 27000 }, { "epoch": 239.15929203539824, "grad_norm": 0.5457135438919067, "learning_rate": 2.7508040201005022e-05, "loss": 0.0016, "step": 27025 }, { "epoch": 239.38053097345133, "grad_norm": 0.43242281675338745, "learning_rate": 2.7498618090452257e-05, "loss": 0.0018, "step": 27050 }, { "epoch": 239.60176991150442, "grad_norm": 0.37167754769325256, "learning_rate": 2.7489195979899495e-05, "loss": 0.0013, "step": 27075 }, { "epoch": 239.8230088495575, "grad_norm": 0.6087905764579773, "learning_rate": 2.7479773869346733e-05, "loss": 0.0019, "step": 27100 }, { "epoch": 240.04424778761063, "grad_norm": 0.13681365549564362, "learning_rate": 2.7470351758793967e-05, "loss": 0.0017, "step": 27125 }, { "epoch": 240.26548672566372, "grad_norm": 0.5421662330627441, "learning_rate": 2.7460929648241205e-05, "loss": 0.0012, "step": 27150 }, { "epoch": 240.4867256637168, "grad_norm": 0.5412272810935974, "learning_rate": 2.745150753768844e-05, "loss": 0.0011, "step": 27175 }, { "epoch": 240.7079646017699, "grad_norm": 0.3047845661640167, "learning_rate": 2.7442085427135678e-05, "loss": 0.0013, "step": 27200 }, { "epoch": 240.92920353982302, "grad_norm": 0.09835159778594971, "learning_rate": 2.743266331658291e-05, "loss": 0.0007, "step": 27225 }, { "epoch": 241.1504424778761, "grad_norm": 0.25755178928375244, "learning_rate": 2.7423241206030147e-05, "loss": 0.0006, "step": 27250 }, { "epoch": 241.3716814159292, "grad_norm": 0.2646671533584595, "learning_rate": 2.7413819095477388e-05, "loss": 0.0005, "step": 27275 }, { "epoch": 241.5929203539823, "grad_norm": 0.0750853642821312, "learning_rate": 2.740439698492462e-05, "loss": 0.0006, "step": 27300 }, { "epoch": 241.81415929203538, "grad_norm": 0.23961129784584045, "learning_rate": 2.7394974874371857e-05, "loss": 0.001, "step": 27325 }, { "epoch": 242.0353982300885, "grad_norm": 0.2560804486274719, "learning_rate": 2.7385552763819092e-05, "loss": 0.0005, "step": 27350 }, { "epoch": 242.2566371681416, "grad_norm": 0.040487997233867645, "learning_rate": 2.737613065326633e-05, "loss": 0.0004, "step": 27375 }, { "epoch": 242.47787610619469, "grad_norm": 0.13907086849212646, "learning_rate": 2.7366708542713564e-05, "loss": 0.0005, "step": 27400 }, { "epoch": 242.69911504424778, "grad_norm": 0.0969061627984047, "learning_rate": 2.7357286432160802e-05, "loss": 0.0002, "step": 27425 }, { "epoch": 242.9203539823009, "grad_norm": 0.02951914817094803, "learning_rate": 2.7347864321608037e-05, "loss": 0.0002, "step": 27450 }, { "epoch": 243.141592920354, "grad_norm": 0.188517764210701, "learning_rate": 2.7338442211055275e-05, "loss": 0.0004, "step": 27475 }, { "epoch": 243.36283185840708, "grad_norm": 0.1404397040605545, "learning_rate": 2.7329020100502513e-05, "loss": 0.0002, "step": 27500 }, { "epoch": 243.58407079646017, "grad_norm": 0.036429308354854584, "learning_rate": 2.7319597989949747e-05, "loss": 0.0002, "step": 27525 }, { "epoch": 243.8053097345133, "grad_norm": 0.06644515693187714, "learning_rate": 2.7310175879396985e-05, "loss": 0.0002, "step": 27550 }, { "epoch": 244.02654867256638, "grad_norm": 0.09513240307569504, "learning_rate": 2.7300753768844216e-05, "loss": 0.0002, "step": 27575 }, { "epoch": 244.24778761061947, "grad_norm": 0.22917942702770233, "learning_rate": 2.7291331658291454e-05, "loss": 0.0001, "step": 27600 }, { "epoch": 244.46902654867256, "grad_norm": 0.03378679230809212, "learning_rate": 2.728190954773869e-05, "loss": 0.0001, "step": 27625 }, { "epoch": 244.69026548672565, "grad_norm": 0.00838454905897379, "learning_rate": 2.7272487437185927e-05, "loss": 0.0001, "step": 27650 }, { "epoch": 244.91150442477877, "grad_norm": 0.057345833629369736, "learning_rate": 2.726306532663316e-05, "loss": 0.0001, "step": 27675 }, { "epoch": 245.13274336283186, "grad_norm": 0.03553846850991249, "learning_rate": 2.72536432160804e-05, "loss": 0.0001, "step": 27700 }, { "epoch": 245.35398230088495, "grad_norm": 0.06396172195672989, "learning_rate": 2.7244221105527637e-05, "loss": 0.0001, "step": 27725 }, { "epoch": 245.57522123893804, "grad_norm": 0.005931967869400978, "learning_rate": 2.723479899497487e-05, "loss": 0.0001, "step": 27750 }, { "epoch": 245.79646017699116, "grad_norm": 0.03418682515621185, "learning_rate": 2.722537688442211e-05, "loss": 0.0001, "step": 27775 }, { "epoch": 246.01769911504425, "grad_norm": 0.003877992508932948, "learning_rate": 2.7215954773869344e-05, "loss": 0.0, "step": 27800 }, { "epoch": 246.23893805309734, "grad_norm": 0.004232319537550211, "learning_rate": 2.7206532663316582e-05, "loss": 0.0, "step": 27825 }, { "epoch": 246.46017699115043, "grad_norm": 0.0034382783342152834, "learning_rate": 2.7197110552763816e-05, "loss": 0.0, "step": 27850 }, { "epoch": 246.68141592920355, "grad_norm": 0.0032587244641035795, "learning_rate": 2.7187688442211054e-05, "loss": 0.0001, "step": 27875 }, { "epoch": 246.90265486725664, "grad_norm": 0.0030673015862703323, "learning_rate": 2.7178266331658292e-05, "loss": 0.0, "step": 27900 }, { "epoch": 247.12389380530973, "grad_norm": 0.0030242730863392353, "learning_rate": 2.7168844221105523e-05, "loss": 0.0, "step": 27925 }, { "epoch": 247.34513274336283, "grad_norm": 0.004282643552869558, "learning_rate": 2.715942211055276e-05, "loss": 0.0, "step": 27950 }, { "epoch": 247.56637168141592, "grad_norm": 0.005186523776501417, "learning_rate": 2.7149999999999996e-05, "loss": 0.0001, "step": 27975 }, { "epoch": 247.78761061946904, "grad_norm": 0.0025636327918618917, "learning_rate": 2.7140577889447234e-05, "loss": 0.0, "step": 28000 }, { "epoch": 248.00884955752213, "grad_norm": 0.002726911334320903, "learning_rate": 2.713115577889447e-05, "loss": 0.0, "step": 28025 }, { "epoch": 248.23008849557522, "grad_norm": 0.0022653602063655853, "learning_rate": 2.7121733668341706e-05, "loss": 0.0, "step": 28050 }, { "epoch": 248.4513274336283, "grad_norm": 0.0024187308736145496, "learning_rate": 2.711231155778894e-05, "loss": 0.0, "step": 28075 }, { "epoch": 248.67256637168143, "grad_norm": 0.002294718287885189, "learning_rate": 2.710288944723618e-05, "loss": 0.0, "step": 28100 }, { "epoch": 248.89380530973452, "grad_norm": 0.0019996596965938807, "learning_rate": 2.7093467336683417e-05, "loss": 0.0, "step": 28125 }, { "epoch": 249.1150442477876, "grad_norm": 0.0022060812916606665, "learning_rate": 2.708404522613065e-05, "loss": 0.0, "step": 28150 }, { "epoch": 249.3362831858407, "grad_norm": 0.0018345387652516365, "learning_rate": 2.707462311557789e-05, "loss": 0.0, "step": 28175 }, { "epoch": 249.55752212389382, "grad_norm": 0.0020958762615919113, "learning_rate": 2.7065201005025124e-05, "loss": 0.0, "step": 28200 }, { "epoch": 249.7787610619469, "grad_norm": 0.10906489193439484, "learning_rate": 2.705577889447236e-05, "loss": 0.0, "step": 28225 }, { "epoch": 250.0, "grad_norm": 0.0036452373024076223, "learning_rate": 2.7046356783919593e-05, "loss": 0.0, "step": 28250 }, { "epoch": 250.2212389380531, "grad_norm": 0.0018432161305099726, "learning_rate": 2.703693467336683e-05, "loss": 0.0, "step": 28275 }, { "epoch": 250.44247787610618, "grad_norm": 0.0017212564125657082, "learning_rate": 2.7027512562814065e-05, "loss": 0.0, "step": 28300 }, { "epoch": 250.6637168141593, "grad_norm": 0.0017644118051975965, "learning_rate": 2.7018090452261303e-05, "loss": 0.0, "step": 28325 }, { "epoch": 250.8849557522124, "grad_norm": 0.0020208146888762712, "learning_rate": 2.700866834170854e-05, "loss": 0.0, "step": 28350 }, { "epoch": 251.10619469026548, "grad_norm": 0.0015584665816277266, "learning_rate": 2.6999246231155776e-05, "loss": 0.0, "step": 28375 }, { "epoch": 251.32743362831857, "grad_norm": 0.0016726481262594461, "learning_rate": 2.6989824120603014e-05, "loss": 0.0, "step": 28400 }, { "epoch": 251.5486725663717, "grad_norm": 0.0016157561913132668, "learning_rate": 2.6980402010050248e-05, "loss": 0.0, "step": 28425 }, { "epoch": 251.76991150442478, "grad_norm": 0.001734544406645, "learning_rate": 2.6970979899497486e-05, "loss": 0.0, "step": 28450 }, { "epoch": 251.99115044247787, "grad_norm": 0.0016481606289744377, "learning_rate": 2.696155778894472e-05, "loss": 0.0, "step": 28475 }, { "epoch": 252.21238938053096, "grad_norm": 0.001763722044415772, "learning_rate": 2.695213567839196e-05, "loss": 0.0, "step": 28500 }, { "epoch": 252.43362831858408, "grad_norm": 0.0015074543189257383, "learning_rate": 2.6942713567839196e-05, "loss": 0.0, "step": 28525 }, { "epoch": 252.65486725663717, "grad_norm": 0.0014283634955063462, "learning_rate": 2.693329145728643e-05, "loss": 0.0, "step": 28550 }, { "epoch": 252.87610619469027, "grad_norm": 0.001574994414113462, "learning_rate": 2.692386934673367e-05, "loss": 0.0, "step": 28575 }, { "epoch": 253.09734513274336, "grad_norm": 0.0013921967474743724, "learning_rate": 2.69144472361809e-05, "loss": 0.0, "step": 28600 }, { "epoch": 253.31858407079645, "grad_norm": 0.0014846455305814743, "learning_rate": 2.6905025125628138e-05, "loss": 0.0, "step": 28625 }, { "epoch": 253.53982300884957, "grad_norm": 0.0014029371086508036, "learning_rate": 2.6895603015075372e-05, "loss": 0.0, "step": 28650 }, { "epoch": 253.76106194690266, "grad_norm": 0.0013785924529656768, "learning_rate": 2.688618090452261e-05, "loss": 0.0, "step": 28675 }, { "epoch": 253.98230088495575, "grad_norm": 0.0013503056252375245, "learning_rate": 2.6876758793969845e-05, "loss": 0.0, "step": 28700 }, { "epoch": 254.20353982300884, "grad_norm": 0.001305821817368269, "learning_rate": 2.6867336683417083e-05, "loss": 0.0, "step": 28725 }, { "epoch": 254.42477876106196, "grad_norm": 0.0012587920064106584, "learning_rate": 2.685791457286432e-05, "loss": 0.0, "step": 28750 }, { "epoch": 254.64601769911505, "grad_norm": 0.001357461791485548, "learning_rate": 2.6848492462311555e-05, "loss": 0.0, "step": 28775 }, { "epoch": 254.86725663716814, "grad_norm": 0.0013111722655594349, "learning_rate": 2.6839070351758793e-05, "loss": 0.0, "step": 28800 }, { "epoch": 255.08849557522123, "grad_norm": 0.0011951987398788333, "learning_rate": 2.6829648241206028e-05, "loss": 0.0, "step": 28825 }, { "epoch": 255.30973451327435, "grad_norm": 0.0011971668573096395, "learning_rate": 2.6820226130653266e-05, "loss": 0.0, "step": 28850 }, { "epoch": 255.53097345132744, "grad_norm": 0.0011330896522849798, "learning_rate": 2.68108040201005e-05, "loss": 0.0, "step": 28875 }, { "epoch": 255.75221238938053, "grad_norm": 0.0012057736748829484, "learning_rate": 2.6801381909547738e-05, "loss": 0.0, "step": 28900 }, { "epoch": 255.97345132743362, "grad_norm": 0.001124461297877133, "learning_rate": 2.679195979899497e-05, "loss": 0.0, "step": 28925 }, { "epoch": 256.1946902654867, "grad_norm": 0.0011013466864824295, "learning_rate": 2.6782537688442207e-05, "loss": 0.0, "step": 28950 }, { "epoch": 256.4159292035398, "grad_norm": 0.0010865385411307216, "learning_rate": 2.6773115577889445e-05, "loss": 0.0, "step": 28975 }, { "epoch": 256.6371681415929, "grad_norm": 0.0010417738230898976, "learning_rate": 2.676369346733668e-05, "loss": 0.0, "step": 29000 }, { "epoch": 256.85840707964604, "grad_norm": 0.0011104387231171131, "learning_rate": 2.6754271356783918e-05, "loss": 0.0, "step": 29025 }, { "epoch": 257.07964601769913, "grad_norm": 0.0010395923163741827, "learning_rate": 2.6744849246231152e-05, "loss": 0.0, "step": 29050 }, { "epoch": 257.3008849557522, "grad_norm": 0.0010854215361177921, "learning_rate": 2.673542713567839e-05, "loss": 0.0, "step": 29075 }, { "epoch": 257.5221238938053, "grad_norm": 0.0010840095346793532, "learning_rate": 2.6726005025125625e-05, "loss": 0.0, "step": 29100 }, { "epoch": 257.7433628318584, "grad_norm": 0.001023492426611483, "learning_rate": 2.6716582914572863e-05, "loss": 0.0, "step": 29125 }, { "epoch": 257.9646017699115, "grad_norm": 0.0010419946629554033, "learning_rate": 2.67071608040201e-05, "loss": 0.0, "step": 29150 }, { "epoch": 258.1858407079646, "grad_norm": 0.0009355759248137474, "learning_rate": 2.6697738693467335e-05, "loss": 0.0, "step": 29175 }, { "epoch": 258.4070796460177, "grad_norm": 0.0009495199774391949, "learning_rate": 2.6688316582914573e-05, "loss": 0.0, "step": 29200 }, { "epoch": 258.62831858407077, "grad_norm": 0.0009074536501429975, "learning_rate": 2.6678894472361808e-05, "loss": 0.0, "step": 29225 }, { "epoch": 258.8495575221239, "grad_norm": 0.0009922997560352087, "learning_rate": 2.6669472361809045e-05, "loss": 0.0, "step": 29250 }, { "epoch": 259.070796460177, "grad_norm": 0.0009205940878018737, "learning_rate": 2.6660050251256277e-05, "loss": 0.0, "step": 29275 }, { "epoch": 259.2920353982301, "grad_norm": 0.0009285227861255407, "learning_rate": 2.6650628140703515e-05, "loss": 0.0, "step": 29300 }, { "epoch": 259.5132743362832, "grad_norm": 0.0008860624511726201, "learning_rate": 2.664120603015075e-05, "loss": 0.0, "step": 29325 }, { "epoch": 259.7345132743363, "grad_norm": 0.0009510797099210322, "learning_rate": 2.6631783919597987e-05, "loss": 0.0, "step": 29350 }, { "epoch": 259.95575221238937, "grad_norm": 0.0009105527424253523, "learning_rate": 2.6622361809045225e-05, "loss": 0.0, "step": 29375 }, { "epoch": 260.17699115044246, "grad_norm": 0.0008503947756253183, "learning_rate": 2.661293969849246e-05, "loss": 0.0, "step": 29400 }, { "epoch": 260.39823008849555, "grad_norm": 0.000934914976824075, "learning_rate": 2.6603517587939697e-05, "loss": 0.0, "step": 29425 }, { "epoch": 260.6194690265487, "grad_norm": 0.0008501445408910513, "learning_rate": 2.6594095477386932e-05, "loss": 0.0, "step": 29450 }, { "epoch": 260.8407079646018, "grad_norm": 0.000874404504429549, "learning_rate": 2.658467336683417e-05, "loss": 0.0, "step": 29475 }, { "epoch": 261.0619469026549, "grad_norm": 0.0007632880005985498, "learning_rate": 2.6575251256281404e-05, "loss": 0.0, "step": 29500 }, { "epoch": 261.283185840708, "grad_norm": 0.0009120817994698882, "learning_rate": 2.6565829145728642e-05, "loss": 0.0, "step": 29525 }, { "epoch": 261.50442477876106, "grad_norm": 0.0008007265860214829, "learning_rate": 2.6556407035175873e-05, "loss": 0.0, "step": 29550 }, { "epoch": 261.72566371681415, "grad_norm": 0.0008180610602721572, "learning_rate": 2.6546984924623115e-05, "loss": 0.0, "step": 29575 }, { "epoch": 261.94690265486724, "grad_norm": 0.0008290090481750667, "learning_rate": 2.6537562814070353e-05, "loss": 0.0, "step": 29600 }, { "epoch": 262.16814159292034, "grad_norm": 0.0007913378649391234, "learning_rate": 2.6528140703517584e-05, "loss": 0.0, "step": 29625 }, { "epoch": 262.3893805309734, "grad_norm": 0.0007713177474215627, "learning_rate": 2.6518718592964822e-05, "loss": 0.0, "step": 29650 }, { "epoch": 262.6106194690266, "grad_norm": 0.0007906784885562956, "learning_rate": 2.6509296482412056e-05, "loss": 0.0, "step": 29675 }, { "epoch": 262.83185840707966, "grad_norm": 0.000787511351518333, "learning_rate": 2.6499874371859294e-05, "loss": 0.0, "step": 29700 }, { "epoch": 263.05309734513276, "grad_norm": 0.0006872792146168649, "learning_rate": 2.649045226130653e-05, "loss": 0.0, "step": 29725 }, { "epoch": 263.27433628318585, "grad_norm": 0.0007398193702101707, "learning_rate": 2.6481030150753767e-05, "loss": 0.0, "step": 29750 }, { "epoch": 263.49557522123894, "grad_norm": 0.000702401390299201, "learning_rate": 2.6471608040201005e-05, "loss": 0.0, "step": 29775 }, { "epoch": 263.716814159292, "grad_norm": 0.0007661789422854781, "learning_rate": 2.646218592964824e-05, "loss": 0.0, "step": 29800 }, { "epoch": 263.9380530973451, "grad_norm": 0.0006990027613937855, "learning_rate": 2.6452763819095477e-05, "loss": 0.0, "step": 29825 }, { "epoch": 264.1592920353982, "grad_norm": 0.0007370463572442532, "learning_rate": 2.644334170854271e-05, "loss": 0.0, "step": 29850 }, { "epoch": 264.3805309734513, "grad_norm": 0.0006762351258657873, "learning_rate": 2.643391959798995e-05, "loss": 0.0, "step": 29875 }, { "epoch": 264.60176991150445, "grad_norm": 0.0006624853704124689, "learning_rate": 2.642449748743718e-05, "loss": 0.0, "step": 29900 }, { "epoch": 264.82300884955754, "grad_norm": 0.0006976353470236063, "learning_rate": 2.6415075376884422e-05, "loss": 0.0, "step": 29925 }, { "epoch": 265.04424778761063, "grad_norm": 0.0007301270961761475, "learning_rate": 2.6405653266331653e-05, "loss": 0.0, "step": 29950 }, { "epoch": 265.2654867256637, "grad_norm": 0.0006329442257992923, "learning_rate": 2.639623115577889e-05, "loss": 0.0, "step": 29975 }, { "epoch": 265.4867256637168, "grad_norm": 0.0006309518939815462, "learning_rate": 2.638680904522613e-05, "loss": 0.0, "step": 30000 }, { "epoch": 265.4867256637168, "eval_loss": 0.5855502486228943, "eval_runtime": 67.4136, "eval_samples_per_second": 213.355, "eval_steps_per_second": 1.676, "eval_wer": 19.575508505436197, "step": 30000 }, { "epoch": 265.7079646017699, "grad_norm": 0.0006442720768973231, "learning_rate": 2.6377386934673364e-05, "loss": 0.0, "step": 30025 }, { "epoch": 265.929203539823, "grad_norm": 0.0006552784470841289, "learning_rate": 2.63679648241206e-05, "loss": 0.0, "step": 30050 }, { "epoch": 266.1504424778761, "grad_norm": 0.0006232760497368872, "learning_rate": 2.6358542713567836e-05, "loss": 0.0, "step": 30075 }, { "epoch": 266.37168141592923, "grad_norm": 0.0006443847087211907, "learning_rate": 2.6349120603015074e-05, "loss": 0.0, "step": 30100 }, { "epoch": 266.5929203539823, "grad_norm": 0.0005629194201901555, "learning_rate": 2.633969849246231e-05, "loss": 0.0, "step": 30125 }, { "epoch": 266.8141592920354, "grad_norm": 0.0005832294700667262, "learning_rate": 2.6330276381909546e-05, "loss": 0.0, "step": 30150 }, { "epoch": 267.0353982300885, "grad_norm": 0.0006206338875927031, "learning_rate": 2.632085427135678e-05, "loss": 0.0, "step": 30175 }, { "epoch": 267.2566371681416, "grad_norm": 0.0006416141404770315, "learning_rate": 2.631143216080402e-05, "loss": 0.0, "step": 30200 }, { "epoch": 267.4778761061947, "grad_norm": 0.0005980608984827995, "learning_rate": 2.6302010050251257e-05, "loss": 0.0, "step": 30225 }, { "epoch": 267.6991150442478, "grad_norm": 0.0005647449870593846, "learning_rate": 2.6292587939698488e-05, "loss": 0.0, "step": 30250 }, { "epoch": 267.92035398230087, "grad_norm": 0.000651559152174741, "learning_rate": 2.6283165829145726e-05, "loss": 0.0, "step": 30275 }, { "epoch": 268.14159292035396, "grad_norm": 0.0005541588761843741, "learning_rate": 2.627374371859296e-05, "loss": 0.0, "step": 30300 }, { "epoch": 268.3628318584071, "grad_norm": 0.0005548319313675165, "learning_rate": 2.62643216080402e-05, "loss": 0.0, "step": 30325 }, { "epoch": 268.5840707964602, "grad_norm": 0.0005703563801944256, "learning_rate": 2.6254899497487433e-05, "loss": 0.0, "step": 30350 }, { "epoch": 268.8053097345133, "grad_norm": 0.0005664612981490791, "learning_rate": 2.624547738693467e-05, "loss": 0.0, "step": 30375 }, { "epoch": 269.0265486725664, "grad_norm": 0.0005971916252747178, "learning_rate": 2.623605527638191e-05, "loss": 0.0, "step": 30400 }, { "epoch": 269.24778761061947, "grad_norm": 0.0005692649865522981, "learning_rate": 2.6226633165829143e-05, "loss": 0.0, "step": 30425 }, { "epoch": 269.46902654867256, "grad_norm": 0.000539868138730526, "learning_rate": 2.621721105527638e-05, "loss": 0.0, "step": 30450 }, { "epoch": 269.69026548672565, "grad_norm": 0.0005484222783707082, "learning_rate": 2.6207788944723616e-05, "loss": 0.0, "step": 30475 }, { "epoch": 269.91150442477874, "grad_norm": 0.0004959541838616133, "learning_rate": 2.6198366834170854e-05, "loss": 0.0, "step": 30500 }, { "epoch": 270.13274336283183, "grad_norm": 0.0005242442712187767, "learning_rate": 2.6188944723618088e-05, "loss": 0.0, "step": 30525 }, { "epoch": 270.353982300885, "grad_norm": 0.0005047495360486209, "learning_rate": 2.6179522613065326e-05, "loss": 0.0, "step": 30550 }, { "epoch": 270.57522123893807, "grad_norm": 0.00044147457811050117, "learning_rate": 2.6170100502512557e-05, "loss": 0.0, "step": 30575 }, { "epoch": 270.79646017699116, "grad_norm": 0.0005342220538295805, "learning_rate": 2.6160678391959795e-05, "loss": 0.0, "step": 30600 }, { "epoch": 271.01769911504425, "grad_norm": 0.0004356440913397819, "learning_rate": 2.6151256281407033e-05, "loss": 0.0, "step": 30625 }, { "epoch": 271.23893805309734, "grad_norm": 0.0005136929103173316, "learning_rate": 2.6141834170854268e-05, "loss": 0.0, "step": 30650 }, { "epoch": 271.46017699115043, "grad_norm": 0.00047089476720429957, "learning_rate": 2.6132412060301506e-05, "loss": 0.0, "step": 30675 }, { "epoch": 271.6814159292035, "grad_norm": 0.0005520730628632009, "learning_rate": 2.612298994974874e-05, "loss": 0.0, "step": 30700 }, { "epoch": 271.9026548672566, "grad_norm": 0.0005326725076884031, "learning_rate": 2.6113567839195978e-05, "loss": 0.0, "step": 30725 }, { "epoch": 272.12389380530976, "grad_norm": 0.0004779911250807345, "learning_rate": 2.6104145728643213e-05, "loss": 0.0, "step": 30750 }, { "epoch": 272.34513274336285, "grad_norm": 0.00044374819844961166, "learning_rate": 2.609472361809045e-05, "loss": 0.0, "step": 30775 }, { "epoch": 272.56637168141594, "grad_norm": 0.00044733061804436147, "learning_rate": 2.6085301507537685e-05, "loss": 0.0, "step": 30800 }, { "epoch": 272.78761061946904, "grad_norm": 0.00046529111568816006, "learning_rate": 2.6075879396984923e-05, "loss": 0.0, "step": 30825 }, { "epoch": 273.0088495575221, "grad_norm": 0.0004266177420504391, "learning_rate": 2.606645728643216e-05, "loss": 0.0, "step": 30850 }, { "epoch": 273.2300884955752, "grad_norm": 0.00043460825690999627, "learning_rate": 2.6057035175879395e-05, "loss": 0.0, "step": 30875 }, { "epoch": 273.4513274336283, "grad_norm": 0.00040943012572824955, "learning_rate": 2.6047613065326633e-05, "loss": 0.0, "step": 30900 }, { "epoch": 273.6725663716814, "grad_norm": 0.00043179013300687075, "learning_rate": 2.6038190954773865e-05, "loss": 0.0, "step": 30925 }, { "epoch": 273.8938053097345, "grad_norm": 0.00044694042298942804, "learning_rate": 2.6028768844221102e-05, "loss": 0.0, "step": 30950 }, { "epoch": 274.11504424778764, "grad_norm": 0.0004062600783072412, "learning_rate": 2.6019346733668337e-05, "loss": 0.0, "step": 30975 }, { "epoch": 274.3362831858407, "grad_norm": 0.00042280767229385674, "learning_rate": 2.6009924623115575e-05, "loss": 0.0, "step": 31000 }, { "epoch": 274.5575221238938, "grad_norm": 0.00041896020411513746, "learning_rate": 2.6000502512562813e-05, "loss": 0.0, "step": 31025 }, { "epoch": 274.7787610619469, "grad_norm": 0.0004693892551586032, "learning_rate": 2.5991080402010047e-05, "loss": 0.0, "step": 31050 }, { "epoch": 275.0, "grad_norm": 0.0007144628907553852, "learning_rate": 2.5981658291457285e-05, "loss": 0.0, "step": 31075 }, { "epoch": 275.2212389380531, "grad_norm": 0.0003890784864779562, "learning_rate": 2.597223618090452e-05, "loss": 0.0, "step": 31100 }, { "epoch": 275.4424778761062, "grad_norm": 0.0003847080224659294, "learning_rate": 2.5962814070351758e-05, "loss": 0.0, "step": 31125 }, { "epoch": 275.6637168141593, "grad_norm": 0.0004007670213468373, "learning_rate": 2.5953391959798992e-05, "loss": 0.0, "step": 31150 }, { "epoch": 275.88495575221236, "grad_norm": 0.00041698035784065723, "learning_rate": 2.594396984924623e-05, "loss": 0.0, "step": 31175 }, { "epoch": 276.1061946902655, "grad_norm": 0.0003776402154471725, "learning_rate": 2.5934547738693465e-05, "loss": 0.0, "step": 31200 }, { "epoch": 276.3274336283186, "grad_norm": 0.0003564855142030865, "learning_rate": 2.5925125628140703e-05, "loss": 0.0, "step": 31225 }, { "epoch": 276.5486725663717, "grad_norm": 0.0003966580261476338, "learning_rate": 2.591570351758794e-05, "loss": 0.0, "step": 31250 }, { "epoch": 276.7699115044248, "grad_norm": 0.0003865899925585836, "learning_rate": 2.5906281407035172e-05, "loss": 0.0, "step": 31275 }, { "epoch": 276.9911504424779, "grad_norm": 0.0003493339172564447, "learning_rate": 2.589685929648241e-05, "loss": 0.0, "step": 31300 }, { "epoch": 277.21238938053096, "grad_norm": 0.000390585366403684, "learning_rate": 2.5887437185929644e-05, "loss": 0.0, "step": 31325 }, { "epoch": 277.43362831858406, "grad_norm": 0.00034772531944327056, "learning_rate": 2.5878015075376882e-05, "loss": 0.0, "step": 31350 }, { "epoch": 277.65486725663715, "grad_norm": 0.00036494285450316966, "learning_rate": 2.5868592964824117e-05, "loss": 0.0, "step": 31375 }, { "epoch": 277.87610619469024, "grad_norm": 0.0003528801316861063, "learning_rate": 2.5859170854271355e-05, "loss": 0.0, "step": 31400 }, { "epoch": 278.0973451327434, "grad_norm": 0.00034508478711359203, "learning_rate": 2.584974874371859e-05, "loss": 0.0, "step": 31425 }, { "epoch": 278.3185840707965, "grad_norm": 0.00037963199429214, "learning_rate": 2.5840326633165827e-05, "loss": 0.0, "step": 31450 }, { "epoch": 278.53982300884957, "grad_norm": 0.00036127326893620193, "learning_rate": 2.5830904522613065e-05, "loss": 0.0, "step": 31475 }, { "epoch": 278.76106194690266, "grad_norm": 0.0003231636655982584, "learning_rate": 2.58214824120603e-05, "loss": 0.0, "step": 31500 }, { "epoch": 278.98230088495575, "grad_norm": 0.0003470756346359849, "learning_rate": 2.5812060301507538e-05, "loss": 0.0, "step": 31525 }, { "epoch": 279.20353982300884, "grad_norm": 0.00035137683153152466, "learning_rate": 2.5802638190954772e-05, "loss": 0.0, "step": 31550 }, { "epoch": 279.42477876106193, "grad_norm": 0.00036601294414140284, "learning_rate": 2.579321608040201e-05, "loss": 0.0, "step": 31575 }, { "epoch": 279.646017699115, "grad_norm": 0.0003421303990762681, "learning_rate": 2.578379396984924e-05, "loss": 0.0, "step": 31600 }, { "epoch": 279.86725663716817, "grad_norm": 0.0003638338530436158, "learning_rate": 2.577437185929648e-05, "loss": 0.0, "step": 31625 }, { "epoch": 280.08849557522126, "grad_norm": 0.00030515462276525795, "learning_rate": 2.5764949748743714e-05, "loss": 0.0, "step": 31650 }, { "epoch": 280.30973451327435, "grad_norm": 0.0003135701408609748, "learning_rate": 2.575552763819095e-05, "loss": 0.0, "step": 31675 }, { "epoch": 280.53097345132744, "grad_norm": 0.0003205137327313423, "learning_rate": 2.574610552763819e-05, "loss": 0.0, "step": 31700 }, { "epoch": 280.75221238938053, "grad_norm": 0.00035507159191183746, "learning_rate": 2.5736683417085424e-05, "loss": 0.0, "step": 31725 }, { "epoch": 280.9734513274336, "grad_norm": 0.0003225499240215868, "learning_rate": 2.5727261306532662e-05, "loss": 0.0, "step": 31750 }, { "epoch": 281.1946902654867, "grad_norm": 0.00031038347515277565, "learning_rate": 2.5717839195979896e-05, "loss": 0.0, "step": 31775 }, { "epoch": 281.4159292035398, "grad_norm": 0.00031241224496625364, "learning_rate": 2.5708417085427134e-05, "loss": 0.0, "step": 31800 }, { "epoch": 281.6371681415929, "grad_norm": 0.00030319526558741927, "learning_rate": 2.569899497487437e-05, "loss": 0.0, "step": 31825 }, { "epoch": 281.85840707964604, "grad_norm": 0.0002971819194499403, "learning_rate": 2.5689572864321607e-05, "loss": 0.0, "step": 31850 }, { "epoch": 282.07964601769913, "grad_norm": 0.00030758813954889774, "learning_rate": 2.5680150753768845e-05, "loss": 0.0, "step": 31875 }, { "epoch": 282.3008849557522, "grad_norm": 0.00029348809039220214, "learning_rate": 2.567072864321608e-05, "loss": 0.0, "step": 31900 }, { "epoch": 282.5221238938053, "grad_norm": 0.0002968369808513671, "learning_rate": 2.5661306532663317e-05, "loss": 0.0, "step": 31925 }, { "epoch": 282.7433628318584, "grad_norm": 0.00030529630021192133, "learning_rate": 2.565188442211055e-05, "loss": 0.0, "step": 31950 }, { "epoch": 282.9646017699115, "grad_norm": 0.0002708868414629251, "learning_rate": 2.5642462311557786e-05, "loss": 0.0, "step": 31975 }, { "epoch": 283.1858407079646, "grad_norm": 0.0002621460589580238, "learning_rate": 2.563304020100502e-05, "loss": 0.0, "step": 32000 }, { "epoch": 283.4070796460177, "grad_norm": 0.000268550036707893, "learning_rate": 2.562361809045226e-05, "loss": 0.0, "step": 32025 }, { "epoch": 283.62831858407077, "grad_norm": 0.00028951873537153006, "learning_rate": 2.5614195979899493e-05, "loss": 0.0, "step": 32050 }, { "epoch": 283.8495575221239, "grad_norm": 0.00028995011234655976, "learning_rate": 2.560477386934673e-05, "loss": 0.0, "step": 32075 }, { "epoch": 284.070796460177, "grad_norm": 0.00025335431564599276, "learning_rate": 2.559535175879397e-05, "loss": 0.0, "step": 32100 }, { "epoch": 284.2920353982301, "grad_norm": 0.0002572358644101769, "learning_rate": 2.5585929648241204e-05, "loss": 0.0, "step": 32125 }, { "epoch": 284.5132743362832, "grad_norm": 0.0002447709266562015, "learning_rate": 2.557650753768844e-05, "loss": 0.0, "step": 32150 }, { "epoch": 284.7345132743363, "grad_norm": 0.0002768408157862723, "learning_rate": 2.5567085427135676e-05, "loss": 0.0, "step": 32175 }, { "epoch": 284.95575221238937, "grad_norm": 0.00026892844471149147, "learning_rate": 2.5557663316582914e-05, "loss": 0.0, "step": 32200 }, { "epoch": 285.17699115044246, "grad_norm": 0.00025187694700434804, "learning_rate": 2.5548241206030145e-05, "loss": 0.0, "step": 32225 }, { "epoch": 285.39823008849555, "grad_norm": 0.00024158631276804954, "learning_rate": 2.5538819095477387e-05, "loss": 0.0, "step": 32250 }, { "epoch": 285.6194690265487, "grad_norm": 0.00025280812405981123, "learning_rate": 2.5529396984924618e-05, "loss": 0.0, "step": 32275 }, { "epoch": 285.8407079646018, "grad_norm": 0.00024068598577287048, "learning_rate": 2.5519974874371856e-05, "loss": 0.0, "step": 32300 }, { "epoch": 286.0619469026549, "grad_norm": 0.00024800197570584714, "learning_rate": 2.5510552763819094e-05, "loss": 0.0, "step": 32325 }, { "epoch": 286.283185840708, "grad_norm": 0.0002392831665929407, "learning_rate": 2.5501130653266328e-05, "loss": 0.0, "step": 32350 }, { "epoch": 286.50442477876106, "grad_norm": 0.00023487623548135161, "learning_rate": 2.5491708542713566e-05, "loss": 0.0, "step": 32375 }, { "epoch": 286.72566371681415, "grad_norm": 0.00023236408014781773, "learning_rate": 2.54822864321608e-05, "loss": 0.0, "step": 32400 }, { "epoch": 286.94690265486724, "grad_norm": 0.00025816159904934466, "learning_rate": 2.547286432160804e-05, "loss": 0.0, "step": 32425 }, { "epoch": 287.16814159292034, "grad_norm": 0.00022557965712621808, "learning_rate": 2.5463442211055273e-05, "loss": 0.0, "step": 32450 }, { "epoch": 287.3893805309734, "grad_norm": 0.00023580105334986, "learning_rate": 2.545402010050251e-05, "loss": 0.0, "step": 32475 }, { "epoch": 287.6106194690266, "grad_norm": 0.00023050922027323395, "learning_rate": 2.544459798994975e-05, "loss": 0.0, "step": 32500 }, { "epoch": 287.83185840707966, "grad_norm": 0.00023668017820455134, "learning_rate": 2.5435175879396983e-05, "loss": 0.0, "step": 32525 }, { "epoch": 288.05309734513276, "grad_norm": 0.00022391021775547415, "learning_rate": 2.542575376884422e-05, "loss": 0.0, "step": 32550 }, { "epoch": 288.27433628318585, "grad_norm": 0.00022853816335555166, "learning_rate": 2.5416331658291453e-05, "loss": 0.0, "step": 32575 }, { "epoch": 288.49557522123894, "grad_norm": 0.00021486979676410556, "learning_rate": 2.5406909547738694e-05, "loss": 0.0, "step": 32600 }, { "epoch": 288.716814159292, "grad_norm": 0.000232308913837187, "learning_rate": 2.5397487437185925e-05, "loss": 0.0, "step": 32625 }, { "epoch": 288.9380530973451, "grad_norm": 0.00021340376406442374, "learning_rate": 2.5388065326633163e-05, "loss": 0.0, "step": 32650 }, { "epoch": 289.1592920353982, "grad_norm": 0.00020235709962435067, "learning_rate": 2.5378643216080397e-05, "loss": 0.0, "step": 32675 }, { "epoch": 289.3805309734513, "grad_norm": 0.00021279042994137853, "learning_rate": 2.5369221105527635e-05, "loss": 0.0, "step": 32700 }, { "epoch": 289.60176991150445, "grad_norm": 0.00020340039918664843, "learning_rate": 2.5359798994974873e-05, "loss": 0.0, "step": 32725 }, { "epoch": 289.82300884955754, "grad_norm": 0.0001966193231055513, "learning_rate": 2.5350376884422108e-05, "loss": 0.0, "step": 32750 }, { "epoch": 290.04424778761063, "grad_norm": 0.00019858093583025038, "learning_rate": 2.5340954773869346e-05, "loss": 0.0, "step": 32775 }, { "epoch": 290.2654867256637, "grad_norm": 0.0002163898607250303, "learning_rate": 2.533153266331658e-05, "loss": 0.0, "step": 32800 }, { "epoch": 290.4867256637168, "grad_norm": 0.00020158984989393502, "learning_rate": 2.5322110552763818e-05, "loss": 0.0, "step": 32825 }, { "epoch": 290.7079646017699, "grad_norm": 0.00021015657694078982, "learning_rate": 2.5312688442211053e-05, "loss": 0.0, "step": 32850 }, { "epoch": 290.929203539823, "grad_norm": 0.00019666150910779834, "learning_rate": 2.530326633165829e-05, "loss": 0.0, "step": 32875 }, { "epoch": 291.1504424778761, "grad_norm": 0.000192782303201966, "learning_rate": 2.5293844221105522e-05, "loss": 0.0, "step": 32900 }, { "epoch": 291.37168141592923, "grad_norm": 0.00019725615857169032, "learning_rate": 2.528442211055276e-05, "loss": 0.0, "step": 32925 }, { "epoch": 291.5929203539823, "grad_norm": 0.000190188831766136, "learning_rate": 2.5275e-05, "loss": 0.0, "step": 32950 }, { "epoch": 291.8141592920354, "grad_norm": 0.0002039953542407602, "learning_rate": 2.5265577889447232e-05, "loss": 0.0, "step": 32975 }, { "epoch": 292.0353982300885, "grad_norm": 0.00018988942611031234, "learning_rate": 2.525615577889447e-05, "loss": 0.0, "step": 33000 }, { "epoch": 292.2566371681416, "grad_norm": 0.00019118443015031517, "learning_rate": 2.5246733668341705e-05, "loss": 0.0, "step": 33025 }, { "epoch": 292.4778761061947, "grad_norm": 0.00018834375077858567, "learning_rate": 2.5237311557788943e-05, "loss": 0.0, "step": 33050 }, { "epoch": 292.6991150442478, "grad_norm": 0.00019888661336153746, "learning_rate": 2.5227889447236177e-05, "loss": 0.0, "step": 33075 }, { "epoch": 292.92035398230087, "grad_norm": 0.00018130955868400633, "learning_rate": 2.5218467336683415e-05, "loss": 0.0, "step": 33100 }, { "epoch": 293.14159292035396, "grad_norm": 0.00016144991968758404, "learning_rate": 2.5209045226130653e-05, "loss": 0.0, "step": 33125 }, { "epoch": 293.3628318584071, "grad_norm": 0.00016827987565193325, "learning_rate": 2.5199623115577888e-05, "loss": 0.0, "step": 33150 }, { "epoch": 293.5840707964602, "grad_norm": 0.00017660936282481998, "learning_rate": 2.5190201005025126e-05, "loss": 0.0, "step": 33175 }, { "epoch": 293.8053097345133, "grad_norm": 0.00017439434304833412, "learning_rate": 2.518077889447236e-05, "loss": 0.0, "step": 33200 }, { "epoch": 294.0265486725664, "grad_norm": 0.00016504972882103175, "learning_rate": 2.5171356783919598e-05, "loss": 0.0, "step": 33225 }, { "epoch": 294.24778761061947, "grad_norm": 0.0001696324470685795, "learning_rate": 2.516193467336683e-05, "loss": 0.0, "step": 33250 }, { "epoch": 294.46902654867256, "grad_norm": 0.00016237534873653203, "learning_rate": 2.5152512562814067e-05, "loss": 0.0, "step": 33275 }, { "epoch": 294.69026548672565, "grad_norm": 0.0001709469361230731, "learning_rate": 2.51430904522613e-05, "loss": 0.0, "step": 33300 }, { "epoch": 294.91150442477874, "grad_norm": 0.00017386504623573273, "learning_rate": 2.513366834170854e-05, "loss": 0.0, "step": 33325 }, { "epoch": 295.13274336283183, "grad_norm": 0.0001454135635867715, "learning_rate": 2.5124246231155777e-05, "loss": 0.0, "step": 33350 }, { "epoch": 295.353982300885, "grad_norm": 0.00015344942221418023, "learning_rate": 2.5114824120603012e-05, "loss": 0.0, "step": 33375 }, { "epoch": 295.57522123893807, "grad_norm": 0.00016118079656735063, "learning_rate": 2.510540201005025e-05, "loss": 0.0, "step": 33400 }, { "epoch": 295.79646017699116, "grad_norm": 0.00016289942141156644, "learning_rate": 2.5095979899497484e-05, "loss": 0.0, "step": 33425 }, { "epoch": 296.01769911504425, "grad_norm": 0.00015486901975236833, "learning_rate": 2.5086557788944722e-05, "loss": 0.0, "step": 33450 }, { "epoch": 296.23893805309734, "grad_norm": 0.00016463024076074362, "learning_rate": 2.5077135678391957e-05, "loss": 0.0, "step": 33475 }, { "epoch": 296.46017699115043, "grad_norm": 0.00014736141019966453, "learning_rate": 2.5067713567839195e-05, "loss": 0.0, "step": 33500 }, { "epoch": 296.6814159292035, "grad_norm": 0.00015301241364795715, "learning_rate": 2.505829145728643e-05, "loss": 0.0, "step": 33525 }, { "epoch": 296.9026548672566, "grad_norm": 0.00017063177074305713, "learning_rate": 2.5048869346733667e-05, "loss": 0.0, "step": 33550 }, { "epoch": 297.12389380530976, "grad_norm": 0.00013506607501767576, "learning_rate": 2.5039447236180905e-05, "loss": 0.0, "step": 33575 }, { "epoch": 297.34513274336285, "grad_norm": 0.00014337620814330876, "learning_rate": 2.5030025125628136e-05, "loss": 0.0, "step": 33600 }, { "epoch": 297.56637168141594, "grad_norm": 0.00013471918646246195, "learning_rate": 2.5020603015075374e-05, "loss": 0.0, "step": 33625 }, { "epoch": 297.78761061946904, "grad_norm": 0.00015170362894423306, "learning_rate": 2.501118090452261e-05, "loss": 0.0, "step": 33650 }, { "epoch": 298.0088495575221, "grad_norm": 0.00013955141184851527, "learning_rate": 2.5001758793969847e-05, "loss": 0.0, "step": 33675 }, { "epoch": 298.2300884955752, "grad_norm": 0.00013374061381909996, "learning_rate": 2.499233668341708e-05, "loss": 0.0, "step": 33700 }, { "epoch": 298.4513274336283, "grad_norm": 0.0001362937327940017, "learning_rate": 2.498291457286432e-05, "loss": 0.0, "step": 33725 }, { "epoch": 298.6725663716814, "grad_norm": 0.00014944247959647328, "learning_rate": 2.4973492462311557e-05, "loss": 0.0, "step": 33750 }, { "epoch": 298.8938053097345, "grad_norm": 0.00014771398855373263, "learning_rate": 2.4964070351758792e-05, "loss": 0.0, "step": 33775 }, { "epoch": 299.11504424778764, "grad_norm": 0.00013289116031955928, "learning_rate": 2.495464824120603e-05, "loss": 0.0, "step": 33800 }, { "epoch": 299.3362831858407, "grad_norm": 0.00014258245937526226, "learning_rate": 2.4945226130653264e-05, "loss": 0.0, "step": 33825 }, { "epoch": 299.5575221238938, "grad_norm": 0.00012723305553663522, "learning_rate": 2.4935804020100502e-05, "loss": 0.0, "step": 33850 }, { "epoch": 299.7787610619469, "grad_norm": 0.00013949388812761754, "learning_rate": 2.4926381909547737e-05, "loss": 0.0, "step": 33875 }, { "epoch": 300.0, "grad_norm": 0.0002589506038930267, "learning_rate": 2.4916959798994975e-05, "loss": 0.0, "step": 33900 }, { "epoch": 300.2212389380531, "grad_norm": 0.00013153610052540898, "learning_rate": 2.4907537688442206e-05, "loss": 0.0, "step": 33925 }, { "epoch": 300.4424778761062, "grad_norm": 0.00013106218830216676, "learning_rate": 2.4898115577889444e-05, "loss": 0.0, "step": 33950 }, { "epoch": 300.6637168141593, "grad_norm": 0.00013496600149665028, "learning_rate": 2.488869346733668e-05, "loss": 0.0, "step": 33975 }, { "epoch": 300.88495575221236, "grad_norm": 0.00012291381426621228, "learning_rate": 2.4879271356783916e-05, "loss": 0.0, "step": 34000 }, { "epoch": 301.1061946902655, "grad_norm": 0.00011542377615114674, "learning_rate": 2.4869849246231154e-05, "loss": 0.0, "step": 34025 }, { "epoch": 301.3274336283186, "grad_norm": 0.00012475413677748293, "learning_rate": 2.486042713567839e-05, "loss": 0.0, "step": 34050 }, { "epoch": 301.5486725663717, "grad_norm": 0.00012458510173019022, "learning_rate": 2.4851005025125627e-05, "loss": 0.0, "step": 34075 }, { "epoch": 301.7699115044248, "grad_norm": 0.000125250851851888, "learning_rate": 2.484158291457286e-05, "loss": 0.0, "step": 34100 }, { "epoch": 301.9911504424779, "grad_norm": 0.00013050103734713048, "learning_rate": 2.48321608040201e-05, "loss": 0.0, "step": 34125 }, { "epoch": 302.21238938053096, "grad_norm": 0.00011849336442537606, "learning_rate": 2.4822738693467334e-05, "loss": 0.0, "step": 34150 }, { "epoch": 302.43362831858406, "grad_norm": 0.00011788833216996863, "learning_rate": 2.481331658291457e-05, "loss": 0.0, "step": 34175 }, { "epoch": 302.65486725663715, "grad_norm": 0.00011757369065890089, "learning_rate": 2.480389447236181e-05, "loss": 0.0, "step": 34200 }, { "epoch": 302.87610619469024, "grad_norm": 0.0001232140784850344, "learning_rate": 2.4794472361809044e-05, "loss": 0.0, "step": 34225 }, { "epoch": 303.0973451327434, "grad_norm": 0.00011463382543297485, "learning_rate": 2.4785050251256282e-05, "loss": 0.0, "step": 34250 }, { "epoch": 303.3185840707965, "grad_norm": 0.00011811843432951719, "learning_rate": 2.4775628140703513e-05, "loss": 0.0, "step": 34275 }, { "epoch": 303.53982300884957, "grad_norm": 0.00011107433965662494, "learning_rate": 2.476620603015075e-05, "loss": 0.0, "step": 34300 }, { "epoch": 303.76106194690266, "grad_norm": 0.00011947059829253703, "learning_rate": 2.4756783919597985e-05, "loss": 0.0, "step": 34325 }, { "epoch": 303.98230088495575, "grad_norm": 0.00012050070654368028, "learning_rate": 2.4747361809045223e-05, "loss": 0.0, "step": 34350 }, { "epoch": 304.20353982300884, "grad_norm": 0.00010232148633804172, "learning_rate": 2.473793969849246e-05, "loss": 0.0, "step": 34375 }, { "epoch": 304.42477876106193, "grad_norm": 0.00010868684330489486, "learning_rate": 2.4728517587939696e-05, "loss": 0.0, "step": 34400 }, { "epoch": 304.646017699115, "grad_norm": 0.00011718307359842584, "learning_rate": 2.4719095477386934e-05, "loss": 0.0, "step": 34425 }, { "epoch": 304.86725663716817, "grad_norm": 0.00011219052248634398, "learning_rate": 2.4709673366834168e-05, "loss": 0.0, "step": 34450 }, { "epoch": 305.08849557522126, "grad_norm": 0.00010092416778206825, "learning_rate": 2.4700251256281406e-05, "loss": 0.0, "step": 34475 }, { "epoch": 305.30973451327435, "grad_norm": 0.0001031780630000867, "learning_rate": 2.469082914572864e-05, "loss": 0.0, "step": 34500 }, { "epoch": 305.53097345132744, "grad_norm": 9.75314833340235e-05, "learning_rate": 2.468140703517588e-05, "loss": 0.0, "step": 34525 }, { "epoch": 305.75221238938053, "grad_norm": 0.0001074173123924993, "learning_rate": 2.4671984924623113e-05, "loss": 0.0, "step": 34550 }, { "epoch": 305.9734513274336, "grad_norm": 9.951498213922605e-05, "learning_rate": 2.466256281407035e-05, "loss": 0.0, "step": 34575 }, { "epoch": 306.1946902654867, "grad_norm": 0.00010108644346473739, "learning_rate": 2.465314070351759e-05, "loss": 0.0, "step": 34600 }, { "epoch": 306.4159292035398, "grad_norm": 9.667595440987498e-05, "learning_rate": 2.464371859296482e-05, "loss": 0.0, "step": 34625 }, { "epoch": 306.6371681415929, "grad_norm": 0.00010112117161042988, "learning_rate": 2.4634296482412058e-05, "loss": 0.0, "step": 34650 }, { "epoch": 306.85840707964604, "grad_norm": 9.726880671223626e-05, "learning_rate": 2.4624874371859293e-05, "loss": 0.0, "step": 34675 }, { "epoch": 307.07964601769913, "grad_norm": 9.235824109055102e-05, "learning_rate": 2.461545226130653e-05, "loss": 0.0, "step": 34700 }, { "epoch": 307.3008849557522, "grad_norm": 9.137902088696137e-05, "learning_rate": 2.4606030150753765e-05, "loss": 0.0, "step": 34725 }, { "epoch": 307.5221238938053, "grad_norm": 9.322053665528074e-05, "learning_rate": 2.4596608040201003e-05, "loss": 0.0, "step": 34750 }, { "epoch": 307.7433628318584, "grad_norm": 9.850750211626291e-05, "learning_rate": 2.4587185929648238e-05, "loss": 0.0, "step": 34775 }, { "epoch": 307.9646017699115, "grad_norm": 9.199245687341318e-05, "learning_rate": 2.4577763819095476e-05, "loss": 0.0, "step": 34800 }, { "epoch": 308.1858407079646, "grad_norm": 9.294808114646003e-05, "learning_rate": 2.4568341708542713e-05, "loss": 0.0, "step": 34825 }, { "epoch": 308.4070796460177, "grad_norm": 9.610825509298593e-05, "learning_rate": 2.4558919597989948e-05, "loss": 0.0, "step": 34850 }, { "epoch": 308.62831858407077, "grad_norm": 9.497781138634309e-05, "learning_rate": 2.4549497487437186e-05, "loss": 0.0, "step": 34875 }, { "epoch": 308.8495575221239, "grad_norm": 9.55557989072986e-05, "learning_rate": 2.454007537688442e-05, "loss": 0.0, "step": 34900 }, { "epoch": 309.070796460177, "grad_norm": 8.548637561034411e-05, "learning_rate": 2.453065326633166e-05, "loss": 0.0, "step": 34925 }, { "epoch": 309.2920353982301, "grad_norm": 8.890897879609838e-05, "learning_rate": 2.452123115577889e-05, "loss": 0.0, "step": 34950 }, { "epoch": 309.5132743362832, "grad_norm": 8.599321154179052e-05, "learning_rate": 2.4511809045226127e-05, "loss": 0.0, "step": 34975 }, { "epoch": 309.7345132743363, "grad_norm": 8.650489326100796e-05, "learning_rate": 2.4502386934673365e-05, "loss": 0.0, "step": 35000 }, { "epoch": 309.7345132743363, "eval_loss": 0.6322592496871948, "eval_runtime": 66.6409, "eval_samples_per_second": 215.828, "eval_steps_per_second": 1.696, "eval_wer": 19.39343494771888, "step": 35000 }, { "epoch": 309.95575221238937, "grad_norm": 8.255845023086295e-05, "learning_rate": 2.44929648241206e-05, "loss": 0.0, "step": 35025 }, { "epoch": 310.17699115044246, "grad_norm": 7.843151252018288e-05, "learning_rate": 2.4483542713567838e-05, "loss": 0.0, "step": 35050 }, { "epoch": 310.39823008849555, "grad_norm": 7.769047078909352e-05, "learning_rate": 2.4474120603015072e-05, "loss": 0.0, "step": 35075 }, { "epoch": 310.6194690265487, "grad_norm": 8.265196083812043e-05, "learning_rate": 2.446469849246231e-05, "loss": 0.0, "step": 35100 }, { "epoch": 310.8407079646018, "grad_norm": 8.427887223660946e-05, "learning_rate": 2.4455276381909545e-05, "loss": 0.0, "step": 35125 }, { "epoch": 311.0619469026549, "grad_norm": 7.639685645699501e-05, "learning_rate": 2.4445854271356783e-05, "loss": 0.0, "step": 35150 }, { "epoch": 311.283185840708, "grad_norm": 8.015005005290732e-05, "learning_rate": 2.4436432160804017e-05, "loss": 0.0, "step": 35175 }, { "epoch": 311.50442477876106, "grad_norm": 8.494073699694127e-05, "learning_rate": 2.4427010050251255e-05, "loss": 0.0, "step": 35200 }, { "epoch": 311.72566371681415, "grad_norm": 8.326030365424231e-05, "learning_rate": 2.4417587939698493e-05, "loss": 0.0, "step": 35225 }, { "epoch": 311.94690265486724, "grad_norm": 8.534012886229903e-05, "learning_rate": 2.4408165829145724e-05, "loss": 0.0, "step": 35250 }, { "epoch": 312.16814159292034, "grad_norm": 7.726414332864806e-05, "learning_rate": 2.4398743718592966e-05, "loss": 0.0, "step": 35275 }, { "epoch": 312.3893805309734, "grad_norm": 7.730643847025931e-05, "learning_rate": 2.4389321608040197e-05, "loss": 0.0, "step": 35300 }, { "epoch": 312.6106194690266, "grad_norm": 7.683919102419168e-05, "learning_rate": 2.4379899497487435e-05, "loss": 0.0, "step": 35325 }, { "epoch": 312.83185840707966, "grad_norm": 7.544992695329711e-05, "learning_rate": 2.437047738693467e-05, "loss": 0.0, "step": 35350 }, { "epoch": 313.05309734513276, "grad_norm": 6.936533463886008e-05, "learning_rate": 2.4361055276381907e-05, "loss": 0.0, "step": 35375 }, { "epoch": 313.27433628318585, "grad_norm": 7.599592208862305e-05, "learning_rate": 2.4351633165829142e-05, "loss": 0.0, "step": 35400 }, { "epoch": 313.49557522123894, "grad_norm": 6.831400241935626e-05, "learning_rate": 2.434221105527638e-05, "loss": 0.0, "step": 35425 }, { "epoch": 313.716814159292, "grad_norm": 7.268662739079446e-05, "learning_rate": 2.4332788944723618e-05, "loss": 0.0, "step": 35450 }, { "epoch": 313.9380530973451, "grad_norm": 7.134452607715502e-05, "learning_rate": 2.4323366834170852e-05, "loss": 0.0, "step": 35475 }, { "epoch": 314.1592920353982, "grad_norm": 6.739003583788872e-05, "learning_rate": 2.431394472361809e-05, "loss": 0.0, "step": 35500 }, { "epoch": 314.3805309734513, "grad_norm": 7.271253707585856e-05, "learning_rate": 2.4304522613065325e-05, "loss": 0.0, "step": 35525 }, { "epoch": 314.60176991150445, "grad_norm": 6.698689685435966e-05, "learning_rate": 2.4295100502512563e-05, "loss": 0.0, "step": 35550 }, { "epoch": 314.82300884955754, "grad_norm": 7.201715197879821e-05, "learning_rate": 2.4285678391959794e-05, "loss": 0.0, "step": 35575 }, { "epoch": 315.04424778761063, "grad_norm": 6.860110443085432e-05, "learning_rate": 2.427625628140703e-05, "loss": 0.0, "step": 35600 }, { "epoch": 315.2654867256637, "grad_norm": 6.977777957217768e-05, "learning_rate": 2.4266834170854273e-05, "loss": 0.0, "step": 35625 }, { "epoch": 315.4867256637168, "grad_norm": 6.530503742396832e-05, "learning_rate": 2.4257412060301504e-05, "loss": 0.0, "step": 35650 }, { "epoch": 315.7079646017699, "grad_norm": 6.66512714815326e-05, "learning_rate": 2.4247989949748742e-05, "loss": 0.0, "step": 35675 }, { "epoch": 315.929203539823, "grad_norm": 6.733799091307446e-05, "learning_rate": 2.4238567839195977e-05, "loss": 0.0, "step": 35700 }, { "epoch": 316.1504424778761, "grad_norm": 6.162056524772197e-05, "learning_rate": 2.4229145728643214e-05, "loss": 0.0, "step": 35725 }, { "epoch": 316.37168141592923, "grad_norm": 6.105389184085652e-05, "learning_rate": 2.421972361809045e-05, "loss": 0.0, "step": 35750 }, { "epoch": 316.5929203539823, "grad_norm": 6.399855192285031e-05, "learning_rate": 2.4210301507537687e-05, "loss": 0.0, "step": 35775 }, { "epoch": 316.8141592920354, "grad_norm": 6.173732253955677e-05, "learning_rate": 2.420087939698492e-05, "loss": 0.0, "step": 35800 }, { "epoch": 317.0353982300885, "grad_norm": 6.284064147621393e-05, "learning_rate": 2.419145728643216e-05, "loss": 0.0, "step": 35825 }, { "epoch": 317.2566371681416, "grad_norm": 5.810756192659028e-05, "learning_rate": 2.4182035175879397e-05, "loss": 0.0, "step": 35850 }, { "epoch": 317.4778761061947, "grad_norm": 5.8661804359871894e-05, "learning_rate": 2.4172613065326632e-05, "loss": 0.0, "step": 35875 }, { "epoch": 317.6991150442478, "grad_norm": 6.348765600705519e-05, "learning_rate": 2.416319095477387e-05, "loss": 0.0, "step": 35900 }, { "epoch": 317.92035398230087, "grad_norm": 6.310314347501844e-05, "learning_rate": 2.41537688442211e-05, "loss": 0.0, "step": 35925 }, { "epoch": 318.14159292035396, "grad_norm": 5.542759390664287e-05, "learning_rate": 2.414434673366834e-05, "loss": 0.0, "step": 35950 }, { "epoch": 318.3628318584071, "grad_norm": 5.932821659371257e-05, "learning_rate": 2.4134924623115573e-05, "loss": 0.0, "step": 35975 }, { "epoch": 318.5840707964602, "grad_norm": 5.832191163790412e-05, "learning_rate": 2.412550251256281e-05, "loss": 0.0, "step": 36000 }, { "epoch": 318.8053097345133, "grad_norm": 5.8000990975415334e-05, "learning_rate": 2.4116080402010046e-05, "loss": 0.0, "step": 36025 }, { "epoch": 319.0265486725664, "grad_norm": 5.461761975311674e-05, "learning_rate": 2.4106658291457284e-05, "loss": 0.0, "step": 36050 }, { "epoch": 319.24778761061947, "grad_norm": 5.5176642490550876e-05, "learning_rate": 2.4097236180904522e-05, "loss": 0.0, "step": 36075 }, { "epoch": 319.46902654867256, "grad_norm": 5.8144003560300916e-05, "learning_rate": 2.4087814070351756e-05, "loss": 0.0, "step": 36100 }, { "epoch": 319.69026548672565, "grad_norm": 5.655804852722213e-05, "learning_rate": 2.4078391959798994e-05, "loss": 0.0, "step": 36125 }, { "epoch": 319.91150442477874, "grad_norm": 5.2058887376915663e-05, "learning_rate": 2.406896984924623e-05, "loss": 0.0, "step": 36150 }, { "epoch": 320.13274336283183, "grad_norm": 5.3344112529885024e-05, "learning_rate": 2.4059547738693467e-05, "loss": 0.0, "step": 36175 }, { "epoch": 320.353982300885, "grad_norm": 5.196534766582772e-05, "learning_rate": 2.40501256281407e-05, "loss": 0.0, "step": 36200 }, { "epoch": 320.57522123893807, "grad_norm": 5.427128780866042e-05, "learning_rate": 2.404070351758794e-05, "loss": 0.0, "step": 36225 }, { "epoch": 320.79646017699116, "grad_norm": 5.414942279458046e-05, "learning_rate": 2.4031281407035177e-05, "loss": 0.0, "step": 36250 }, { "epoch": 321.01769911504425, "grad_norm": 5.2817820687778294e-05, "learning_rate": 2.4021859296482408e-05, "loss": 0.0, "step": 36275 }, { "epoch": 321.23893805309734, "grad_norm": 4.86894314235542e-05, "learning_rate": 2.4012437185929646e-05, "loss": 0.0, "step": 36300 }, { "epoch": 321.46017699115043, "grad_norm": 5.152302765054628e-05, "learning_rate": 2.400301507537688e-05, "loss": 0.0, "step": 36325 }, { "epoch": 321.6814159292035, "grad_norm": 5.1154784159734845e-05, "learning_rate": 2.399359296482412e-05, "loss": 0.0, "step": 36350 }, { "epoch": 321.9026548672566, "grad_norm": 5.079037873656489e-05, "learning_rate": 2.3984170854271353e-05, "loss": 0.0, "step": 36375 }, { "epoch": 322.12389380530976, "grad_norm": 4.732686284114607e-05, "learning_rate": 2.397474874371859e-05, "loss": 0.0, "step": 36400 }, { "epoch": 322.34513274336285, "grad_norm": 4.647308014682494e-05, "learning_rate": 2.3965326633165826e-05, "loss": 0.0, "step": 36425 }, { "epoch": 322.56637168141594, "grad_norm": 4.949702270096168e-05, "learning_rate": 2.3955904522613064e-05, "loss": 0.0, "step": 36450 }, { "epoch": 322.78761061946904, "grad_norm": 5.1394781621638685e-05, "learning_rate": 2.39464824120603e-05, "loss": 0.0, "step": 36475 }, { "epoch": 323.0088495575221, "grad_norm": 4.4213549699634314e-05, "learning_rate": 2.3937060301507536e-05, "loss": 0.0, "step": 36500 }, { "epoch": 323.2300884955752, "grad_norm": 4.6650879085063934e-05, "learning_rate": 2.3927638190954774e-05, "loss": 0.0, "step": 36525 }, { "epoch": 323.4513274336283, "grad_norm": 4.849870674661361e-05, "learning_rate": 2.391821608040201e-05, "loss": 0.0, "step": 36550 }, { "epoch": 323.6725663716814, "grad_norm": 4.651931885746308e-05, "learning_rate": 2.3908793969849246e-05, "loss": 0.0, "step": 36575 }, { "epoch": 323.8938053097345, "grad_norm": 4.738093775813468e-05, "learning_rate": 2.3899371859296478e-05, "loss": 0.0, "step": 36600 }, { "epoch": 324.11504424778764, "grad_norm": 4.3063457269454375e-05, "learning_rate": 2.3889949748743715e-05, "loss": 0.0, "step": 36625 }, { "epoch": 324.3362831858407, "grad_norm": 4.447849642019719e-05, "learning_rate": 2.388052763819095e-05, "loss": 0.0, "step": 36650 }, { "epoch": 324.5575221238938, "grad_norm": 4.50073421234265e-05, "learning_rate": 2.3871105527638188e-05, "loss": 0.0, "step": 36675 }, { "epoch": 324.7787610619469, "grad_norm": 4.615140278474428e-05, "learning_rate": 2.3861683417085426e-05, "loss": 0.0, "step": 36700 }, { "epoch": 325.0, "grad_norm": 6.978277815505862e-05, "learning_rate": 2.385226130653266e-05, "loss": 0.0, "step": 36725 }, { "epoch": 325.2212389380531, "grad_norm": 4.1824150684988126e-05, "learning_rate": 2.38428391959799e-05, "loss": 0.0, "step": 36750 }, { "epoch": 325.4424778761062, "grad_norm": 4.372162584331818e-05, "learning_rate": 2.3833417085427133e-05, "loss": 0.0, "step": 36775 }, { "epoch": 325.6637168141593, "grad_norm": 4.343141699791886e-05, "learning_rate": 2.382399497487437e-05, "loss": 0.0, "step": 36800 }, { "epoch": 325.88495575221236, "grad_norm": 4.233139770803973e-05, "learning_rate": 2.3814572864321605e-05, "loss": 0.0, "step": 36825 }, { "epoch": 326.1061946902655, "grad_norm": 4.108350913156755e-05, "learning_rate": 2.3805150753768843e-05, "loss": 0.0, "step": 36850 }, { "epoch": 326.3274336283186, "grad_norm": 4.257482578395866e-05, "learning_rate": 2.3795728643216078e-05, "loss": 0.0, "step": 36875 }, { "epoch": 326.5486725663717, "grad_norm": 4.439998883754015e-05, "learning_rate": 2.3786306532663316e-05, "loss": 0.0, "step": 36900 }, { "epoch": 326.7699115044248, "grad_norm": 4.2610445234458894e-05, "learning_rate": 2.3776884422110554e-05, "loss": 0.0, "step": 36925 }, { "epoch": 326.9911504424779, "grad_norm": 4.211545819998719e-05, "learning_rate": 2.3767462311557785e-05, "loss": 0.0, "step": 36950 }, { "epoch": 327.21238938053096, "grad_norm": 3.873577588819899e-05, "learning_rate": 2.3758040201005023e-05, "loss": 0.0, "step": 36975 }, { "epoch": 327.43362831858406, "grad_norm": 3.897224451065995e-05, "learning_rate": 2.3748618090452257e-05, "loss": 0.0, "step": 37000 }, { "epoch": 327.65486725663715, "grad_norm": 4.0182356315199286e-05, "learning_rate": 2.3739195979899495e-05, "loss": 0.0, "step": 37025 }, { "epoch": 327.87610619469024, "grad_norm": 3.8497306377394125e-05, "learning_rate": 2.372977386934673e-05, "loss": 0.0, "step": 37050 }, { "epoch": 328.0973451327434, "grad_norm": 3.627425758168101e-05, "learning_rate": 2.3720351758793968e-05, "loss": 0.0, "step": 37075 }, { "epoch": 328.3185840707965, "grad_norm": 3.450288932071999e-05, "learning_rate": 2.3710929648241206e-05, "loss": 0.0, "step": 37100 }, { "epoch": 328.53982300884957, "grad_norm": 3.6770361475646496e-05, "learning_rate": 2.370150753768844e-05, "loss": 0.0, "step": 37125 }, { "epoch": 328.76106194690266, "grad_norm": 3.762533378903754e-05, "learning_rate": 2.3692085427135678e-05, "loss": 0.0, "step": 37150 }, { "epoch": 328.98230088495575, "grad_norm": 3.7896210415055975e-05, "learning_rate": 2.3682663316582913e-05, "loss": 0.0, "step": 37175 }, { "epoch": 329.20353982300884, "grad_norm": 3.909619772457518e-05, "learning_rate": 2.367324120603015e-05, "loss": 0.0, "step": 37200 }, { "epoch": 329.42477876106193, "grad_norm": 3.5217704862589017e-05, "learning_rate": 2.3663819095477385e-05, "loss": 0.0, "step": 37225 }, { "epoch": 329.646017699115, "grad_norm": 3.7104015063960105e-05, "learning_rate": 2.3654396984924623e-05, "loss": 0.0, "step": 37250 }, { "epoch": 329.86725663716817, "grad_norm": 3.393091537873261e-05, "learning_rate": 2.3644974874371854e-05, "loss": 0.0, "step": 37275 }, { "epoch": 330.08849557522126, "grad_norm": 3.4639415389392525e-05, "learning_rate": 2.3635552763819092e-05, "loss": 0.0, "step": 37300 }, { "epoch": 330.30973451327435, "grad_norm": 3.2594667572993785e-05, "learning_rate": 2.362613065326633e-05, "loss": 0.0, "step": 37325 }, { "epoch": 330.53097345132744, "grad_norm": 3.293615372967906e-05, "learning_rate": 2.3616708542713565e-05, "loss": 0.0, "step": 37350 }, { "epoch": 330.75221238938053, "grad_norm": 3.303391349618323e-05, "learning_rate": 2.3607286432160802e-05, "loss": 0.0, "step": 37375 }, { "epoch": 330.9734513274336, "grad_norm": 3.492028918117285e-05, "learning_rate": 2.3597864321608037e-05, "loss": 0.0, "step": 37400 }, { "epoch": 331.1946902654867, "grad_norm": 3.227649722248316e-05, "learning_rate": 2.3588442211055275e-05, "loss": 0.0, "step": 37425 }, { "epoch": 331.4159292035398, "grad_norm": 3.2753781852079555e-05, "learning_rate": 2.357902010050251e-05, "loss": 0.0, "step": 37450 }, { "epoch": 331.6371681415929, "grad_norm": 3.302624463685788e-05, "learning_rate": 2.3569597989949747e-05, "loss": 0.0, "step": 37475 }, { "epoch": 331.85840707964604, "grad_norm": 3.201248910045251e-05, "learning_rate": 2.3560175879396982e-05, "loss": 0.0, "step": 37500 }, { "epoch": 332.07964601769913, "grad_norm": 3.2837640901561826e-05, "learning_rate": 2.355075376884422e-05, "loss": 0.0, "step": 37525 }, { "epoch": 332.3008849557522, "grad_norm": 3.3889951737364754e-05, "learning_rate": 2.3541331658291458e-05, "loss": 0.0, "step": 37550 }, { "epoch": 332.5221238938053, "grad_norm": 3.147457755403593e-05, "learning_rate": 2.3531909547738692e-05, "loss": 0.0, "step": 37575 }, { "epoch": 332.7433628318584, "grad_norm": 3.0769977456657216e-05, "learning_rate": 2.352248743718593e-05, "loss": 0.0, "step": 37600 }, { "epoch": 332.9646017699115, "grad_norm": 3.121445843135007e-05, "learning_rate": 2.351306532663316e-05, "loss": 0.0, "step": 37625 }, { "epoch": 333.1858407079646, "grad_norm": 2.9658029234269634e-05, "learning_rate": 2.35036432160804e-05, "loss": 0.0, "step": 37650 }, { "epoch": 333.4070796460177, "grad_norm": 2.910417242674157e-05, "learning_rate": 2.3494221105527634e-05, "loss": 0.0, "step": 37675 }, { "epoch": 333.62831858407077, "grad_norm": 2.9713217372773215e-05, "learning_rate": 2.3484798994974872e-05, "loss": 0.0, "step": 37700 }, { "epoch": 333.8495575221239, "grad_norm": 3.066580029553734e-05, "learning_rate": 2.347537688442211e-05, "loss": 0.0, "step": 37725 }, { "epoch": 334.070796460177, "grad_norm": 2.8523536457214504e-05, "learning_rate": 2.3465954773869344e-05, "loss": 0.0, "step": 37750 }, { "epoch": 334.2920353982301, "grad_norm": 2.8692042178590782e-05, "learning_rate": 2.3456532663316582e-05, "loss": 0.0, "step": 37775 }, { "epoch": 334.5132743362832, "grad_norm": 2.8238062441232614e-05, "learning_rate": 2.3447110552763817e-05, "loss": 0.0, "step": 37800 }, { "epoch": 334.7345132743363, "grad_norm": 2.9050059310975485e-05, "learning_rate": 2.3437688442211055e-05, "loss": 0.0, "step": 37825 }, { "epoch": 334.95575221238937, "grad_norm": 2.8530263080028817e-05, "learning_rate": 2.342826633165829e-05, "loss": 0.0, "step": 37850 }, { "epoch": 335.17699115044246, "grad_norm": 2.5022874979185872e-05, "learning_rate": 2.3418844221105527e-05, "loss": 0.0, "step": 37875 }, { "epoch": 335.39823008849555, "grad_norm": 2.7435016818344593e-05, "learning_rate": 2.3409422110552758e-05, "loss": 0.0, "step": 37900 }, { "epoch": 335.6194690265487, "grad_norm": 2.6706829885370098e-05, "learning_rate": 2.34e-05, "loss": 0.0, "step": 37925 }, { "epoch": 335.8407079646018, "grad_norm": 2.839654916897416e-05, "learning_rate": 2.3390577889447238e-05, "loss": 0.0, "step": 37950 }, { "epoch": 336.0619469026549, "grad_norm": 2.6685884222388268e-05, "learning_rate": 2.338115577889447e-05, "loss": 0.0, "step": 37975 }, { "epoch": 336.283185840708, "grad_norm": 2.6279616577085108e-05, "learning_rate": 2.3371733668341707e-05, "loss": 0.0, "step": 38000 }, { "epoch": 336.50442477876106, "grad_norm": 2.7127691282657906e-05, "learning_rate": 2.336231155778894e-05, "loss": 0.0, "step": 38025 }, { "epoch": 336.72566371681415, "grad_norm": 2.542462243582122e-05, "learning_rate": 2.335288944723618e-05, "loss": 0.0, "step": 38050 }, { "epoch": 336.94690265486724, "grad_norm": 2.5927920432877727e-05, "learning_rate": 2.3343467336683414e-05, "loss": 0.0, "step": 38075 }, { "epoch": 337.16814159292034, "grad_norm": 2.5630006348364986e-05, "learning_rate": 2.333404522613065e-05, "loss": 0.0, "step": 38100 }, { "epoch": 337.3893805309734, "grad_norm": 2.5149707653326914e-05, "learning_rate": 2.3324623115577886e-05, "loss": 0.0, "step": 38125 }, { "epoch": 337.6106194690266, "grad_norm": 2.3885075279395096e-05, "learning_rate": 2.3315201005025124e-05, "loss": 0.0, "step": 38150 }, { "epoch": 337.83185840707966, "grad_norm": 2.492171734047588e-05, "learning_rate": 2.3305778894472362e-05, "loss": 0.0, "step": 38175 }, { "epoch": 338.05309734513276, "grad_norm": 2.3462005628971383e-05, "learning_rate": 2.3296356783919596e-05, "loss": 0.0, "step": 38200 }, { "epoch": 338.27433628318585, "grad_norm": 2.456297261232976e-05, "learning_rate": 2.3286934673366834e-05, "loss": 0.0, "step": 38225 }, { "epoch": 338.49557522123894, "grad_norm": 2.3422049707733095e-05, "learning_rate": 2.3277512562814066e-05, "loss": 0.0, "step": 38250 }, { "epoch": 338.716814159292, "grad_norm": 2.443935227347538e-05, "learning_rate": 2.3268090452261303e-05, "loss": 0.0, "step": 38275 }, { "epoch": 338.9380530973451, "grad_norm": 2.373207280470524e-05, "learning_rate": 2.3258668341708538e-05, "loss": 0.0, "step": 38300 }, { "epoch": 339.1592920353982, "grad_norm": 2.3990274712559767e-05, "learning_rate": 2.3249246231155776e-05, "loss": 0.0, "step": 38325 }, { "epoch": 339.3805309734513, "grad_norm": 2.2932519641472027e-05, "learning_rate": 2.3239824120603014e-05, "loss": 0.0, "step": 38350 }, { "epoch": 339.60176991150445, "grad_norm": 2.2336151232593693e-05, "learning_rate": 2.323040201005025e-05, "loss": 0.0, "step": 38375 }, { "epoch": 339.82300884955754, "grad_norm": 2.3381522623822093e-05, "learning_rate": 2.3220979899497486e-05, "loss": 0.0, "step": 38400 }, { "epoch": 340.04424778761063, "grad_norm": 2.223625051556155e-05, "learning_rate": 2.321155778894472e-05, "loss": 0.0, "step": 38425 }, { "epoch": 340.2654867256637, "grad_norm": 2.287335337314289e-05, "learning_rate": 2.320213567839196e-05, "loss": 0.0, "step": 38450 }, { "epoch": 340.4867256637168, "grad_norm": 2.2107537006377243e-05, "learning_rate": 2.3192713567839193e-05, "loss": 0.0, "step": 38475 }, { "epoch": 340.7079646017699, "grad_norm": 2.167862476198934e-05, "learning_rate": 2.318329145728643e-05, "loss": 0.0, "step": 38500 }, { "epoch": 340.929203539823, "grad_norm": 2.218607005488593e-05, "learning_rate": 2.3173869346733666e-05, "loss": 0.0, "step": 38525 }, { "epoch": 341.1504424778761, "grad_norm": 2.1691987058147788e-05, "learning_rate": 2.3164447236180904e-05, "loss": 0.0, "step": 38550 }, { "epoch": 341.37168141592923, "grad_norm": 2.1724086764152162e-05, "learning_rate": 2.315502512562814e-05, "loss": 0.0, "step": 38575 }, { "epoch": 341.5929203539823, "grad_norm": 2.0526062144199386e-05, "learning_rate": 2.3145603015075373e-05, "loss": 0.0, "step": 38600 }, { "epoch": 341.8141592920354, "grad_norm": 2.2893653294886462e-05, "learning_rate": 2.313618090452261e-05, "loss": 0.0, "step": 38625 }, { "epoch": 342.0353982300885, "grad_norm": 2.018298619077541e-05, "learning_rate": 2.3126758793969845e-05, "loss": 0.0, "step": 38650 }, { "epoch": 342.2566371681416, "grad_norm": 1.9592207536334172e-05, "learning_rate": 2.3117336683417083e-05, "loss": 0.0, "step": 38675 }, { "epoch": 342.4778761061947, "grad_norm": 2.0602983568096533e-05, "learning_rate": 2.3107914572864318e-05, "loss": 0.0, "step": 38700 }, { "epoch": 342.6991150442478, "grad_norm": 1.9658753444673494e-05, "learning_rate": 2.3098492462311556e-05, "loss": 0.0, "step": 38725 }, { "epoch": 342.92035398230087, "grad_norm": 2.1203186406637542e-05, "learning_rate": 2.308907035175879e-05, "loss": 0.0, "step": 38750 }, { "epoch": 343.14159292035396, "grad_norm": 2.082904575217981e-05, "learning_rate": 2.3079648241206028e-05, "loss": 0.0, "step": 38775 }, { "epoch": 343.3628318584071, "grad_norm": 1.968567266885657e-05, "learning_rate": 2.3070226130653266e-05, "loss": 0.0, "step": 38800 }, { "epoch": 343.5840707964602, "grad_norm": 1.9223803974455222e-05, "learning_rate": 2.30608040201005e-05, "loss": 0.0, "step": 38825 }, { "epoch": 343.8053097345133, "grad_norm": 2.002057954086922e-05, "learning_rate": 2.305138190954774e-05, "loss": 0.0, "step": 38850 }, { "epoch": 344.0265486725664, "grad_norm": 1.8362736227572896e-05, "learning_rate": 2.3041959798994973e-05, "loss": 0.0, "step": 38875 }, { "epoch": 344.24778761061947, "grad_norm": 1.850931948865764e-05, "learning_rate": 2.303253768844221e-05, "loss": 0.0, "step": 38900 }, { "epoch": 344.46902654867256, "grad_norm": 1.8592088963487186e-05, "learning_rate": 2.3023115577889442e-05, "loss": 0.0, "step": 38925 }, { "epoch": 344.69026548672565, "grad_norm": 1.889626400952693e-05, "learning_rate": 2.301369346733668e-05, "loss": 0.0, "step": 38950 }, { "epoch": 344.91150442477874, "grad_norm": 1.8426879250910133e-05, "learning_rate": 2.3004271356783918e-05, "loss": 0.0, "step": 38975 }, { "epoch": 345.13274336283183, "grad_norm": 1.773323856468778e-05, "learning_rate": 2.2994849246231153e-05, "loss": 0.0, "step": 39000 }, { "epoch": 345.353982300885, "grad_norm": 1.6632953702355735e-05, "learning_rate": 2.298542713567839e-05, "loss": 0.0, "step": 39025 }, { "epoch": 345.57522123893807, "grad_norm": 1.7300339095527306e-05, "learning_rate": 2.2976005025125625e-05, "loss": 0.0, "step": 39050 }, { "epoch": 345.79646017699116, "grad_norm": 1.8377482774667442e-05, "learning_rate": 2.2966582914572863e-05, "loss": 0.0, "step": 39075 }, { "epoch": 346.01769911504425, "grad_norm": 1.606610749149695e-05, "learning_rate": 2.2957160804020097e-05, "loss": 0.0, "step": 39100 }, { "epoch": 346.23893805309734, "grad_norm": 1.6662779671605676e-05, "learning_rate": 2.2947738693467335e-05, "loss": 0.0, "step": 39125 }, { "epoch": 346.46017699115043, "grad_norm": 1.6820493328850716e-05, "learning_rate": 2.293831658291457e-05, "loss": 0.0, "step": 39150 }, { "epoch": 346.6814159292035, "grad_norm": 1.759049882821273e-05, "learning_rate": 2.2928894472361808e-05, "loss": 0.0, "step": 39175 }, { "epoch": 346.9026548672566, "grad_norm": 1.6760652215452865e-05, "learning_rate": 2.2919472361809046e-05, "loss": 0.0, "step": 39200 }, { "epoch": 347.12389380530976, "grad_norm": 1.6134867109940387e-05, "learning_rate": 2.291005025125628e-05, "loss": 0.0, "step": 39225 }, { "epoch": 347.34513274336285, "grad_norm": 1.6022366253309883e-05, "learning_rate": 2.2900628140703518e-05, "loss": 0.0, "step": 39250 }, { "epoch": 347.56637168141594, "grad_norm": 1.5738729416625574e-05, "learning_rate": 2.289120603015075e-05, "loss": 0.0, "step": 39275 }, { "epoch": 347.78761061946904, "grad_norm": 1.6201769540202804e-05, "learning_rate": 2.2881783919597987e-05, "loss": 0.0, "step": 39300 }, { "epoch": 348.0088495575221, "grad_norm": 1.5416842870763503e-05, "learning_rate": 2.2872361809045222e-05, "loss": 0.0, "step": 39325 }, { "epoch": 348.2300884955752, "grad_norm": 1.5922621969366446e-05, "learning_rate": 2.286293969849246e-05, "loss": 0.0, "step": 39350 }, { "epoch": 348.4513274336283, "grad_norm": 1.6403042536694556e-05, "learning_rate": 2.2853517587939694e-05, "loss": 0.0, "step": 39375 }, { "epoch": 348.6725663716814, "grad_norm": 1.5326928405556828e-05, "learning_rate": 2.2844095477386932e-05, "loss": 0.0, "step": 39400 }, { "epoch": 348.8938053097345, "grad_norm": 1.672539656283334e-05, "learning_rate": 2.283467336683417e-05, "loss": 0.0, "step": 39425 }, { "epoch": 349.11504424778764, "grad_norm": 1.4742015991942026e-05, "learning_rate": 2.2825251256281405e-05, "loss": 0.0, "step": 39450 }, { "epoch": 349.3362831858407, "grad_norm": 1.5151170373428613e-05, "learning_rate": 2.2815829145728643e-05, "loss": 0.0, "step": 39475 }, { "epoch": 349.5575221238938, "grad_norm": 1.5255665857694112e-05, "learning_rate": 2.2806407035175877e-05, "loss": 0.0, "step": 39500 }, { "epoch": 349.7787610619469, "grad_norm": 1.521185549790971e-05, "learning_rate": 2.2796984924623115e-05, "loss": 0.0, "step": 39525 }, { "epoch": 350.0, "grad_norm": 2.730271080508828e-05, "learning_rate": 2.278756281407035e-05, "loss": 0.0, "step": 39550 }, { "epoch": 350.2212389380531, "grad_norm": 1.5008437912911177e-05, "learning_rate": 2.2778140703517588e-05, "loss": 0.0, "step": 39575 }, { "epoch": 350.4424778761062, "grad_norm": 1.4099452528171241e-05, "learning_rate": 2.2768718592964825e-05, "loss": 0.0, "step": 39600 }, { "epoch": 350.6637168141593, "grad_norm": 1.4171494512993377e-05, "learning_rate": 2.2759296482412057e-05, "loss": 0.0, "step": 39625 }, { "epoch": 350.88495575221236, "grad_norm": 1.4037770597497001e-05, "learning_rate": 2.2749874371859295e-05, "loss": 0.0, "step": 39650 }, { "epoch": 351.1061946902655, "grad_norm": 1.3417326954368036e-05, "learning_rate": 2.274045226130653e-05, "loss": 0.0, "step": 39675 }, { "epoch": 351.3274336283186, "grad_norm": 1.3449342077365145e-05, "learning_rate": 2.2731030150753767e-05, "loss": 0.0, "step": 39700 }, { "epoch": 351.5486725663717, "grad_norm": 1.3649006177729461e-05, "learning_rate": 2.2721608040201e-05, "loss": 0.0, "step": 39725 }, { "epoch": 351.7699115044248, "grad_norm": 1.3327002307050861e-05, "learning_rate": 2.271218592964824e-05, "loss": 0.0, "step": 39750 }, { "epoch": 351.9911504424779, "grad_norm": 1.4666225069959182e-05, "learning_rate": 2.2702763819095474e-05, "loss": 0.0, "step": 39775 }, { "epoch": 352.21238938053096, "grad_norm": 1.2239691386639606e-05, "learning_rate": 2.2693341708542712e-05, "loss": 0.0, "step": 39800 }, { "epoch": 352.43362831858406, "grad_norm": 1.2883128874818794e-05, "learning_rate": 2.268391959798995e-05, "loss": 0.0, "step": 39825 }, { "epoch": 352.65486725663715, "grad_norm": 1.3538296116166748e-05, "learning_rate": 2.2674497487437184e-05, "loss": 0.0, "step": 39850 }, { "epoch": 352.87610619469024, "grad_norm": 1.2635979146580212e-05, "learning_rate": 2.2665075376884422e-05, "loss": 0.0, "step": 39875 }, { "epoch": 353.0973451327434, "grad_norm": 1.18849557111389e-05, "learning_rate": 2.2655653266331657e-05, "loss": 0.0, "step": 39900 }, { "epoch": 353.3185840707965, "grad_norm": 1.2495696864789352e-05, "learning_rate": 2.2646231155778895e-05, "loss": 0.0, "step": 39925 }, { "epoch": 353.53982300884957, "grad_norm": 1.294226422032807e-05, "learning_rate": 2.2636809045226126e-05, "loss": 0.0, "step": 39950 }, { "epoch": 353.76106194690266, "grad_norm": 1.2478597454901319e-05, "learning_rate": 2.2627386934673364e-05, "loss": 0.0, "step": 39975 }, { "epoch": 353.98230088495575, "grad_norm": 1.3348410902835894e-05, "learning_rate": 2.26179648241206e-05, "loss": 0.0, "step": 40000 }, { "epoch": 353.98230088495575, "eval_loss": 0.6857056617736816, "eval_runtime": 65.9691, "eval_samples_per_second": 218.026, "eval_steps_per_second": 1.713, "eval_wer": 19.431583693145363, "step": 40000 }, { "epoch": 354.20353982300884, "grad_norm": 1.2093851182726212e-05, "learning_rate": 2.2608542713567836e-05, "loss": 0.0, "step": 40025 }, { "epoch": 354.42477876106193, "grad_norm": 1.198268546431791e-05, "learning_rate": 2.2599120603015074e-05, "loss": 0.0, "step": 40050 }, { "epoch": 354.646017699115, "grad_norm": 1.2703018910542596e-05, "learning_rate": 2.258969849246231e-05, "loss": 0.0, "step": 40075 }, { "epoch": 354.86725663716817, "grad_norm": 1.1772157449740916e-05, "learning_rate": 2.2580276381909547e-05, "loss": 0.0, "step": 40100 }, { "epoch": 355.08849557522126, "grad_norm": 1.1352371984685306e-05, "learning_rate": 2.257085427135678e-05, "loss": 0.0, "step": 40125 }, { "epoch": 355.30973451327435, "grad_norm": 1.2429938578861766e-05, "learning_rate": 2.256143216080402e-05, "loss": 0.0, "step": 40150 }, { "epoch": 355.53097345132744, "grad_norm": 1.1604310202528723e-05, "learning_rate": 2.2552010050251254e-05, "loss": 0.0, "step": 40175 }, { "epoch": 355.75221238938053, "grad_norm": 1.224351490236586e-05, "learning_rate": 2.254258793969849e-05, "loss": 0.0, "step": 40200 }, { "epoch": 355.9734513274336, "grad_norm": 1.1346145583956968e-05, "learning_rate": 2.253316582914573e-05, "loss": 0.0, "step": 40225 }, { "epoch": 356.1946902654867, "grad_norm": 1.1107085811090656e-05, "learning_rate": 2.2523743718592964e-05, "loss": 0.0, "step": 40250 }, { "epoch": 356.4159292035398, "grad_norm": 1.1077167073381133e-05, "learning_rate": 2.2514321608040202e-05, "loss": 0.0, "step": 40275 }, { "epoch": 356.6371681415929, "grad_norm": 1.089137549570296e-05, "learning_rate": 2.2504899497487433e-05, "loss": 0.0, "step": 40300 }, { "epoch": 356.85840707964604, "grad_norm": 1.1142683433718048e-05, "learning_rate": 2.249547738693467e-05, "loss": 0.0, "step": 40325 }, { "epoch": 357.07964601769913, "grad_norm": 1.0590083547867835e-05, "learning_rate": 2.2486055276381906e-05, "loss": 0.0, "step": 40350 }, { "epoch": 357.3008849557522, "grad_norm": 1.0789841326186433e-05, "learning_rate": 2.2476633165829144e-05, "loss": 0.0, "step": 40375 }, { "epoch": 357.5221238938053, "grad_norm": 1.088298813556321e-05, "learning_rate": 2.2467211055276378e-05, "loss": 0.0, "step": 40400 }, { "epoch": 357.7433628318584, "grad_norm": 1.0766530976979993e-05, "learning_rate": 2.2457788944723616e-05, "loss": 0.0, "step": 40425 }, { "epoch": 357.9646017699115, "grad_norm": 1.0569984624453355e-05, "learning_rate": 2.2448366834170854e-05, "loss": 0.0, "step": 40450 }, { "epoch": 358.1858407079646, "grad_norm": 1.0150124580832198e-05, "learning_rate": 2.243894472361809e-05, "loss": 0.0, "step": 40475 }, { "epoch": 358.4070796460177, "grad_norm": 1.0320564797439147e-05, "learning_rate": 2.2429522613065326e-05, "loss": 0.0, "step": 40500 }, { "epoch": 358.62831858407077, "grad_norm": 1.0634631507855374e-05, "learning_rate": 2.242010050251256e-05, "loss": 0.0, "step": 40525 }, { "epoch": 358.8495575221239, "grad_norm": 9.951399079000112e-06, "learning_rate": 2.24106783919598e-05, "loss": 0.0, "step": 40550 }, { "epoch": 359.070796460177, "grad_norm": 1.0261622264806647e-05, "learning_rate": 2.240125628140703e-05, "loss": 0.0, "step": 40575 }, { "epoch": 359.2920353982301, "grad_norm": 9.681112715043128e-06, "learning_rate": 2.239183417085427e-05, "loss": 0.0, "step": 40600 }, { "epoch": 359.5132743362832, "grad_norm": 1.0559730071690865e-05, "learning_rate": 2.2382412060301503e-05, "loss": 0.0, "step": 40625 }, { "epoch": 359.7345132743363, "grad_norm": 1.0007597666117363e-05, "learning_rate": 2.237298994974874e-05, "loss": 0.0, "step": 40650 }, { "epoch": 359.95575221238937, "grad_norm": 1.0078827472170815e-05, "learning_rate": 2.236356783919598e-05, "loss": 0.0, "step": 40675 }, { "epoch": 360.17699115044246, "grad_norm": 9.668625352787785e-06, "learning_rate": 2.2354145728643213e-05, "loss": 0.0, "step": 40700 }, { "epoch": 360.39823008849555, "grad_norm": 9.435795618628617e-06, "learning_rate": 2.234472361809045e-05, "loss": 0.0, "step": 40725 }, { "epoch": 360.6194690265487, "grad_norm": 9.353101631859317e-06, "learning_rate": 2.2335301507537685e-05, "loss": 0.0, "step": 40750 }, { "epoch": 360.8407079646018, "grad_norm": 9.553785275784321e-06, "learning_rate": 2.2325879396984923e-05, "loss": 0.0, "step": 40775 }, { "epoch": 361.0619469026549, "grad_norm": 9.114929525821935e-06, "learning_rate": 2.2316457286432158e-05, "loss": 0.0, "step": 40800 }, { "epoch": 361.283185840708, "grad_norm": 9.34261242946377e-06, "learning_rate": 2.2307035175879396e-05, "loss": 0.0, "step": 40825 }, { "epoch": 361.50442477876106, "grad_norm": 9.073716682905797e-06, "learning_rate": 2.2297613065326634e-05, "loss": 0.0, "step": 40850 }, { "epoch": 361.72566371681415, "grad_norm": 9.192441211780533e-06, "learning_rate": 2.2288190954773868e-05, "loss": 0.0, "step": 40875 }, { "epoch": 361.94690265486724, "grad_norm": 9.210906682710629e-06, "learning_rate": 2.2278768844221106e-05, "loss": 0.0, "step": 40900 }, { "epoch": 362.16814159292034, "grad_norm": 8.570344107283745e-06, "learning_rate": 2.2269346733668337e-05, "loss": 0.0, "step": 40925 }, { "epoch": 362.3893805309734, "grad_norm": 8.548876394343097e-06, "learning_rate": 2.225992462311558e-05, "loss": 0.0, "step": 40950 }, { "epoch": 362.6106194690266, "grad_norm": 8.80750758369686e-06, "learning_rate": 2.225050251256281e-05, "loss": 0.0, "step": 40975 }, { "epoch": 362.83185840707966, "grad_norm": 8.878200787876267e-06, "learning_rate": 2.2241080402010048e-05, "loss": 0.0, "step": 41000 }, { "epoch": 363.05309734513276, "grad_norm": 8.4573603089666e-06, "learning_rate": 2.2231658291457282e-05, "loss": 0.0, "step": 41025 }, { "epoch": 363.27433628318585, "grad_norm": 8.678251106175594e-06, "learning_rate": 2.222223618090452e-05, "loss": 0.0, "step": 41050 }, { "epoch": 363.49557522123894, "grad_norm": 8.475627510051709e-06, "learning_rate": 2.2212814070351758e-05, "loss": 0.0, "step": 41075 }, { "epoch": 363.716814159292, "grad_norm": 8.792138942226302e-06, "learning_rate": 2.2203391959798993e-05, "loss": 0.0, "step": 41100 }, { "epoch": 363.9380530973451, "grad_norm": 8.444429113296792e-06, "learning_rate": 2.219396984924623e-05, "loss": 0.0, "step": 41125 }, { "epoch": 364.1592920353982, "grad_norm": 7.893530892033596e-06, "learning_rate": 2.2184547738693465e-05, "loss": 0.0, "step": 41150 }, { "epoch": 364.3805309734513, "grad_norm": 8.323923793795984e-06, "learning_rate": 2.2175125628140703e-05, "loss": 0.0, "step": 41175 }, { "epoch": 364.60176991150445, "grad_norm": 8.176364644896239e-06, "learning_rate": 2.2165703517587938e-05, "loss": 0.0, "step": 41200 }, { "epoch": 364.82300884955754, "grad_norm": 8.182892997865565e-06, "learning_rate": 2.2156281407035176e-05, "loss": 0.0, "step": 41225 }, { "epoch": 365.04424778761063, "grad_norm": 7.867778549552895e-06, "learning_rate": 2.2146859296482407e-05, "loss": 0.0, "step": 41250 }, { "epoch": 365.2654867256637, "grad_norm": 7.977713721629698e-06, "learning_rate": 2.2137437185929645e-05, "loss": 0.0, "step": 41275 }, { "epoch": 365.4867256637168, "grad_norm": 8.128724402922671e-06, "learning_rate": 2.2128015075376883e-05, "loss": 0.0, "step": 41300 }, { "epoch": 365.7079646017699, "grad_norm": 8.137687473208643e-06, "learning_rate": 2.2118592964824117e-05, "loss": 0.0, "step": 41325 }, { "epoch": 365.929203539823, "grad_norm": 8.3194172475487e-06, "learning_rate": 2.2109170854271355e-05, "loss": 0.0, "step": 41350 }, { "epoch": 366.1504424778761, "grad_norm": 7.27056840332807e-06, "learning_rate": 2.209974874371859e-05, "loss": 0.0, "step": 41375 }, { "epoch": 366.37168141592923, "grad_norm": 7.338244358834345e-06, "learning_rate": 2.2090326633165827e-05, "loss": 0.0, "step": 41400 }, { "epoch": 366.5929203539823, "grad_norm": 7.962185009091627e-06, "learning_rate": 2.2080904522613062e-05, "loss": 0.0, "step": 41425 }, { "epoch": 366.8141592920354, "grad_norm": 7.490396910725394e-06, "learning_rate": 2.20714824120603e-05, "loss": 0.0, "step": 41450 }, { "epoch": 367.0353982300885, "grad_norm": 7.658780305064283e-06, "learning_rate": 2.2062060301507534e-05, "loss": 0.0, "step": 41475 }, { "epoch": 367.2566371681416, "grad_norm": 7.379717771982541e-06, "learning_rate": 2.2052638190954772e-05, "loss": 0.0, "step": 41500 }, { "epoch": 367.4778761061947, "grad_norm": 7.632736924279016e-06, "learning_rate": 2.204321608040201e-05, "loss": 0.0, "step": 41525 }, { "epoch": 367.6991150442478, "grad_norm": 7.464186182915e-06, "learning_rate": 2.2033793969849245e-05, "loss": 0.0, "step": 41550 }, { "epoch": 367.92035398230087, "grad_norm": 7.182838544395054e-06, "learning_rate": 2.2024371859296483e-05, "loss": 0.0, "step": 41575 }, { "epoch": 368.14159292035396, "grad_norm": 7.195204489107709e-06, "learning_rate": 2.2014949748743714e-05, "loss": 0.0, "step": 41600 }, { "epoch": 368.3628318584071, "grad_norm": 6.793235570512479e-06, "learning_rate": 2.2005527638190952e-05, "loss": 0.0, "step": 41625 }, { "epoch": 368.5840707964602, "grad_norm": 7.366745194303803e-06, "learning_rate": 2.1996105527638186e-05, "loss": 0.0, "step": 41650 }, { "epoch": 368.8053097345133, "grad_norm": 7.446371455444023e-06, "learning_rate": 2.1986683417085424e-05, "loss": 0.0, "step": 41675 }, { "epoch": 369.0265486725664, "grad_norm": 6.776625923521351e-06, "learning_rate": 2.1977261306532662e-05, "loss": 0.0, "step": 41700 }, { "epoch": 369.24778761061947, "grad_norm": 7.105489657988073e-06, "learning_rate": 2.1967839195979897e-05, "loss": 0.0, "step": 41725 }, { "epoch": 369.46902654867256, "grad_norm": 6.8291860770841595e-06, "learning_rate": 2.1958417085427135e-05, "loss": 0.0, "step": 41750 }, { "epoch": 369.69026548672565, "grad_norm": 6.98080566507997e-06, "learning_rate": 2.194899497487437e-05, "loss": 0.0, "step": 41775 }, { "epoch": 369.91150442477874, "grad_norm": 6.804919394198805e-06, "learning_rate": 2.1939572864321607e-05, "loss": 0.0, "step": 41800 }, { "epoch": 370.13274336283183, "grad_norm": 6.473972007370321e-06, "learning_rate": 2.1930150753768842e-05, "loss": 0.0, "step": 41825 }, { "epoch": 370.353982300885, "grad_norm": 6.7558767113951035e-06, "learning_rate": 2.192072864321608e-05, "loss": 0.0, "step": 41850 }, { "epoch": 370.57522123893807, "grad_norm": 6.568179742316715e-06, "learning_rate": 2.1911306532663314e-05, "loss": 0.0, "step": 41875 }, { "epoch": 370.79646017699116, "grad_norm": 6.764080808352446e-06, "learning_rate": 2.1901884422110552e-05, "loss": 0.0, "step": 41900 }, { "epoch": 371.01769911504425, "grad_norm": 6.3747265812708065e-06, "learning_rate": 2.189246231155779e-05, "loss": 0.0, "step": 41925 }, { "epoch": 371.23893805309734, "grad_norm": 6.339946139632957e-06, "learning_rate": 2.188304020100502e-05, "loss": 0.0, "step": 41950 }, { "epoch": 371.46017699115043, "grad_norm": 6.556510470545618e-06, "learning_rate": 2.187361809045226e-05, "loss": 0.0, "step": 41975 }, { "epoch": 371.6814159292035, "grad_norm": 6.371596555254655e-06, "learning_rate": 2.1864195979899494e-05, "loss": 0.0, "step": 42000 }, { "epoch": 371.9026548672566, "grad_norm": 6.344841949612601e-06, "learning_rate": 2.185477386934673e-05, "loss": 0.0, "step": 42025 }, { "epoch": 372.12389380530976, "grad_norm": 6.093975571275223e-06, "learning_rate": 2.1845351758793966e-05, "loss": 0.0, "step": 42050 }, { "epoch": 372.34513274336285, "grad_norm": 6.380771992553491e-06, "learning_rate": 2.1835929648241204e-05, "loss": 0.0, "step": 42075 }, { "epoch": 372.56637168141594, "grad_norm": 6.322971330519067e-06, "learning_rate": 2.182650753768844e-05, "loss": 0.0, "step": 42100 }, { "epoch": 372.78761061946904, "grad_norm": 6.174647751322482e-06, "learning_rate": 2.1817085427135677e-05, "loss": 0.0, "step": 42125 }, { "epoch": 373.0088495575221, "grad_norm": 6.022155957907671e-06, "learning_rate": 2.1807663316582914e-05, "loss": 0.0, "step": 42150 }, { "epoch": 373.2300884955752, "grad_norm": 5.9282538131810725e-06, "learning_rate": 2.179824120603015e-05, "loss": 0.0, "step": 42175 }, { "epoch": 373.4513274336283, "grad_norm": 6.242733434191905e-06, "learning_rate": 2.1788819095477387e-05, "loss": 0.0, "step": 42200 }, { "epoch": 373.6725663716814, "grad_norm": 6.183843197504757e-06, "learning_rate": 2.177939698492462e-05, "loss": 0.0, "step": 42225 }, { "epoch": 373.8938053097345, "grad_norm": 6.0375386965461075e-06, "learning_rate": 2.176997487437186e-05, "loss": 0.0, "step": 42250 }, { "epoch": 374.11504424778764, "grad_norm": 5.5657633311057e-06, "learning_rate": 2.176055276381909e-05, "loss": 0.0, "step": 42275 }, { "epoch": 374.3362831858407, "grad_norm": 5.630624400509987e-06, "learning_rate": 2.175113065326633e-05, "loss": 0.0, "step": 42300 }, { "epoch": 374.5575221238938, "grad_norm": 5.611534561467124e-06, "learning_rate": 2.1741708542713566e-05, "loss": 0.0, "step": 42325 }, { "epoch": 374.7787610619469, "grad_norm": 6.1535006352642085e-06, "learning_rate": 2.17322864321608e-05, "loss": 0.0, "step": 42350 }, { "epoch": 375.0, "grad_norm": 1.0398673111922108e-05, "learning_rate": 2.172286432160804e-05, "loss": 0.0, "step": 42375 }, { "epoch": 375.2212389380531, "grad_norm": 5.627638984151417e-06, "learning_rate": 2.1713442211055273e-05, "loss": 0.0, "step": 42400 }, { "epoch": 375.4424778761062, "grad_norm": 5.6930493883555755e-06, "learning_rate": 2.170402010050251e-05, "loss": 0.0, "step": 42425 }, { "epoch": 375.6637168141593, "grad_norm": 5.6218946156150196e-06, "learning_rate": 2.1694597989949746e-05, "loss": 0.0, "step": 42450 }, { "epoch": 375.88495575221236, "grad_norm": 5.865671937499428e-06, "learning_rate": 2.1685175879396984e-05, "loss": 0.0, "step": 42475 }, { "epoch": 376.1061946902655, "grad_norm": 5.619004696200136e-06, "learning_rate": 2.167575376884422e-05, "loss": 0.0, "step": 42500 }, { "epoch": 376.3274336283186, "grad_norm": 5.517078534467146e-06, "learning_rate": 2.1666331658291456e-05, "loss": 0.0, "step": 42525 }, { "epoch": 376.5486725663717, "grad_norm": 5.478769708133768e-06, "learning_rate": 2.1656909547738694e-05, "loss": 0.0, "step": 42550 }, { "epoch": 376.7699115044248, "grad_norm": 5.304046680976171e-06, "learning_rate": 2.164748743718593e-05, "loss": 0.0, "step": 42575 }, { "epoch": 376.9911504424779, "grad_norm": 5.847313786944142e-06, "learning_rate": 2.1638065326633167e-05, "loss": 0.0, "step": 42600 }, { "epoch": 377.21238938053096, "grad_norm": 5.2688824325741734e-06, "learning_rate": 2.1628643216080398e-05, "loss": 0.0, "step": 42625 }, { "epoch": 377.43362831858406, "grad_norm": 5.27960719409748e-06, "learning_rate": 2.1619221105527636e-05, "loss": 0.0, "step": 42650 }, { "epoch": 377.65486725663715, "grad_norm": 5.501143277797382e-06, "learning_rate": 2.160979899497487e-05, "loss": 0.0, "step": 42675 }, { "epoch": 377.87610619469024, "grad_norm": 5.447770945465891e-06, "learning_rate": 2.1600376884422108e-05, "loss": 0.0, "step": 42700 }, { "epoch": 378.0973451327434, "grad_norm": 5.086575583845843e-06, "learning_rate": 2.1590954773869343e-05, "loss": 0.0, "step": 42725 }, { "epoch": 378.3185840707965, "grad_norm": 5.349172170099337e-06, "learning_rate": 2.158153266331658e-05, "loss": 0.0, "step": 42750 }, { "epoch": 378.53982300884957, "grad_norm": 5.16194268129766e-06, "learning_rate": 2.157211055276382e-05, "loss": 0.0, "step": 42775 }, { "epoch": 378.76106194690266, "grad_norm": 4.979154709872091e-06, "learning_rate": 2.1562688442211053e-05, "loss": 0.0, "step": 42800 }, { "epoch": 378.98230088495575, "grad_norm": 5.340259576769313e-06, "learning_rate": 2.155326633165829e-05, "loss": 0.0, "step": 42825 }, { "epoch": 379.20353982300884, "grad_norm": 4.831492333323695e-06, "learning_rate": 2.1543844221105526e-05, "loss": 0.0, "step": 42850 }, { "epoch": 379.42477876106193, "grad_norm": 4.80363451060839e-06, "learning_rate": 2.1534422110552764e-05, "loss": 0.0, "step": 42875 }, { "epoch": 379.646017699115, "grad_norm": 4.987770353181986e-06, "learning_rate": 2.1524999999999998e-05, "loss": 0.0, "step": 42900 }, { "epoch": 379.86725663716817, "grad_norm": 5.172420060262084e-06, "learning_rate": 2.1515577889447236e-05, "loss": 0.0, "step": 42925 }, { "epoch": 380.08849557522126, "grad_norm": 4.943723979522474e-06, "learning_rate": 2.1506155778894474e-05, "loss": 0.0, "step": 42950 }, { "epoch": 380.30973451327435, "grad_norm": 5.088031684863381e-06, "learning_rate": 2.1496733668341705e-05, "loss": 0.0, "step": 42975 }, { "epoch": 380.53097345132744, "grad_norm": 4.945995442540152e-06, "learning_rate": 2.1487311557788943e-05, "loss": 0.0, "step": 43000 }, { "epoch": 380.75221238938053, "grad_norm": 5.13183022121666e-06, "learning_rate": 2.1477889447236178e-05, "loss": 0.0, "step": 43025 }, { "epoch": 380.9734513274336, "grad_norm": 5.019007858209079e-06, "learning_rate": 2.1468467336683415e-05, "loss": 0.0, "step": 43050 }, { "epoch": 381.1946902654867, "grad_norm": 4.685995008912869e-06, "learning_rate": 2.145904522613065e-05, "loss": 0.0, "step": 43075 }, { "epoch": 381.4159292035398, "grad_norm": 4.931760031468002e-06, "learning_rate": 2.1449623115577888e-05, "loss": 0.0, "step": 43100 }, { "epoch": 381.6371681415929, "grad_norm": 4.94511732540559e-06, "learning_rate": 2.1440201005025122e-05, "loss": 0.0, "step": 43125 }, { "epoch": 381.85840707964604, "grad_norm": 4.9641894293017685e-06, "learning_rate": 2.143077889447236e-05, "loss": 0.0, "step": 43150 }, { "epoch": 382.07964601769913, "grad_norm": 4.620179879566422e-06, "learning_rate": 2.1421356783919598e-05, "loss": 0.0, "step": 43175 }, { "epoch": 382.3008849557522, "grad_norm": 4.6091822696325835e-06, "learning_rate": 2.1411934673366833e-05, "loss": 0.0, "step": 43200 }, { "epoch": 382.5221238938053, "grad_norm": 4.8915690058493055e-06, "learning_rate": 2.140251256281407e-05, "loss": 0.0, "step": 43225 }, { "epoch": 382.7433628318584, "grad_norm": 4.582039309752872e-06, "learning_rate": 2.1393090452261305e-05, "loss": 0.0, "step": 43250 }, { "epoch": 382.9646017699115, "grad_norm": 4.609927600540686e-06, "learning_rate": 2.1383668341708543e-05, "loss": 0.0, "step": 43275 }, { "epoch": 383.1858407079646, "grad_norm": 4.731911758426577e-06, "learning_rate": 2.1374246231155774e-05, "loss": 0.0, "step": 43300 }, { "epoch": 383.4070796460177, "grad_norm": 4.715760042017791e-06, "learning_rate": 2.1364824120603012e-05, "loss": 0.0, "step": 43325 }, { "epoch": 383.62831858407077, "grad_norm": 4.682397047872655e-06, "learning_rate": 2.1355402010050247e-05, "loss": 0.0, "step": 43350 }, { "epoch": 383.8495575221239, "grad_norm": 4.710725534096127e-06, "learning_rate": 2.1345979899497485e-05, "loss": 0.0, "step": 43375 }, { "epoch": 384.070796460177, "grad_norm": 4.361379524198128e-06, "learning_rate": 2.1336557788944723e-05, "loss": 0.0, "step": 43400 }, { "epoch": 384.2920353982301, "grad_norm": 4.351585630502086e-06, "learning_rate": 2.1327135678391957e-05, "loss": 0.0, "step": 43425 }, { "epoch": 384.5132743362832, "grad_norm": 4.552513473754516e-06, "learning_rate": 2.1317713567839195e-05, "loss": 0.0, "step": 43450 }, { "epoch": 384.7345132743363, "grad_norm": 4.465673555387184e-06, "learning_rate": 2.130829145728643e-05, "loss": 0.0, "step": 43475 }, { "epoch": 384.95575221238937, "grad_norm": 4.737195467896527e-06, "learning_rate": 2.1298869346733668e-05, "loss": 0.0, "step": 43500 }, { "epoch": 385.17699115044246, "grad_norm": 4.225391421641689e-06, "learning_rate": 2.1289447236180902e-05, "loss": 0.0, "step": 43525 }, { "epoch": 385.39823008849555, "grad_norm": 4.649395123124123e-06, "learning_rate": 2.128002512562814e-05, "loss": 0.0, "step": 43550 }, { "epoch": 385.6194690265487, "grad_norm": 4.510551661951467e-06, "learning_rate": 2.1270603015075378e-05, "loss": 0.0, "step": 43575 }, { "epoch": 385.8407079646018, "grad_norm": 4.397004886413924e-06, "learning_rate": 2.126118090452261e-05, "loss": 0.0, "step": 43600 }, { "epoch": 386.0619469026549, "grad_norm": 4.172797616774915e-06, "learning_rate": 2.125175879396985e-05, "loss": 0.0, "step": 43625 }, { "epoch": 386.283185840708, "grad_norm": 4.366389930510195e-06, "learning_rate": 2.124233668341708e-05, "loss": 0.0, "step": 43650 }, { "epoch": 386.50442477876106, "grad_norm": 4.27088389187702e-06, "learning_rate": 2.123291457286432e-05, "loss": 0.0, "step": 43675 }, { "epoch": 386.72566371681415, "grad_norm": 4.297149644116871e-06, "learning_rate": 2.1223492462311554e-05, "loss": 0.0, "step": 43700 }, { "epoch": 386.94690265486724, "grad_norm": 4.253152837918606e-06, "learning_rate": 2.1214070351758792e-05, "loss": 0.0, "step": 43725 }, { "epoch": 387.16814159292034, "grad_norm": 4.1697376218508e-06, "learning_rate": 2.1204648241206027e-05, "loss": 0.0, "step": 43750 }, { "epoch": 387.3893805309734, "grad_norm": 4.08541245633387e-06, "learning_rate": 2.1195226130653265e-05, "loss": 0.0, "step": 43775 }, { "epoch": 387.6106194690266, "grad_norm": 4.2793440115929116e-06, "learning_rate": 2.1185804020100502e-05, "loss": 0.0, "step": 43800 }, { "epoch": 387.83185840707966, "grad_norm": 4.246413482178468e-06, "learning_rate": 2.1176381909547737e-05, "loss": 0.0, "step": 43825 }, { "epoch": 388.05309734513276, "grad_norm": 3.926607405446703e-06, "learning_rate": 2.1166959798994975e-05, "loss": 0.0, "step": 43850 }, { "epoch": 388.27433628318585, "grad_norm": 3.953588020522147e-06, "learning_rate": 2.115753768844221e-05, "loss": 0.0, "step": 43875 }, { "epoch": 388.49557522123894, "grad_norm": 4.005443770438433e-06, "learning_rate": 2.1148115577889447e-05, "loss": 0.0, "step": 43900 }, { "epoch": 388.716814159292, "grad_norm": 4.178363269602414e-06, "learning_rate": 2.113869346733668e-05, "loss": 0.0, "step": 43925 }, { "epoch": 388.9380530973451, "grad_norm": 4.076013283338398e-06, "learning_rate": 2.1129271356783916e-05, "loss": 0.0, "step": 43950 }, { "epoch": 389.1592920353982, "grad_norm": 3.956217824452324e-06, "learning_rate": 2.111984924623115e-05, "loss": 0.0, "step": 43975 }, { "epoch": 389.3805309734513, "grad_norm": 4.115338015253656e-06, "learning_rate": 2.111042713567839e-05, "loss": 0.0, "step": 44000 }, { "epoch": 389.60176991150445, "grad_norm": 4.0892355173127726e-06, "learning_rate": 2.1101005025125627e-05, "loss": 0.0, "step": 44025 }, { "epoch": 389.82300884955754, "grad_norm": 4.052439635415794e-06, "learning_rate": 2.109158291457286e-05, "loss": 0.0, "step": 44050 }, { "epoch": 390.04424778761063, "grad_norm": 3.772972149818088e-06, "learning_rate": 2.10821608040201e-05, "loss": 0.0, "step": 44075 }, { "epoch": 390.2654867256637, "grad_norm": 4.031032858620165e-06, "learning_rate": 2.1072738693467334e-05, "loss": 0.0, "step": 44100 }, { "epoch": 390.4867256637168, "grad_norm": 3.997376552433707e-06, "learning_rate": 2.1063316582914572e-05, "loss": 0.0, "step": 44125 }, { "epoch": 390.7079646017699, "grad_norm": 3.919841219612863e-06, "learning_rate": 2.1053894472361806e-05, "loss": 0.0, "step": 44150 }, { "epoch": 390.929203539823, "grad_norm": 3.934241249226034e-06, "learning_rate": 2.1044472361809044e-05, "loss": 0.0, "step": 44175 }, { "epoch": 391.1504424778761, "grad_norm": 3.6503092815109994e-06, "learning_rate": 2.1035050251256282e-05, "loss": 0.0, "step": 44200 }, { "epoch": 391.37168141592923, "grad_norm": 3.957887201977428e-06, "learning_rate": 2.1025628140703517e-05, "loss": 0.0, "step": 44225 }, { "epoch": 391.5929203539823, "grad_norm": 3.96182804252021e-06, "learning_rate": 2.1016206030150755e-05, "loss": 0.0, "step": 44250 }, { "epoch": 391.8141592920354, "grad_norm": 3.896494945365703e-06, "learning_rate": 2.1006783919597986e-05, "loss": 0.0, "step": 44275 }, { "epoch": 392.0353982300885, "grad_norm": 3.6471078601607587e-06, "learning_rate": 2.0997361809045224e-05, "loss": 0.0, "step": 44300 }, { "epoch": 392.2566371681416, "grad_norm": 3.6104206628806423e-06, "learning_rate": 2.0987939698492458e-05, "loss": 0.0, "step": 44325 }, { "epoch": 392.4778761061947, "grad_norm": 3.747919208763051e-06, "learning_rate": 2.0978517587939696e-05, "loss": 0.0, "step": 44350 }, { "epoch": 392.6991150442478, "grad_norm": 3.988660864706617e-06, "learning_rate": 2.096909547738693e-05, "loss": 0.0, "step": 44375 }, { "epoch": 392.92035398230087, "grad_norm": 3.7486029214051086e-06, "learning_rate": 2.095967336683417e-05, "loss": 0.0, "step": 44400 }, { "epoch": 393.14159292035396, "grad_norm": 3.5626787848741515e-06, "learning_rate": 2.0950251256281407e-05, "loss": 0.0, "step": 44425 }, { "epoch": 393.3628318584071, "grad_norm": 3.7923480249446584e-06, "learning_rate": 2.094082914572864e-05, "loss": 0.0, "step": 44450 }, { "epoch": 393.5840707964602, "grad_norm": 3.6812375583394896e-06, "learning_rate": 2.093140703517588e-05, "loss": 0.0, "step": 44475 }, { "epoch": 393.8053097345133, "grad_norm": 3.6996293601987418e-06, "learning_rate": 2.0921984924623114e-05, "loss": 0.0, "step": 44500 }, { "epoch": 394.0265486725664, "grad_norm": 3.4894146665465087e-06, "learning_rate": 2.091256281407035e-05, "loss": 0.0, "step": 44525 }, { "epoch": 394.24778761061947, "grad_norm": 3.533129302013549e-06, "learning_rate": 2.0903140703517586e-05, "loss": 0.0, "step": 44550 }, { "epoch": 394.46902654867256, "grad_norm": 3.665757276394288e-06, "learning_rate": 2.0893718592964824e-05, "loss": 0.0, "step": 44575 }, { "epoch": 394.69026548672565, "grad_norm": 3.6431567878025817e-06, "learning_rate": 2.0884296482412055e-05, "loss": 0.0, "step": 44600 }, { "epoch": 394.91150442477874, "grad_norm": 3.835895768133923e-06, "learning_rate": 2.0874874371859293e-05, "loss": 0.0, "step": 44625 }, { "epoch": 395.13274336283183, "grad_norm": 3.4665019938984187e-06, "learning_rate": 2.086545226130653e-05, "loss": 0.0, "step": 44650 }, { "epoch": 395.353982300885, "grad_norm": 3.601419621190871e-06, "learning_rate": 2.0856030150753765e-05, "loss": 0.0, "step": 44675 }, { "epoch": 395.57522123893807, "grad_norm": 3.6155461202724837e-06, "learning_rate": 2.0846608040201003e-05, "loss": 0.0, "step": 44700 }, { "epoch": 395.79646017699116, "grad_norm": 3.4598422189446865e-06, "learning_rate": 2.0837185929648238e-05, "loss": 0.0, "step": 44725 }, { "epoch": 396.01769911504425, "grad_norm": 3.3549274576216703e-06, "learning_rate": 2.0827763819095476e-05, "loss": 0.0, "step": 44750 }, { "epoch": 396.23893805309734, "grad_norm": 3.518303401506273e-06, "learning_rate": 2.081834170854271e-05, "loss": 0.0, "step": 44775 }, { "epoch": 396.46017699115043, "grad_norm": 3.6285175610828446e-06, "learning_rate": 2.080891959798995e-05, "loss": 0.0, "step": 44800 }, { "epoch": 396.6814159292035, "grad_norm": 3.4613253774296027e-06, "learning_rate": 2.0799497487437186e-05, "loss": 0.0, "step": 44825 }, { "epoch": 396.9026548672566, "grad_norm": 3.7476902434718795e-06, "learning_rate": 2.079007537688442e-05, "loss": 0.0, "step": 44850 }, { "epoch": 397.12389380530976, "grad_norm": 3.423253474466037e-06, "learning_rate": 2.078065326633166e-05, "loss": 0.0, "step": 44875 }, { "epoch": 397.34513274336285, "grad_norm": 3.3888222787936684e-06, "learning_rate": 2.0771231155778893e-05, "loss": 0.0, "step": 44900 }, { "epoch": 397.56637168141594, "grad_norm": 3.424486749281641e-06, "learning_rate": 2.076180904522613e-05, "loss": 0.0, "step": 44925 }, { "epoch": 397.78761061946904, "grad_norm": 3.770234798139427e-06, "learning_rate": 2.0752386934673362e-05, "loss": 0.0, "step": 44950 }, { "epoch": 398.0088495575221, "grad_norm": 3.298993988209986e-06, "learning_rate": 2.07429648241206e-05, "loss": 0.0, "step": 44975 }, { "epoch": 398.2300884955752, "grad_norm": 3.589509560697479e-06, "learning_rate": 2.0733542713567835e-05, "loss": 0.0, "step": 45000 }, { "epoch": 398.2300884955752, "eval_loss": 0.7216207981109619, "eval_runtime": 66.5289, "eval_samples_per_second": 216.192, "eval_steps_per_second": 1.699, "eval_wer": 19.650938979347657, "step": 45000 }, { "epoch": 398.4513274336283, "grad_norm": 3.4010197396128206e-06, "learning_rate": 2.0724120603015073e-05, "loss": 0.0, "step": 45025 }, { "epoch": 398.6725663716814, "grad_norm": 3.478217195151956e-06, "learning_rate": 2.071469849246231e-05, "loss": 0.0, "step": 45050 }, { "epoch": 398.8938053097345, "grad_norm": 3.51021708411281e-06, "learning_rate": 2.0705276381909545e-05, "loss": 0.0, "step": 45075 }, { "epoch": 399.11504424778764, "grad_norm": 3.449275482125813e-06, "learning_rate": 2.0695854271356783e-05, "loss": 0.0, "step": 45100 }, { "epoch": 399.3362831858407, "grad_norm": 3.456630565779051e-06, "learning_rate": 2.0686432160804018e-05, "loss": 0.0, "step": 45125 }, { "epoch": 399.5575221238938, "grad_norm": 3.533165227054269e-06, "learning_rate": 2.0677010050251256e-05, "loss": 0.0, "step": 45150 }, { "epoch": 399.7787610619469, "grad_norm": 3.5161465348210186e-06, "learning_rate": 2.066758793969849e-05, "loss": 0.0, "step": 45175 }, { "epoch": 400.0, "grad_norm": 5.406152922660112e-06, "learning_rate": 2.0658165829145728e-05, "loss": 0.0, "step": 45200 }, { "epoch": 400.2212389380531, "grad_norm": 3.4149356906709727e-06, "learning_rate": 2.0648743718592963e-05, "loss": 0.0, "step": 45225 }, { "epoch": 400.4424778761062, "grad_norm": 3.3792243812058587e-06, "learning_rate": 2.06393216080402e-05, "loss": 0.0, "step": 45250 }, { "epoch": 400.6637168141593, "grad_norm": 3.351225132064428e-06, "learning_rate": 2.062989949748744e-05, "loss": 0.0, "step": 45275 }, { "epoch": 400.88495575221236, "grad_norm": 3.5591772302723257e-06, "learning_rate": 2.062047738693467e-05, "loss": 0.0, "step": 45300 }, { "epoch": 401.1061946902655, "grad_norm": 3.3068838547478663e-06, "learning_rate": 2.0611055276381908e-05, "loss": 0.0, "step": 45325 }, { "epoch": 401.3274336283186, "grad_norm": 3.5122370718454476e-06, "learning_rate": 2.0601633165829142e-05, "loss": 0.0, "step": 45350 }, { "epoch": 401.5486725663717, "grad_norm": 3.4444587981852237e-06, "learning_rate": 2.059221105527638e-05, "loss": 0.0, "step": 45375 }, { "epoch": 401.7699115044248, "grad_norm": 3.5380808185436763e-06, "learning_rate": 2.0582788944723615e-05, "loss": 0.0, "step": 45400 }, { "epoch": 401.9911504424779, "grad_norm": 3.455360456428025e-06, "learning_rate": 2.0573366834170852e-05, "loss": 0.0, "step": 45425 }, { "epoch": 402.21238938053096, "grad_norm": 3.2504053706361447e-06, "learning_rate": 2.056394472361809e-05, "loss": 0.0, "step": 45450 }, { "epoch": 402.43362831858406, "grad_norm": 3.2005780212784884e-06, "learning_rate": 2.0554522613065325e-05, "loss": 0.0, "step": 45475 }, { "epoch": 402.65486725663715, "grad_norm": 3.545672825566726e-06, "learning_rate": 2.0545100502512563e-05, "loss": 0.0, "step": 45500 }, { "epoch": 402.87610619469024, "grad_norm": 3.5241548630438047e-06, "learning_rate": 2.0535678391959797e-05, "loss": 0.0, "step": 45525 }, { "epoch": 403.0973451327434, "grad_norm": 3.229779849789338e-06, "learning_rate": 2.0526256281407035e-05, "loss": 0.0, "step": 45550 }, { "epoch": 403.3185840707965, "grad_norm": 3.3634530609560898e-06, "learning_rate": 2.051683417085427e-05, "loss": 0.0, "step": 45575 }, { "epoch": 403.53982300884957, "grad_norm": 3.3253147648792947e-06, "learning_rate": 2.0507412060301508e-05, "loss": 0.0, "step": 45600 }, { "epoch": 403.76106194690266, "grad_norm": 3.161938820994692e-06, "learning_rate": 2.049798994974874e-05, "loss": 0.0, "step": 45625 }, { "epoch": 403.98230088495575, "grad_norm": 3.285730144853005e-06, "learning_rate": 2.0488567839195977e-05, "loss": 0.0, "step": 45650 }, { "epoch": 404.20353982300884, "grad_norm": 3.1653305541112786e-06, "learning_rate": 2.0479145728643215e-05, "loss": 0.0, "step": 45675 }, { "epoch": 404.42477876106193, "grad_norm": 3.3149808587040752e-06, "learning_rate": 2.046972361809045e-05, "loss": 0.0, "step": 45700 }, { "epoch": 404.646017699115, "grad_norm": 3.448523784754798e-06, "learning_rate": 2.0460301507537687e-05, "loss": 0.0, "step": 45725 }, { "epoch": 404.86725663716817, "grad_norm": 3.1359627428173553e-06, "learning_rate": 2.0450879396984922e-05, "loss": 0.0, "step": 45750 }, { "epoch": 405.08849557522126, "grad_norm": 2.996815965161659e-06, "learning_rate": 2.044145728643216e-05, "loss": 0.0, "step": 45775 }, { "epoch": 405.30973451327435, "grad_norm": 3.1636177482141647e-06, "learning_rate": 2.0432035175879394e-05, "loss": 0.0, "step": 45800 }, { "epoch": 405.53097345132744, "grad_norm": 3.1500521799898706e-06, "learning_rate": 2.0422613065326632e-05, "loss": 0.0, "step": 45825 }, { "epoch": 405.75221238938053, "grad_norm": 3.3916041957127163e-06, "learning_rate": 2.0413190954773867e-05, "loss": 0.0, "step": 45850 }, { "epoch": 405.9734513274336, "grad_norm": 3.3309815989923663e-06, "learning_rate": 2.0403768844221105e-05, "loss": 0.0, "step": 45875 }, { "epoch": 406.1946902654867, "grad_norm": 3.1558925002173055e-06, "learning_rate": 2.0394346733668343e-05, "loss": 0.0, "step": 45900 }, { "epoch": 406.4159292035398, "grad_norm": 3.1820104595681187e-06, "learning_rate": 2.0384924623115577e-05, "loss": 0.0, "step": 45925 }, { "epoch": 406.6371681415929, "grad_norm": 3.114612354693236e-06, "learning_rate": 2.0375502512562815e-05, "loss": 0.0, "step": 45950 }, { "epoch": 406.85840707964604, "grad_norm": 3.197205387550639e-06, "learning_rate": 2.0366080402010046e-05, "loss": 0.0, "step": 45975 }, { "epoch": 407.07964601769913, "grad_norm": 3.020868007297395e-06, "learning_rate": 2.0356658291457284e-05, "loss": 0.0, "step": 46000 }, { "epoch": 407.3008849557522, "grad_norm": 3.100511321463273e-06, "learning_rate": 2.034723618090452e-05, "loss": 0.0, "step": 46025 }, { "epoch": 407.5221238938053, "grad_norm": 3.176959808115498e-06, "learning_rate": 2.0337814070351757e-05, "loss": 0.0, "step": 46050 }, { "epoch": 407.7433628318584, "grad_norm": 3.3049375360860722e-06, "learning_rate": 2.0328391959798995e-05, "loss": 0.0, "step": 46075 }, { "epoch": 407.9646017699115, "grad_norm": 3.1778336051502265e-06, "learning_rate": 2.031896984924623e-05, "loss": 0.0, "step": 46100 }, { "epoch": 408.1858407079646, "grad_norm": 3.302928462289856e-06, "learning_rate": 2.0309547738693467e-05, "loss": 0.0, "step": 46125 }, { "epoch": 408.4070796460177, "grad_norm": 3.04612012769212e-06, "learning_rate": 2.03001256281407e-05, "loss": 0.0, "step": 46150 }, { "epoch": 408.62831858407077, "grad_norm": 3.054924036405282e-06, "learning_rate": 2.029070351758794e-05, "loss": 0.0, "step": 46175 }, { "epoch": 408.8495575221239, "grad_norm": 3.196163561369758e-06, "learning_rate": 2.0281281407035174e-05, "loss": 0.0, "step": 46200 }, { "epoch": 409.070796460177, "grad_norm": 3.0519934171024943e-06, "learning_rate": 2.0271859296482412e-05, "loss": 0.0, "step": 46225 }, { "epoch": 409.2920353982301, "grad_norm": 2.9519987947423942e-06, "learning_rate": 2.0262437185929643e-05, "loss": 0.0, "step": 46250 }, { "epoch": 409.5132743362832, "grad_norm": 3.231162736483384e-06, "learning_rate": 2.0253015075376884e-05, "loss": 0.0, "step": 46275 }, { "epoch": 409.7345132743363, "grad_norm": 3.2884086067497265e-06, "learning_rate": 2.0243592964824122e-05, "loss": 0.0, "step": 46300 }, { "epoch": 409.95575221238937, "grad_norm": 3.073728748859139e-06, "learning_rate": 2.0234170854271353e-05, "loss": 0.0, "step": 46325 }, { "epoch": 410.17699115044246, "grad_norm": 2.9951029318908695e-06, "learning_rate": 2.022474874371859e-05, "loss": 0.0, "step": 46350 }, { "epoch": 410.39823008849555, "grad_norm": 3.1762967864779057e-06, "learning_rate": 2.0215326633165826e-05, "loss": 0.0, "step": 46375 }, { "epoch": 410.6194690265487, "grad_norm": 2.9752327463938855e-06, "learning_rate": 2.0205904522613064e-05, "loss": 0.0, "step": 46400 }, { "epoch": 410.8407079646018, "grad_norm": 3.271665491411113e-06, "learning_rate": 2.01964824120603e-05, "loss": 0.0, "step": 46425 }, { "epoch": 411.0619469026549, "grad_norm": 3.0426192552113207e-06, "learning_rate": 2.0187060301507536e-05, "loss": 0.0, "step": 46450 }, { "epoch": 411.283185840708, "grad_norm": 2.9339951197471237e-06, "learning_rate": 2.017763819095477e-05, "loss": 0.0, "step": 46475 }, { "epoch": 411.50442477876106, "grad_norm": 3.0151004466461018e-06, "learning_rate": 2.016821608040201e-05, "loss": 0.0, "step": 46500 }, { "epoch": 411.72566371681415, "grad_norm": 3.0302560389827704e-06, "learning_rate": 2.0158793969849247e-05, "loss": 0.0, "step": 46525 }, { "epoch": 411.94690265486724, "grad_norm": 3.044438471988542e-06, "learning_rate": 2.014937185929648e-05, "loss": 0.0, "step": 46550 }, { "epoch": 412.16814159292034, "grad_norm": 2.9556360914284596e-06, "learning_rate": 2.013994974874372e-05, "loss": 0.0, "step": 46575 }, { "epoch": 412.3893805309734, "grad_norm": 2.864688212866895e-06, "learning_rate": 2.013052763819095e-05, "loss": 0.0, "step": 46600 }, { "epoch": 412.6106194690266, "grad_norm": 3.010057298524771e-06, "learning_rate": 2.0121105527638188e-05, "loss": 0.0, "step": 46625 }, { "epoch": 412.83185840707966, "grad_norm": 2.996119519593776e-06, "learning_rate": 2.0111683417085423e-05, "loss": 0.0, "step": 46650 }, { "epoch": 413.05309734513276, "grad_norm": 2.832148766174214e-06, "learning_rate": 2.010226130653266e-05, "loss": 0.0, "step": 46675 }, { "epoch": 413.27433628318585, "grad_norm": 2.827322987286607e-06, "learning_rate": 2.0092839195979895e-05, "loss": 0.0, "step": 46700 }, { "epoch": 413.49557522123894, "grad_norm": 3.0704277378390543e-06, "learning_rate": 2.0083417085427133e-05, "loss": 0.0, "step": 46725 }, { "epoch": 413.716814159292, "grad_norm": 2.994358283103793e-06, "learning_rate": 2.007399497487437e-05, "loss": 0.0, "step": 46750 }, { "epoch": 413.9380530973451, "grad_norm": 2.950395582956844e-06, "learning_rate": 2.0064572864321606e-05, "loss": 0.0, "step": 46775 }, { "epoch": 414.1592920353982, "grad_norm": 2.840069328158279e-06, "learning_rate": 2.0055150753768844e-05, "loss": 0.0, "step": 46800 }, { "epoch": 414.3805309734513, "grad_norm": 2.8571248549269512e-06, "learning_rate": 2.0045728643216078e-05, "loss": 0.0, "step": 46825 }, { "epoch": 414.60176991150445, "grad_norm": 3.0070889351918595e-06, "learning_rate": 2.0036306532663316e-05, "loss": 0.0, "step": 46850 }, { "epoch": 414.82300884955754, "grad_norm": 2.859450205505709e-06, "learning_rate": 2.002688442211055e-05, "loss": 0.0, "step": 46875 }, { "epoch": 415.04424778761063, "grad_norm": 2.8063739136996446e-06, "learning_rate": 2.001746231155779e-05, "loss": 0.0, "step": 46900 }, { "epoch": 415.2654867256637, "grad_norm": 3.003320216521388e-06, "learning_rate": 2.0008040201005026e-05, "loss": 0.0, "step": 46925 }, { "epoch": 415.4867256637168, "grad_norm": 2.935774773504818e-06, "learning_rate": 1.9998618090452258e-05, "loss": 0.0, "step": 46950 }, { "epoch": 415.7079646017699, "grad_norm": 3.0851301744405646e-06, "learning_rate": 1.9989195979899496e-05, "loss": 0.0, "step": 46975 }, { "epoch": 415.929203539823, "grad_norm": 3.058685706491815e-06, "learning_rate": 1.997977386934673e-05, "loss": 0.0, "step": 47000 }, { "epoch": 416.1504424778761, "grad_norm": 2.8489828309830045e-06, "learning_rate": 1.9970351758793968e-05, "loss": 0.0, "step": 47025 }, { "epoch": 416.37168141592923, "grad_norm": 2.9955031095596496e-06, "learning_rate": 1.9960929648241203e-05, "loss": 0.0, "step": 47050 }, { "epoch": 416.5929203539823, "grad_norm": 3.118707127214293e-06, "learning_rate": 1.995150753768844e-05, "loss": 0.0, "step": 47075 }, { "epoch": 416.8141592920354, "grad_norm": 3.076553639402846e-06, "learning_rate": 1.9942085427135675e-05, "loss": 0.0, "step": 47100 }, { "epoch": 417.0353982300885, "grad_norm": 2.7537369078345364e-06, "learning_rate": 1.9932663316582913e-05, "loss": 0.0, "step": 47125 }, { "epoch": 417.2566371681416, "grad_norm": 3.1093720735952957e-06, "learning_rate": 1.992324120603015e-05, "loss": 0.0, "step": 47150 }, { "epoch": 417.4778761061947, "grad_norm": 2.91061360258027e-06, "learning_rate": 1.9913819095477385e-05, "loss": 0.0, "step": 47175 }, { "epoch": 417.6991150442478, "grad_norm": 3.0529065497830743e-06, "learning_rate": 1.9904396984924623e-05, "loss": 0.0, "step": 47200 }, { "epoch": 417.92035398230087, "grad_norm": 3.0268888622231316e-06, "learning_rate": 1.9894974874371858e-05, "loss": 0.0, "step": 47225 }, { "epoch": 418.14159292035396, "grad_norm": 2.9803863981214818e-06, "learning_rate": 1.9885552763819096e-05, "loss": 0.0, "step": 47250 }, { "epoch": 418.3628318584071, "grad_norm": 3.137235353278811e-06, "learning_rate": 1.9876130653266327e-05, "loss": 0.0, "step": 47275 }, { "epoch": 418.5840707964602, "grad_norm": 2.9242128221085295e-06, "learning_rate": 1.9866708542713565e-05, "loss": 0.0, "step": 47300 }, { "epoch": 418.8053097345133, "grad_norm": 3.1807164759811712e-06, "learning_rate": 1.98572864321608e-05, "loss": 0.0, "step": 47325 }, { "epoch": 419.0265486725664, "grad_norm": 2.834618271663203e-06, "learning_rate": 1.9847864321608037e-05, "loss": 0.0, "step": 47350 }, { "epoch": 419.24778761061947, "grad_norm": 2.9742027436441276e-06, "learning_rate": 1.9838442211055275e-05, "loss": 0.0, "step": 47375 }, { "epoch": 419.46902654867256, "grad_norm": 2.7457219857751625e-06, "learning_rate": 1.982902010050251e-05, "loss": 0.0, "step": 47400 }, { "epoch": 419.69026548672565, "grad_norm": 2.9694401746382937e-06, "learning_rate": 1.9819597989949748e-05, "loss": 0.0, "step": 47425 }, { "epoch": 419.91150442477874, "grad_norm": 2.895500074373558e-06, "learning_rate": 1.9810175879396982e-05, "loss": 0.0, "step": 47450 }, { "epoch": 420.13274336283183, "grad_norm": 3.1771578505868092e-06, "learning_rate": 1.980075376884422e-05, "loss": 0.0, "step": 47475 }, { "epoch": 420.353982300885, "grad_norm": 2.801159098453354e-06, "learning_rate": 1.9791331658291455e-05, "loss": 0.0, "step": 47500 }, { "epoch": 420.57522123893807, "grad_norm": 2.8174581530038267e-06, "learning_rate": 1.9781909547738693e-05, "loss": 0.0, "step": 47525 }, { "epoch": 420.79646017699116, "grad_norm": 2.925635044448427e-06, "learning_rate": 1.977248743718593e-05, "loss": 0.0, "step": 47550 }, { "epoch": 421.01769911504425, "grad_norm": 2.80713857137016e-06, "learning_rate": 1.9763065326633165e-05, "loss": 0.0, "step": 47575 }, { "epoch": 421.23893805309734, "grad_norm": 2.7711550956155406e-06, "learning_rate": 1.9753643216080403e-05, "loss": 0.0, "step": 47600 }, { "epoch": 421.46017699115043, "grad_norm": 2.8746733278239844e-06, "learning_rate": 1.9744221105527634e-05, "loss": 0.0, "step": 47625 }, { "epoch": 421.6814159292035, "grad_norm": 2.887062919398886e-06, "learning_rate": 1.9734798994974872e-05, "loss": 0.0, "step": 47650 }, { "epoch": 421.9026548672566, "grad_norm": 3.103341441601515e-06, "learning_rate": 1.9725376884422107e-05, "loss": 0.0, "step": 47675 }, { "epoch": 422.12389380530976, "grad_norm": 2.8726053642458282e-06, "learning_rate": 1.9715954773869345e-05, "loss": 0.0, "step": 47700 }, { "epoch": 422.34513274336285, "grad_norm": 2.9708226065849885e-06, "learning_rate": 1.970653266331658e-05, "loss": 0.0, "step": 47725 }, { "epoch": 422.56637168141594, "grad_norm": 2.8035974537488073e-06, "learning_rate": 1.9697110552763817e-05, "loss": 0.0, "step": 47750 }, { "epoch": 422.78761061946904, "grad_norm": 2.977450094476808e-06, "learning_rate": 1.9687688442211055e-05, "loss": 0.0, "step": 47775 }, { "epoch": 423.0088495575221, "grad_norm": 2.6973259537044214e-06, "learning_rate": 1.967826633165829e-05, "loss": 0.0, "step": 47800 }, { "epoch": 423.2300884955752, "grad_norm": 2.8973629468964646e-06, "learning_rate": 1.9668844221105527e-05, "loss": 0.0, "step": 47825 }, { "epoch": 423.4513274336283, "grad_norm": 3.022923237949726e-06, "learning_rate": 1.9659422110552762e-05, "loss": 0.0, "step": 47850 }, { "epoch": 423.6725663716814, "grad_norm": 2.8608553748199483e-06, "learning_rate": 1.965e-05, "loss": 0.0, "step": 47875 }, { "epoch": 423.8938053097345, "grad_norm": 2.992167765114573e-06, "learning_rate": 1.9640577889447234e-05, "loss": 0.0, "step": 47900 }, { "epoch": 424.11504424778764, "grad_norm": 2.817371523633483e-06, "learning_rate": 1.9631155778894472e-05, "loss": 0.0, "step": 47925 }, { "epoch": 424.3362831858407, "grad_norm": 2.841584318957757e-06, "learning_rate": 1.9621733668341704e-05, "loss": 0.0, "step": 47950 }, { "epoch": 424.5575221238938, "grad_norm": 2.8600186396943172e-06, "learning_rate": 1.961231155778894e-05, "loss": 0.0, "step": 47975 }, { "epoch": 424.7787610619469, "grad_norm": 3.074656660828623e-06, "learning_rate": 1.960288944723618e-05, "loss": 0.0, "step": 48000 }, { "epoch": 425.0, "grad_norm": 4.966138931195019e-06, "learning_rate": 1.9593467336683414e-05, "loss": 0.0, "step": 48025 }, { "epoch": 425.2212389380531, "grad_norm": 2.8140352696937043e-06, "learning_rate": 1.9584045226130652e-05, "loss": 0.0, "step": 48050 }, { "epoch": 425.4424778761062, "grad_norm": 3.0156932098179823e-06, "learning_rate": 1.9574623115577886e-05, "loss": 0.0, "step": 48075 }, { "epoch": 425.6637168141593, "grad_norm": 2.901095740526216e-06, "learning_rate": 1.9565201005025124e-05, "loss": 0.0, "step": 48100 }, { "epoch": 425.88495575221236, "grad_norm": 2.8949270927114412e-06, "learning_rate": 1.955577889447236e-05, "loss": 0.0, "step": 48125 }, { "epoch": 426.1061946902655, "grad_norm": 2.9892830752942245e-06, "learning_rate": 1.9546356783919597e-05, "loss": 0.0, "step": 48150 }, { "epoch": 426.3274336283186, "grad_norm": 2.865730039047776e-06, "learning_rate": 1.9536934673366835e-05, "loss": 0.0, "step": 48175 }, { "epoch": 426.5486725663717, "grad_norm": 2.7791438697022386e-06, "learning_rate": 1.952751256281407e-05, "loss": 0.0, "step": 48200 }, { "epoch": 426.7699115044248, "grad_norm": 2.8314416340435855e-06, "learning_rate": 1.9518090452261307e-05, "loss": 0.0, "step": 48225 }, { "epoch": 426.9911504424779, "grad_norm": 2.8140907488705125e-06, "learning_rate": 1.9508668341708542e-05, "loss": 0.0, "step": 48250 }, { "epoch": 427.21238938053096, "grad_norm": 2.712297828111332e-06, "learning_rate": 1.949924623115578e-05, "loss": 0.0, "step": 48275 }, { "epoch": 427.43362831858406, "grad_norm": 2.8046558782079956e-06, "learning_rate": 1.948982412060301e-05, "loss": 0.0, "step": 48300 }, { "epoch": 427.65486725663715, "grad_norm": 2.962072358059231e-06, "learning_rate": 1.948040201005025e-05, "loss": 0.0, "step": 48325 }, { "epoch": 427.87610619469024, "grad_norm": 2.9585091851913603e-06, "learning_rate": 1.9470979899497483e-05, "loss": 0.0, "step": 48350 }, { "epoch": 428.0973451327434, "grad_norm": 2.8744516384904273e-06, "learning_rate": 1.946155778894472e-05, "loss": 0.0, "step": 48375 }, { "epoch": 428.3185840707965, "grad_norm": 2.7880892048415262e-06, "learning_rate": 1.945213567839196e-05, "loss": 0.0, "step": 48400 }, { "epoch": 428.53982300884957, "grad_norm": 2.8465747163863853e-06, "learning_rate": 1.9442713567839194e-05, "loss": 0.0, "step": 48425 }, { "epoch": 428.76106194690266, "grad_norm": 2.8101883344788803e-06, "learning_rate": 1.943329145728643e-05, "loss": 0.0, "step": 48450 }, { "epoch": 428.98230088495575, "grad_norm": 2.871239757951116e-06, "learning_rate": 1.9423869346733666e-05, "loss": 0.0, "step": 48475 }, { "epoch": 429.20353982300884, "grad_norm": 2.6927680210064864e-06, "learning_rate": 1.9414447236180904e-05, "loss": 0.0, "step": 48500 }, { "epoch": 429.42477876106193, "grad_norm": 2.7510582185641397e-06, "learning_rate": 1.940502512562814e-05, "loss": 0.0, "step": 48525 }, { "epoch": 429.646017699115, "grad_norm": 2.9106447527738055e-06, "learning_rate": 1.9395603015075376e-05, "loss": 0.0, "step": 48550 }, { "epoch": 429.86725663716817, "grad_norm": 2.8013826067763148e-06, "learning_rate": 1.938618090452261e-05, "loss": 0.0, "step": 48575 }, { "epoch": 430.08849557522126, "grad_norm": 2.830956645993865e-06, "learning_rate": 1.937675879396985e-05, "loss": 0.0, "step": 48600 }, { "epoch": 430.30973451327435, "grad_norm": 2.759868493740214e-06, "learning_rate": 1.9367336683417087e-05, "loss": 0.0, "step": 48625 }, { "epoch": 430.53097345132744, "grad_norm": 2.810343175951857e-06, "learning_rate": 1.9357914572864318e-05, "loss": 0.0, "step": 48650 }, { "epoch": 430.75221238938053, "grad_norm": 2.884175728468108e-06, "learning_rate": 1.9348492462311556e-05, "loss": 0.0, "step": 48675 }, { "epoch": 430.9734513274336, "grad_norm": 2.8644940357480664e-06, "learning_rate": 1.933907035175879e-05, "loss": 0.0, "step": 48700 }, { "epoch": 431.1946902654867, "grad_norm": 2.7434166440798435e-06, "learning_rate": 1.932964824120603e-05, "loss": 0.0, "step": 48725 }, { "epoch": 431.4159292035398, "grad_norm": 2.942423179774778e-06, "learning_rate": 1.9320226130653263e-05, "loss": 0.0, "step": 48750 }, { "epoch": 431.6371681415929, "grad_norm": 2.762283656920772e-06, "learning_rate": 1.93108040201005e-05, "loss": 0.0, "step": 48775 }, { "epoch": 431.85840707964604, "grad_norm": 2.8748586373694707e-06, "learning_rate": 1.930138190954774e-05, "loss": 0.0, "step": 48800 }, { "epoch": 432.07964601769913, "grad_norm": 2.6902514491666807e-06, "learning_rate": 1.9291959798994973e-05, "loss": 0.0, "step": 48825 }, { "epoch": 432.3008849557522, "grad_norm": 2.8041865789418807e-06, "learning_rate": 1.928253768844221e-05, "loss": 0.0, "step": 48850 }, { "epoch": 432.5221238938053, "grad_norm": 2.7194739686819958e-06, "learning_rate": 1.9273115577889446e-05, "loss": 0.0, "step": 48875 }, { "epoch": 432.7433628318584, "grad_norm": 2.734955160121899e-06, "learning_rate": 1.9263693467336684e-05, "loss": 0.0, "step": 48900 }, { "epoch": 432.9646017699115, "grad_norm": 2.9412794901872985e-06, "learning_rate": 1.9254271356783915e-05, "loss": 0.0, "step": 48925 }, { "epoch": 433.1858407079646, "grad_norm": 2.7099554245069157e-06, "learning_rate": 1.9245226130653265e-05, "loss": 0.0, "step": 48950 }, { "epoch": 433.4070796460177, "grad_norm": 2.7584451345319394e-06, "learning_rate": 1.9235804020100502e-05, "loss": 0.0, "step": 48975 }, { "epoch": 433.62831858407077, "grad_norm": 2.700095819818671e-06, "learning_rate": 1.922638190954774e-05, "loss": 0.0, "step": 49000 }, { "epoch": 433.8495575221239, "grad_norm": 2.8912531888636295e-06, "learning_rate": 1.921695979899497e-05, "loss": 0.0, "step": 49025 }, { "epoch": 434.070796460177, "grad_norm": 2.6133909614145523e-06, "learning_rate": 1.920753768844221e-05, "loss": 0.0, "step": 49050 }, { "epoch": 434.2920353982301, "grad_norm": 2.687436335691018e-06, "learning_rate": 1.9198115577889444e-05, "loss": 0.0, "step": 49075 }, { "epoch": 434.5132743362832, "grad_norm": 2.800868742269813e-06, "learning_rate": 1.9188693467336682e-05, "loss": 0.0, "step": 49100 }, { "epoch": 434.7345132743363, "grad_norm": 2.8251261028344743e-06, "learning_rate": 1.9179271356783916e-05, "loss": 0.0, "step": 49125 }, { "epoch": 434.95575221238937, "grad_norm": 2.845092467396171e-06, "learning_rate": 1.9169849246231154e-05, "loss": 0.0, "step": 49150 }, { "epoch": 435.17699115044246, "grad_norm": 2.574101927166339e-06, "learning_rate": 1.916042713567839e-05, "loss": 0.0, "step": 49175 }, { "epoch": 435.39823008849555, "grad_norm": 2.7141547889186768e-06, "learning_rate": 1.9151005025125627e-05, "loss": 0.0, "step": 49200 }, { "epoch": 435.6194690265487, "grad_norm": 2.7489734293340007e-06, "learning_rate": 1.9141582914572865e-05, "loss": 0.0, "step": 49225 }, { "epoch": 435.8407079646018, "grad_norm": 3.0035005238460144e-06, "learning_rate": 1.91321608040201e-05, "loss": 0.0, "step": 49250 }, { "epoch": 436.0619469026549, "grad_norm": 2.671403763088165e-06, "learning_rate": 1.9122738693467337e-05, "loss": 0.0, "step": 49275 }, { "epoch": 436.283185840708, "grad_norm": 2.702410938582034e-06, "learning_rate": 1.9113316582914572e-05, "loss": 0.0, "step": 49300 }, { "epoch": 436.50442477876106, "grad_norm": 2.8210909022163833e-06, "learning_rate": 1.910389447236181e-05, "loss": 0.0, "step": 49325 }, { "epoch": 436.72566371681415, "grad_norm": 2.657197001099121e-06, "learning_rate": 1.909447236180904e-05, "loss": 0.0, "step": 49350 }, { "epoch": 436.94690265486724, "grad_norm": 2.9096586331434082e-06, "learning_rate": 1.908505025125628e-05, "loss": 0.0, "step": 49375 }, { "epoch": 437.16814159292034, "grad_norm": 2.580890850367723e-06, "learning_rate": 1.9075628140703513e-05, "loss": 0.0, "step": 49400 }, { "epoch": 437.3893805309734, "grad_norm": 2.685339268282405e-06, "learning_rate": 1.906620603015075e-05, "loss": 0.0, "step": 49425 }, { "epoch": 437.6106194690266, "grad_norm": 2.7802718705061125e-06, "learning_rate": 1.905678391959799e-05, "loss": 0.0, "step": 49450 }, { "epoch": 437.83185840707966, "grad_norm": 2.855181492122938e-06, "learning_rate": 1.9047361809045224e-05, "loss": 0.0, "step": 49475 }, { "epoch": 438.05309734513276, "grad_norm": 2.7274575131741585e-06, "learning_rate": 1.903793969849246e-05, "loss": 0.0, "step": 49500 }, { "epoch": 438.27433628318585, "grad_norm": 2.8134286367276218e-06, "learning_rate": 1.9028517587939696e-05, "loss": 0.0, "step": 49525 }, { "epoch": 438.49557522123894, "grad_norm": 2.7437515655037714e-06, "learning_rate": 1.9019095477386934e-05, "loss": 0.0, "step": 49550 }, { "epoch": 438.716814159292, "grad_norm": 2.7432595288701123e-06, "learning_rate": 1.900967336683417e-05, "loss": 0.0, "step": 49575 }, { "epoch": 438.9380530973451, "grad_norm": 2.7114510885439813e-06, "learning_rate": 1.9000251256281407e-05, "loss": 0.0, "step": 49600 }, { "epoch": 439.1592920353982, "grad_norm": 2.5858835215331055e-06, "learning_rate": 1.8990829145728645e-05, "loss": 0.0, "step": 49625 }, { "epoch": 439.3805309734513, "grad_norm": 2.623474074425758e-06, "learning_rate": 1.8981407035175876e-05, "loss": 0.0, "step": 49650 }, { "epoch": 439.60176991150445, "grad_norm": 2.6527973204792943e-06, "learning_rate": 1.8971984924623117e-05, "loss": 0.0, "step": 49675 }, { "epoch": 439.82300884955754, "grad_norm": 2.7917894840356894e-06, "learning_rate": 1.8962562814070348e-05, "loss": 0.0, "step": 49700 }, { "epoch": 440.04424778761063, "grad_norm": 2.8247927730262745e-06, "learning_rate": 1.8953140703517586e-05, "loss": 0.0, "step": 49725 }, { "epoch": 440.2654867256637, "grad_norm": 2.5932622520485893e-06, "learning_rate": 1.894371859296482e-05, "loss": 0.0, "step": 49750 }, { "epoch": 440.4867256637168, "grad_norm": 2.7305568437441252e-06, "learning_rate": 1.893429648241206e-05, "loss": 0.0, "step": 49775 }, { "epoch": 440.7079646017699, "grad_norm": 2.862411065507331e-06, "learning_rate": 1.8924874371859293e-05, "loss": 0.0, "step": 49800 }, { "epoch": 440.929203539823, "grad_norm": 2.816199867083924e-06, "learning_rate": 1.891545226130653e-05, "loss": 0.0, "step": 49825 }, { "epoch": 441.1504424778761, "grad_norm": 2.6459024411451537e-06, "learning_rate": 1.890603015075377e-05, "loss": 0.0, "step": 49850 }, { "epoch": 441.37168141592923, "grad_norm": 2.685958406800637e-06, "learning_rate": 1.8896608040201003e-05, "loss": 0.0, "step": 49875 }, { "epoch": 441.5929203539823, "grad_norm": 2.760064489848446e-06, "learning_rate": 1.888718592964824e-05, "loss": 0.0, "step": 49900 }, { "epoch": 441.8141592920354, "grad_norm": 2.759834160315222e-06, "learning_rate": 1.8877763819095476e-05, "loss": 0.0, "step": 49925 }, { "epoch": 442.0353982300885, "grad_norm": 2.649991074576974e-06, "learning_rate": 1.8868341708542714e-05, "loss": 0.0, "step": 49950 }, { "epoch": 442.2566371681416, "grad_norm": 2.5629210540500935e-06, "learning_rate": 1.8858919597989945e-05, "loss": 0.0, "step": 49975 }, { "epoch": 442.4778761061947, "grad_norm": 2.846403731382452e-06, "learning_rate": 1.8849497487437183e-05, "loss": 0.0, "step": 50000 }, { "epoch": 442.4778761061947, "eval_loss": 0.7510932087898254, "eval_runtime": 66.628, "eval_samples_per_second": 215.87, "eval_steps_per_second": 1.696, "eval_wer": 20.008149959250204, "step": 50000 }, { "epoch": 442.6991150442478, "grad_norm": 2.5836607164819725e-06, "learning_rate": 1.8840075376884417e-05, "loss": 0.0, "step": 50025 }, { "epoch": 442.92035398230087, "grad_norm": 2.808739054671605e-06, "learning_rate": 1.8830653266331655e-05, "loss": 0.0, "step": 50050 }, { "epoch": 443.14159292035396, "grad_norm": 2.6756968054542085e-06, "learning_rate": 1.8821231155778893e-05, "loss": 0.0, "step": 50075 }, { "epoch": 443.3628318584071, "grad_norm": 3.2579469007032458e-06, "learning_rate": 1.8811809045226128e-05, "loss": 0.0, "step": 50100 }, { "epoch": 443.5840707964602, "grad_norm": 2.50075777330494e-06, "learning_rate": 1.8802386934673366e-05, "loss": 0.0, "step": 50125 }, { "epoch": 443.8053097345133, "grad_norm": 2.5762346922419965e-06, "learning_rate": 1.87929648241206e-05, "loss": 0.0, "step": 50150 }, { "epoch": 444.0265486725664, "grad_norm": 2.640048933244543e-06, "learning_rate": 1.8783542713567838e-05, "loss": 0.0, "step": 50175 }, { "epoch": 444.24778761061947, "grad_norm": 2.5845190521067707e-06, "learning_rate": 1.8774120603015073e-05, "loss": 0.0, "step": 50200 }, { "epoch": 444.46902654867256, "grad_norm": 2.574469817773206e-06, "learning_rate": 1.876469849246231e-05, "loss": 0.0, "step": 50225 }, { "epoch": 444.69026548672565, "grad_norm": 2.6701522983785253e-06, "learning_rate": 1.875527638190955e-05, "loss": 0.0, "step": 50250 }, { "epoch": 444.91150442477874, "grad_norm": 2.6997888653568225e-06, "learning_rate": 1.8745854271356783e-05, "loss": 0.0, "step": 50275 }, { "epoch": 445.13274336283183, "grad_norm": 2.5578228814993054e-06, "learning_rate": 1.8736432160804018e-05, "loss": 0.0, "step": 50300 }, { "epoch": 445.353982300885, "grad_norm": 2.6898451324086636e-06, "learning_rate": 1.8727010050251252e-05, "loss": 0.0, "step": 50325 }, { "epoch": 445.57522123893807, "grad_norm": 2.729714879023959e-06, "learning_rate": 1.871758793969849e-05, "loss": 0.0, "step": 50350 }, { "epoch": 445.79646017699116, "grad_norm": 2.546138830439304e-06, "learning_rate": 1.8708165829145728e-05, "loss": 0.0, "step": 50375 }, { "epoch": 446.01769911504425, "grad_norm": 2.5031181394297164e-06, "learning_rate": 1.8698743718592963e-05, "loss": 0.0, "step": 50400 }, { "epoch": 446.23893805309734, "grad_norm": 2.50719313044101e-06, "learning_rate": 1.86893216080402e-05, "loss": 0.0, "step": 50425 }, { "epoch": 446.46017699115043, "grad_norm": 2.6820205221156357e-06, "learning_rate": 1.8679899497487435e-05, "loss": 0.0, "step": 50450 }, { "epoch": 446.6814159292035, "grad_norm": 2.6285185867891414e-06, "learning_rate": 1.8670477386934673e-05, "loss": 0.0, "step": 50475 }, { "epoch": 446.9026548672566, "grad_norm": 2.6183640784438467e-06, "learning_rate": 1.8661055276381908e-05, "loss": 0.0, "step": 50500 }, { "epoch": 447.12389380530976, "grad_norm": 2.7547628178581363e-06, "learning_rate": 1.8651633165829142e-05, "loss": 0.0, "step": 50525 }, { "epoch": 447.34513274336285, "grad_norm": 2.57356646216067e-06, "learning_rate": 1.864221105527638e-05, "loss": 0.0, "step": 50550 }, { "epoch": 447.56637168141594, "grad_norm": 2.6282780254405225e-06, "learning_rate": 1.8632788944723618e-05, "loss": 0.0, "step": 50575 }, { "epoch": 447.78761061946904, "grad_norm": 2.5603526410122868e-06, "learning_rate": 1.8623366834170853e-05, "loss": 0.0, "step": 50600 }, { "epoch": 448.0088495575221, "grad_norm": 2.3692878130532335e-06, "learning_rate": 1.861394472361809e-05, "loss": 0.0, "step": 50625 }, { "epoch": 448.2300884955752, "grad_norm": 2.406576868452248e-06, "learning_rate": 1.8604522613065325e-05, "loss": 0.0, "step": 50650 }, { "epoch": 448.4513274336283, "grad_norm": 2.6928332772513386e-06, "learning_rate": 1.859510050251256e-05, "loss": 0.0, "step": 50675 }, { "epoch": 448.6725663716814, "grad_norm": 3.041812533410848e-06, "learning_rate": 1.8585678391959797e-05, "loss": 0.0, "step": 50700 }, { "epoch": 448.8938053097345, "grad_norm": 2.7379983293940313e-06, "learning_rate": 1.8576256281407032e-05, "loss": 0.0, "step": 50725 }, { "epoch": 449.11504424778764, "grad_norm": 2.5442527658015024e-06, "learning_rate": 1.856683417085427e-05, "loss": 0.0, "step": 50750 }, { "epoch": 449.3362831858407, "grad_norm": 2.620769691930036e-06, "learning_rate": 1.8557412060301508e-05, "loss": 0.0, "step": 50775 }, { "epoch": 449.5575221238938, "grad_norm": 2.722497129070689e-06, "learning_rate": 1.8547989949748742e-05, "loss": 0.0, "step": 50800 }, { "epoch": 449.7787610619469, "grad_norm": 2.6128184344997862e-06, "learning_rate": 1.853856783919598e-05, "loss": 0.0, "step": 50825 }, { "epoch": 450.0, "grad_norm": 4.2140495679632295e-06, "learning_rate": 1.8529145728643215e-05, "loss": 0.0, "step": 50850 }, { "epoch": 450.2212389380531, "grad_norm": 2.459795950926491e-06, "learning_rate": 1.851972361809045e-05, "loss": 0.0, "step": 50875 }, { "epoch": 450.4424778761062, "grad_norm": 2.566005377957481e-06, "learning_rate": 1.8510301507537687e-05, "loss": 0.0, "step": 50900 }, { "epoch": 450.6637168141593, "grad_norm": 2.5826695946307154e-06, "learning_rate": 1.8500879396984922e-05, "loss": 0.0, "step": 50925 }, { "epoch": 450.88495575221236, "grad_norm": 2.77034428108891e-06, "learning_rate": 1.849145728643216e-05, "loss": 0.0, "step": 50950 }, { "epoch": 451.1061946902655, "grad_norm": 2.4520238639524905e-06, "learning_rate": 1.8482412060301506e-05, "loss": 0.0, "step": 50975 }, { "epoch": 451.3274336283186, "grad_norm": 2.492153271305142e-06, "learning_rate": 1.8472989949748744e-05, "loss": 0.0, "step": 51000 }, { "epoch": 451.5486725663717, "grad_norm": 2.8243096039659576e-06, "learning_rate": 1.846356783919598e-05, "loss": 0.0, "step": 51025 }, { "epoch": 451.7699115044248, "grad_norm": 2.700361619645264e-06, "learning_rate": 1.8454145728643213e-05, "loss": 0.0, "step": 51050 }, { "epoch": 451.9911504424779, "grad_norm": 2.6007703581854003e-06, "learning_rate": 1.844472361809045e-05, "loss": 0.0, "step": 51075 }, { "epoch": 452.21238938053096, "grad_norm": 2.5717495191202033e-06, "learning_rate": 1.8435301507537685e-05, "loss": 0.0, "step": 51100 }, { "epoch": 452.43362831858406, "grad_norm": 2.523006060073385e-06, "learning_rate": 1.8425879396984923e-05, "loss": 0.0, "step": 51125 }, { "epoch": 452.65486725663715, "grad_norm": 2.8293536615819903e-06, "learning_rate": 1.8416457286432158e-05, "loss": 0.0, "step": 51150 }, { "epoch": 452.87610619469024, "grad_norm": 2.6349243853474036e-06, "learning_rate": 1.8407035175879396e-05, "loss": 0.0, "step": 51175 }, { "epoch": 453.0973451327434, "grad_norm": 2.5529361664666794e-06, "learning_rate": 1.8397613065326634e-05, "loss": 0.0, "step": 51200 }, { "epoch": 453.3185840707965, "grad_norm": 2.4559985831729136e-06, "learning_rate": 1.838819095477387e-05, "loss": 0.0, "step": 51225 }, { "epoch": 453.53982300884957, "grad_norm": 2.4931005100370385e-06, "learning_rate": 1.8378768844221103e-05, "loss": 0.0, "step": 51250 }, { "epoch": 453.76106194690266, "grad_norm": 2.5593342343199765e-06, "learning_rate": 1.836934673366834e-05, "loss": 0.0, "step": 51275 }, { "epoch": 453.98230088495575, "grad_norm": 2.58477257375489e-06, "learning_rate": 1.8359924623115575e-05, "loss": 0.0, "step": 51300 }, { "epoch": 454.20353982300884, "grad_norm": 2.439564013911877e-06, "learning_rate": 1.8350502512562813e-05, "loss": 0.0, "step": 51325 }, { "epoch": 454.42477876106193, "grad_norm": 2.5281967737100786e-06, "learning_rate": 1.8341080402010048e-05, "loss": 0.0, "step": 51350 }, { "epoch": 454.646017699115, "grad_norm": 2.4738776573940413e-06, "learning_rate": 1.8331658291457286e-05, "loss": 0.0, "step": 51375 }, { "epoch": 454.86725663716817, "grad_norm": 2.611870741020539e-06, "learning_rate": 1.832223618090452e-05, "loss": 0.0, "step": 51400 }, { "epoch": 455.08849557522126, "grad_norm": 2.427132812954369e-06, "learning_rate": 1.8312814070351758e-05, "loss": 0.0, "step": 51425 }, { "epoch": 455.30973451327435, "grad_norm": 2.5273686787841143e-06, "learning_rate": 1.8303391959798993e-05, "loss": 0.0, "step": 51450 }, { "epoch": 455.53097345132744, "grad_norm": 2.533107362978626e-06, "learning_rate": 1.829396984924623e-05, "loss": 0.0, "step": 51475 }, { "epoch": 455.75221238938053, "grad_norm": 2.573655137894093e-06, "learning_rate": 1.8284547738693465e-05, "loss": 0.0, "step": 51500 }, { "epoch": 455.9734513274336, "grad_norm": 2.692726184250205e-06, "learning_rate": 1.8275125628140703e-05, "loss": 0.0, "step": 51525 }, { "epoch": 456.1946902654867, "grad_norm": 2.4501844109181548e-06, "learning_rate": 1.8265703517587938e-05, "loss": 0.0, "step": 51550 }, { "epoch": 456.4159292035398, "grad_norm": 2.59432954408112e-06, "learning_rate": 1.8256281407035172e-05, "loss": 0.0, "step": 51575 }, { "epoch": 456.6371681415929, "grad_norm": 2.526461003071745e-06, "learning_rate": 1.824685929648241e-05, "loss": 0.0, "step": 51600 }, { "epoch": 456.85840707964604, "grad_norm": 2.5812209969444666e-06, "learning_rate": 1.8237437185929648e-05, "loss": 0.0, "step": 51625 }, { "epoch": 457.07964601769913, "grad_norm": 2.737058366619749e-06, "learning_rate": 1.8228015075376883e-05, "loss": 0.0, "step": 51650 }, { "epoch": 457.3008849557522, "grad_norm": 2.8245876819710247e-06, "learning_rate": 1.821859296482412e-05, "loss": 0.0, "step": 51675 }, { "epoch": 457.5221238938053, "grad_norm": 2.8330528039077763e-06, "learning_rate": 1.8209170854271355e-05, "loss": 0.0, "step": 51700 }, { "epoch": 457.7433628318584, "grad_norm": 2.5094357170019066e-06, "learning_rate": 1.819974874371859e-05, "loss": 0.0, "step": 51725 }, { "epoch": 457.9646017699115, "grad_norm": 2.6102839001396205e-06, "learning_rate": 1.8190326633165828e-05, "loss": 0.0, "step": 51750 }, { "epoch": 458.1858407079646, "grad_norm": 2.637468469401938e-06, "learning_rate": 1.8180904522613062e-05, "loss": 0.0, "step": 51775 }, { "epoch": 458.4070796460177, "grad_norm": 2.7027026590076275e-06, "learning_rate": 1.81714824120603e-05, "loss": 0.0, "step": 51800 }, { "epoch": 458.62831858407077, "grad_norm": 2.5772344542929204e-06, "learning_rate": 1.8162060301507538e-05, "loss": 0.0, "step": 51825 }, { "epoch": 458.8495575221239, "grad_norm": 2.577135319370427e-06, "learning_rate": 1.8152638190954772e-05, "loss": 0.0, "step": 51850 }, { "epoch": 459.070796460177, "grad_norm": 2.534354507588432e-06, "learning_rate": 1.8143216080402007e-05, "loss": 0.0, "step": 51875 }, { "epoch": 459.2920353982301, "grad_norm": 2.4631845008116215e-06, "learning_rate": 1.8133793969849245e-05, "loss": 0.0, "step": 51900 }, { "epoch": 459.5132743362832, "grad_norm": 2.7923676952923415e-06, "learning_rate": 1.812437185929648e-05, "loss": 0.0, "step": 51925 }, { "epoch": 459.7345132743363, "grad_norm": 2.4481719265168067e-06, "learning_rate": 1.8114949748743717e-05, "loss": 0.0, "step": 51950 }, { "epoch": 459.95575221238937, "grad_norm": 2.587538119769306e-06, "learning_rate": 1.8105527638190952e-05, "loss": 0.0, "step": 51975 }, { "epoch": 460.17699115044246, "grad_norm": 2.447449787723599e-06, "learning_rate": 1.809610552763819e-05, "loss": 0.0, "step": 52000 }, { "epoch": 460.39823008849555, "grad_norm": 2.606778934932663e-06, "learning_rate": 1.8086683417085428e-05, "loss": 0.0, "step": 52025 }, { "epoch": 460.6194690265487, "grad_norm": 2.403493226665887e-06, "learning_rate": 1.8077261306532662e-05, "loss": 0.0, "step": 52050 }, { "epoch": 460.8407079646018, "grad_norm": 2.784099024211173e-06, "learning_rate": 1.8067839195979897e-05, "loss": 0.0, "step": 52075 }, { "epoch": 461.0619469026549, "grad_norm": 2.493517285984126e-06, "learning_rate": 1.8058417085427135e-05, "loss": 0.0, "step": 52100 }, { "epoch": 461.283185840708, "grad_norm": 2.441639253447647e-06, "learning_rate": 1.804899497487437e-05, "loss": 0.0, "step": 52125 }, { "epoch": 461.50442477876106, "grad_norm": 2.4447592750220792e-06, "learning_rate": 1.8039572864321607e-05, "loss": 0.0, "step": 52150 }, { "epoch": 461.72566371681415, "grad_norm": 2.412454023215105e-06, "learning_rate": 1.8030150753768842e-05, "loss": 0.0, "step": 52175 }, { "epoch": 461.94690265486724, "grad_norm": 2.4651419607835123e-06, "learning_rate": 1.8020728643216076e-05, "loss": 0.0, "step": 52200 }, { "epoch": 462.16814159292034, "grad_norm": 2.6935201731248526e-06, "learning_rate": 1.8011306532663314e-05, "loss": 0.0, "step": 52225 }, { "epoch": 462.3893805309734, "grad_norm": 2.4088624286378035e-06, "learning_rate": 1.8001884422110552e-05, "loss": 0.0, "step": 52250 }, { "epoch": 462.6106194690266, "grad_norm": 2.643320158313145e-06, "learning_rate": 1.7992462311557787e-05, "loss": 0.0, "step": 52275 }, { "epoch": 462.83185840707966, "grad_norm": 2.6432803679199424e-06, "learning_rate": 1.7983040201005025e-05, "loss": 0.0, "step": 52300 }, { "epoch": 463.05309734513276, "grad_norm": 2.48851779360848e-06, "learning_rate": 1.797361809045226e-05, "loss": 0.0, "step": 52325 }, { "epoch": 463.27433628318585, "grad_norm": 2.47975071943074e-06, "learning_rate": 1.7964195979899497e-05, "loss": 0.0, "step": 52350 }, { "epoch": 463.49557522123894, "grad_norm": 2.445808149786899e-06, "learning_rate": 1.795477386934673e-05, "loss": 0.0, "step": 52375 }, { "epoch": 463.716814159292, "grad_norm": 2.48322112383903e-06, "learning_rate": 1.7945351758793966e-05, "loss": 0.0, "step": 52400 }, { "epoch": 463.9380530973451, "grad_norm": 2.4229225346061867e-06, "learning_rate": 1.7935929648241204e-05, "loss": 0.0, "step": 52425 }, { "epoch": 464.1592920353982, "grad_norm": 2.4798489448585315e-06, "learning_rate": 1.7926507537688442e-05, "loss": 0.0, "step": 52450 }, { "epoch": 464.3805309734513, "grad_norm": 2.4226719688158482e-06, "learning_rate": 1.7917085427135677e-05, "loss": 0.0, "step": 52475 }, { "epoch": 464.60176991150445, "grad_norm": 2.4646217298140982e-06, "learning_rate": 1.7907663316582915e-05, "loss": 0.0, "step": 52500 }, { "epoch": 464.82300884955754, "grad_norm": 2.6280692964064656e-06, "learning_rate": 1.789824120603015e-05, "loss": 0.0, "step": 52525 }, { "epoch": 465.04424778761063, "grad_norm": 2.2911897303856676e-06, "learning_rate": 1.7888819095477384e-05, "loss": 0.0, "step": 52550 }, { "epoch": 465.2654867256637, "grad_norm": 2.3517993668065174e-06, "learning_rate": 1.787939698492462e-05, "loss": 0.0, "step": 52575 }, { "epoch": 465.4867256637168, "grad_norm": 2.7998275982099585e-06, "learning_rate": 1.7869974874371856e-05, "loss": 0.0, "step": 52600 }, { "epoch": 465.7079646017699, "grad_norm": 2.4783557819318958e-06, "learning_rate": 1.7860552763819094e-05, "loss": 0.0, "step": 52625 }, { "epoch": 465.929203539823, "grad_norm": 2.438493083900539e-06, "learning_rate": 1.7851130653266332e-05, "loss": 0.0, "step": 52650 }, { "epoch": 466.1504424778761, "grad_norm": 2.6007144242612412e-06, "learning_rate": 1.7841708542713566e-05, "loss": 0.0, "step": 52675 }, { "epoch": 466.37168141592923, "grad_norm": 2.712269633775577e-06, "learning_rate": 1.7832286432160804e-05, "loss": 0.0, "step": 52700 }, { "epoch": 466.5929203539823, "grad_norm": 2.349800979573047e-06, "learning_rate": 1.782286432160804e-05, "loss": 0.0, "step": 52725 }, { "epoch": 466.8141592920354, "grad_norm": 2.4265636966447346e-06, "learning_rate": 1.7813442211055273e-05, "loss": 0.0, "step": 52750 }, { "epoch": 467.0353982300885, "grad_norm": 2.4047831175266765e-06, "learning_rate": 1.780402010050251e-05, "loss": 0.0, "step": 52775 }, { "epoch": 467.2566371681416, "grad_norm": 2.610750016174279e-06, "learning_rate": 1.7794597989949746e-05, "loss": 0.0, "step": 52800 }, { "epoch": 467.4778761061947, "grad_norm": 2.4527455479983473e-06, "learning_rate": 1.7785175879396984e-05, "loss": 0.0, "step": 52825 }, { "epoch": 467.6991150442478, "grad_norm": 2.474863549650763e-06, "learning_rate": 1.7775753768844222e-05, "loss": 0.0, "step": 52850 }, { "epoch": 467.92035398230087, "grad_norm": 2.4742107598285656e-06, "learning_rate": 1.7766331658291456e-05, "loss": 0.0, "step": 52875 }, { "epoch": 468.14159292035396, "grad_norm": 2.5593444661353715e-06, "learning_rate": 1.775690954773869e-05, "loss": 0.0, "step": 52900 }, { "epoch": 468.3628318584071, "grad_norm": 2.505693146304111e-06, "learning_rate": 1.774748743718593e-05, "loss": 0.0, "step": 52925 }, { "epoch": 468.5840707964602, "grad_norm": 2.582510887805256e-06, "learning_rate": 1.7738065326633163e-05, "loss": 0.0, "step": 52950 }, { "epoch": 468.8053097345133, "grad_norm": 2.4047749320743605e-06, "learning_rate": 1.77286432160804e-05, "loss": 0.0, "step": 52975 }, { "epoch": 469.0265486725664, "grad_norm": 2.3159736883826554e-06, "learning_rate": 1.7719597989949747e-05, "loss": 0.0, "step": 53000 }, { "epoch": 469.24778761061947, "grad_norm": 2.5044546418939717e-06, "learning_rate": 1.7710175879396982e-05, "loss": 0.0, "step": 53025 }, { "epoch": 469.46902654867256, "grad_norm": 2.4503503937012283e-06, "learning_rate": 1.770075376884422e-05, "loss": 0.0, "step": 53050 }, { "epoch": 469.69026548672565, "grad_norm": 2.604824430818553e-06, "learning_rate": 1.7691331658291458e-05, "loss": 0.0, "step": 53075 }, { "epoch": 469.91150442477874, "grad_norm": 2.328453547306708e-06, "learning_rate": 1.7681909547738692e-05, "loss": 0.0, "step": 53100 }, { "epoch": 470.13274336283183, "grad_norm": 2.4079338345472934e-06, "learning_rate": 1.7672487437185927e-05, "loss": 0.0, "step": 53125 }, { "epoch": 470.353982300885, "grad_norm": 2.5730869310791604e-06, "learning_rate": 1.7663065326633165e-05, "loss": 0.0, "step": 53150 }, { "epoch": 470.57522123893807, "grad_norm": 2.3676948330830783e-06, "learning_rate": 1.76536432160804e-05, "loss": 0.0, "step": 53175 }, { "epoch": 470.79646017699116, "grad_norm": 2.392448095633881e-06, "learning_rate": 1.7644221105527637e-05, "loss": 0.0, "step": 53200 }, { "epoch": 471.01769911504425, "grad_norm": 2.3017428247840144e-06, "learning_rate": 1.7634798994974872e-05, "loss": 0.0, "step": 53225 }, { "epoch": 471.23893805309734, "grad_norm": 2.324481329196715e-06, "learning_rate": 1.762537688442211e-05, "loss": 0.0, "step": 53250 }, { "epoch": 471.46017699115043, "grad_norm": 2.2619324226980098e-06, "learning_rate": 1.7615954773869344e-05, "loss": 0.0, "step": 53275 }, { "epoch": 471.6814159292035, "grad_norm": 2.6303741833544336e-06, "learning_rate": 1.7606532663316582e-05, "loss": 0.0, "step": 53300 }, { "epoch": 471.9026548672566, "grad_norm": 2.470807203280856e-06, "learning_rate": 1.7597110552763817e-05, "loss": 0.0, "step": 53325 }, { "epoch": 472.12389380530976, "grad_norm": 2.3562988644698635e-06, "learning_rate": 1.7587688442211055e-05, "loss": 0.0, "step": 53350 }, { "epoch": 472.34513274336285, "grad_norm": 2.5061976884899195e-06, "learning_rate": 1.757826633165829e-05, "loss": 0.0, "step": 53375 }, { "epoch": 472.56637168141594, "grad_norm": 2.491158056727727e-06, "learning_rate": 1.7568844221105527e-05, "loss": 0.0, "step": 53400 }, { "epoch": 472.78761061946904, "grad_norm": 2.9218956569820875e-06, "learning_rate": 1.7559422110552762e-05, "loss": 0.0, "step": 53425 }, { "epoch": 473.0088495575221, "grad_norm": 2.3921506908664014e-06, "learning_rate": 1.755e-05, "loss": 0.0, "step": 53450 }, { "epoch": 473.2300884955752, "grad_norm": 2.4834296254994115e-06, "learning_rate": 1.7540577889447234e-05, "loss": 0.0, "step": 53475 }, { "epoch": 473.4513274336283, "grad_norm": 2.483423259036499e-06, "learning_rate": 1.7531155778894472e-05, "loss": 0.0, "step": 53500 }, { "epoch": 473.6725663716814, "grad_norm": 2.3926402263896307e-06, "learning_rate": 1.7521733668341707e-05, "loss": 0.0, "step": 53525 }, { "epoch": 473.8938053097345, "grad_norm": 2.3037969185679685e-06, "learning_rate": 1.7512311557788945e-05, "loss": 0.0, "step": 53550 }, { "epoch": 474.11504424778764, "grad_norm": 2.3717686872259947e-06, "learning_rate": 1.750288944723618e-05, "loss": 0.0, "step": 53575 }, { "epoch": 474.3362831858407, "grad_norm": 2.297096898473683e-06, "learning_rate": 1.7493467336683414e-05, "loss": 0.0, "step": 53600 }, { "epoch": 474.5575221238938, "grad_norm": 2.32602042160579e-06, "learning_rate": 1.748404522613065e-05, "loss": 0.0, "step": 53625 }, { "epoch": 474.7787610619469, "grad_norm": 2.4084229153231718e-06, "learning_rate": 1.7474623115577886e-05, "loss": 0.0, "step": 53650 }, { "epoch": 475.0, "grad_norm": 4.047346010338515e-06, "learning_rate": 1.7465201005025124e-05, "loss": 0.0, "step": 53675 }, { "epoch": 475.2212389380531, "grad_norm": 2.308554712726618e-06, "learning_rate": 1.7455778894472362e-05, "loss": 0.0, "step": 53700 }, { "epoch": 475.4424778761062, "grad_norm": 2.567288902355358e-06, "learning_rate": 1.7446356783919597e-05, "loss": 0.0, "step": 53725 }, { "epoch": 475.6637168141593, "grad_norm": 2.498889898561174e-06, "learning_rate": 1.743693467336683e-05, "loss": 0.0, "step": 53750 }, { "epoch": 475.88495575221236, "grad_norm": 2.3596683149662567e-06, "learning_rate": 1.742751256281407e-05, "loss": 0.0, "step": 53775 }, { "epoch": 476.1061946902655, "grad_norm": 2.3876175418990897e-06, "learning_rate": 1.7418090452261304e-05, "loss": 0.0, "step": 53800 }, { "epoch": 476.3274336283186, "grad_norm": 2.63274500866828e-06, "learning_rate": 1.740866834170854e-05, "loss": 0.0, "step": 53825 }, { "epoch": 476.5486725663717, "grad_norm": 2.362333361816127e-06, "learning_rate": 1.7399246231155776e-05, "loss": 0.0, "step": 53850 }, { "epoch": 476.7699115044248, "grad_norm": 2.377772943873424e-06, "learning_rate": 1.7389824120603014e-05, "loss": 0.0, "step": 53875 }, { "epoch": 476.9911504424779, "grad_norm": 2.5179035674227634e-06, "learning_rate": 1.7380402010050252e-05, "loss": 0.0, "step": 53900 }, { "epoch": 477.21238938053096, "grad_norm": 2.318895894859452e-06, "learning_rate": 1.7370979899497486e-05, "loss": 0.0, "step": 53925 }, { "epoch": 477.43362831858406, "grad_norm": 2.3598379357281374e-06, "learning_rate": 1.736155778894472e-05, "loss": 0.0, "step": 53950 }, { "epoch": 477.65486725663715, "grad_norm": 2.4320424927282147e-06, "learning_rate": 1.735213567839196e-05, "loss": 0.0, "step": 53975 }, { "epoch": 477.87610619469024, "grad_norm": 2.342974312341539e-06, "learning_rate": 1.7342713567839193e-05, "loss": 0.0, "step": 54000 }, { "epoch": 478.0973451327434, "grad_norm": 2.2943588646739954e-06, "learning_rate": 1.733329145728643e-05, "loss": 0.0, "step": 54025 }, { "epoch": 478.3185840707965, "grad_norm": 2.2839283246867126e-06, "learning_rate": 1.7323869346733666e-05, "loss": 0.0, "step": 54050 }, { "epoch": 478.53982300884957, "grad_norm": 2.5582432954252e-06, "learning_rate": 1.7314447236180904e-05, "loss": 0.0, "step": 54075 }, { "epoch": 478.76106194690266, "grad_norm": 2.404645783826709e-06, "learning_rate": 1.730502512562814e-05, "loss": 0.0, "step": 54100 }, { "epoch": 478.98230088495575, "grad_norm": 2.382188313276856e-06, "learning_rate": 1.7295603015075376e-05, "loss": 0.0, "step": 54125 }, { "epoch": 479.20353982300884, "grad_norm": 2.3399181827699067e-06, "learning_rate": 1.728618090452261e-05, "loss": 0.0, "step": 54150 }, { "epoch": 479.42477876106193, "grad_norm": 2.378886620135745e-06, "learning_rate": 1.727675879396985e-05, "loss": 0.0, "step": 54175 }, { "epoch": 479.646017699115, "grad_norm": 2.236791942777927e-06, "learning_rate": 1.7267336683417083e-05, "loss": 0.0, "step": 54200 }, { "epoch": 479.86725663716817, "grad_norm": 2.291723831149284e-06, "learning_rate": 1.725791457286432e-05, "loss": 0.0, "step": 54225 }, { "epoch": 480.08849557522126, "grad_norm": 2.2041583633836126e-06, "learning_rate": 1.7248492462311556e-05, "loss": 0.0, "step": 54250 }, { "epoch": 480.30973451327435, "grad_norm": 2.448513669150998e-06, "learning_rate": 1.723907035175879e-05, "loss": 0.0, "step": 54275 }, { "epoch": 480.53097345132744, "grad_norm": 2.530321808080771e-06, "learning_rate": 1.7229648241206028e-05, "loss": 0.0, "step": 54300 }, { "epoch": 480.75221238938053, "grad_norm": 2.4651005787745817e-06, "learning_rate": 1.7220226130653266e-05, "loss": 0.0, "step": 54325 }, { "epoch": 480.9734513274336, "grad_norm": 2.5006620489875786e-06, "learning_rate": 1.72108040201005e-05, "loss": 0.0, "step": 54350 }, { "epoch": 481.1946902654867, "grad_norm": 2.198681841036887e-06, "learning_rate": 1.720138190954774e-05, "loss": 0.0, "step": 54375 }, { "epoch": 481.4159292035398, "grad_norm": 2.283863295815536e-06, "learning_rate": 1.7191959798994973e-05, "loss": 0.0, "step": 54400 }, { "epoch": 481.6371681415929, "grad_norm": 2.3540646907349583e-06, "learning_rate": 1.7182537688442208e-05, "loss": 0.0, "step": 54425 }, { "epoch": 481.85840707964604, "grad_norm": 2.280231910845032e-06, "learning_rate": 1.7173115577889446e-05, "loss": 0.0, "step": 54450 }, { "epoch": 482.07964601769913, "grad_norm": 2.2237786652112845e-06, "learning_rate": 1.716369346733668e-05, "loss": 0.0, "step": 54475 }, { "epoch": 482.3008849557522, "grad_norm": 2.3458487703464925e-06, "learning_rate": 1.7154271356783918e-05, "loss": 0.0, "step": 54500 }, { "epoch": 482.5221238938053, "grad_norm": 2.177742089770618e-06, "learning_rate": 1.7144849246231156e-05, "loss": 0.0, "step": 54525 }, { "epoch": 482.7433628318584, "grad_norm": 2.262646148665226e-06, "learning_rate": 1.713542713567839e-05, "loss": 0.0, "step": 54550 }, { "epoch": 482.9646017699115, "grad_norm": 2.6594109385769116e-06, "learning_rate": 1.712600502512563e-05, "loss": 0.0, "step": 54575 }, { "epoch": 483.1858407079646, "grad_norm": 2.6345842343289405e-06, "learning_rate": 1.7116582914572863e-05, "loss": 0.0, "step": 54600 }, { "epoch": 483.4070796460177, "grad_norm": 2.66127221948409e-06, "learning_rate": 1.7107160804020098e-05, "loss": 0.0, "step": 54625 }, { "epoch": 483.62831858407077, "grad_norm": 2.28798216994619e-06, "learning_rate": 1.7097738693467335e-05, "loss": 0.0, "step": 54650 }, { "epoch": 483.8495575221239, "grad_norm": 2.385197376497672e-06, "learning_rate": 1.708831658291457e-05, "loss": 0.0, "step": 54675 }, { "epoch": 484.070796460177, "grad_norm": 2.402812924628961e-06, "learning_rate": 1.7078894472361808e-05, "loss": 0.0, "step": 54700 }, { "epoch": 484.2920353982301, "grad_norm": 2.3140294160839403e-06, "learning_rate": 1.7069472361809046e-05, "loss": 0.0, "step": 54725 }, { "epoch": 484.5132743362832, "grad_norm": 2.3801740098861046e-06, "learning_rate": 1.706005025125628e-05, "loss": 0.0, "step": 54750 }, { "epoch": 484.7345132743363, "grad_norm": 2.268875959998695e-06, "learning_rate": 1.7050628140703515e-05, "loss": 0.0, "step": 54775 }, { "epoch": 484.95575221238937, "grad_norm": 2.3081599920260487e-06, "learning_rate": 1.7041206030150753e-05, "loss": 0.0, "step": 54800 }, { "epoch": 485.17699115044246, "grad_norm": 2.3107859306037426e-06, "learning_rate": 1.7031783919597987e-05, "loss": 0.0, "step": 54825 }, { "epoch": 485.39823008849555, "grad_norm": 2.3513125597673934e-06, "learning_rate": 1.7022361809045225e-05, "loss": 0.0, "step": 54850 }, { "epoch": 485.6194690265487, "grad_norm": 2.6042598619824275e-06, "learning_rate": 1.701293969849246e-05, "loss": 0.0, "step": 54875 }, { "epoch": 485.8407079646018, "grad_norm": 2.2892822926223744e-06, "learning_rate": 1.7003517587939694e-05, "loss": 0.0, "step": 54900 }, { "epoch": 486.0619469026549, "grad_norm": 2.3698762561252806e-06, "learning_rate": 1.6994095477386936e-05, "loss": 0.0, "step": 54925 }, { "epoch": 486.283185840708, "grad_norm": 2.355194055780885e-06, "learning_rate": 1.698467336683417e-05, "loss": 0.0, "step": 54950 }, { "epoch": 486.50442477876106, "grad_norm": 2.3505883746111067e-06, "learning_rate": 1.6975251256281405e-05, "loss": 0.0, "step": 54975 }, { "epoch": 486.72566371681415, "grad_norm": 2.399460072410875e-06, "learning_rate": 1.6965829145728643e-05, "loss": 0.0, "step": 55000 }, { "epoch": 486.72566371681415, "eval_loss": 0.7904303073883057, "eval_runtime": 66.1846, "eval_samples_per_second": 217.316, "eval_steps_per_second": 1.707, "eval_wer": 20.459865785777453, "step": 55000 }, { "epoch": 486.94690265486724, "grad_norm": 2.5407496195839485e-06, "learning_rate": 1.6956407035175877e-05, "loss": 0.0, "step": 55025 }, { "epoch": 487.16814159292034, "grad_norm": 2.2144549802760594e-06, "learning_rate": 1.6947361809045224e-05, "loss": 0.0, "step": 55050 }, { "epoch": 487.3893805309734, "grad_norm": 2.271754738103482e-06, "learning_rate": 1.693793969849246e-05, "loss": 0.0, "step": 55075 }, { "epoch": 487.6106194690266, "grad_norm": 2.243612698293873e-06, "learning_rate": 1.6928517587939696e-05, "loss": 0.0, "step": 55100 }, { "epoch": 487.83185840707966, "grad_norm": 2.2436893232224975e-06, "learning_rate": 1.6919095477386934e-05, "loss": 0.0, "step": 55125 }, { "epoch": 488.05309734513276, "grad_norm": 2.4534683689125814e-06, "learning_rate": 1.690967336683417e-05, "loss": 0.0, "step": 55150 }, { "epoch": 488.27433628318585, "grad_norm": 2.3609482013853267e-06, "learning_rate": 1.6900251256281406e-05, "loss": 0.0, "step": 55175 }, { "epoch": 488.49557522123894, "grad_norm": 2.180064484491595e-06, "learning_rate": 1.689082914572864e-05, "loss": 0.0, "step": 55200 }, { "epoch": 488.716814159292, "grad_norm": 2.2105423340690322e-06, "learning_rate": 1.688140703517588e-05, "loss": 0.0, "step": 55225 }, { "epoch": 488.9380530973451, "grad_norm": 2.385672360105673e-06, "learning_rate": 1.6871984924623113e-05, "loss": 0.0, "step": 55250 }, { "epoch": 489.1592920353982, "grad_norm": 2.2432025161833735e-06, "learning_rate": 1.686256281407035e-05, "loss": 0.0, "step": 55275 }, { "epoch": 489.3805309734513, "grad_norm": 2.384917252129526e-06, "learning_rate": 1.6853140703517586e-05, "loss": 0.0, "step": 55300 }, { "epoch": 489.60176991150445, "grad_norm": 2.1560310869972454e-06, "learning_rate": 1.6843718592964824e-05, "loss": 0.0, "step": 55325 }, { "epoch": 489.82300884955754, "grad_norm": 2.4243936422863044e-06, "learning_rate": 1.6834296482412058e-05, "loss": 0.0, "step": 55350 }, { "epoch": 490.04424778761063, "grad_norm": 3.0094713565631537e-06, "learning_rate": 1.6824874371859296e-05, "loss": 0.0, "step": 55375 }, { "epoch": 490.2654867256637, "grad_norm": 2.24698396777967e-06, "learning_rate": 1.681545226130653e-05, "loss": 0.0, "step": 55400 }, { "epoch": 490.4867256637168, "grad_norm": 2.2297215309663443e-06, "learning_rate": 1.680603015075377e-05, "loss": 0.0, "step": 55425 }, { "epoch": 490.7079646017699, "grad_norm": 2.2107728909759317e-06, "learning_rate": 1.6796608040201003e-05, "loss": 0.0, "step": 55450 }, { "epoch": 490.929203539823, "grad_norm": 2.3521472485299455e-06, "learning_rate": 1.6787185929648238e-05, "loss": 0.0, "step": 55475 }, { "epoch": 491.1504424778761, "grad_norm": 2.246472604383598e-06, "learning_rate": 1.6777763819095476e-05, "loss": 0.0, "step": 55500 }, { "epoch": 491.37168141592923, "grad_norm": 2.3897575829323614e-06, "learning_rate": 1.6768341708542714e-05, "loss": 0.0, "step": 55525 }, { "epoch": 491.5929203539823, "grad_norm": 2.2839994926471263e-06, "learning_rate": 1.6758919597989948e-05, "loss": 0.0, "step": 55550 }, { "epoch": 491.8141592920354, "grad_norm": 2.175724148401059e-06, "learning_rate": 1.6749497487437186e-05, "loss": 0.0, "step": 55575 }, { "epoch": 492.0353982300885, "grad_norm": 2.6180330223724013e-06, "learning_rate": 1.674007537688442e-05, "loss": 0.0, "step": 55600 }, { "epoch": 492.2566371681416, "grad_norm": 2.1969867702864576e-06, "learning_rate": 1.6730653266331655e-05, "loss": 0.0, "step": 55625 }, { "epoch": 492.4778761061947, "grad_norm": 2.5714889488881454e-06, "learning_rate": 1.6721231155778893e-05, "loss": 0.0, "step": 55650 }, { "epoch": 492.6991150442478, "grad_norm": 2.2948302103031892e-06, "learning_rate": 1.6711809045226128e-05, "loss": 0.0, "step": 55675 }, { "epoch": 492.92035398230087, "grad_norm": 2.2227434328669915e-06, "learning_rate": 1.6702386934673366e-05, "loss": 0.0, "step": 55700 }, { "epoch": 493.14159292035396, "grad_norm": 2.325073410247569e-06, "learning_rate": 1.66929648241206e-05, "loss": 0.0, "step": 55725 }, { "epoch": 493.3628318584071, "grad_norm": 2.211025503129349e-06, "learning_rate": 1.6683542713567838e-05, "loss": 0.0, "step": 55750 }, { "epoch": 493.5840707964602, "grad_norm": 2.468476168360212e-06, "learning_rate": 1.6674120603015076e-05, "loss": 0.0, "step": 55775 }, { "epoch": 493.8053097345133, "grad_norm": 2.3331085685640574e-06, "learning_rate": 1.666469849246231e-05, "loss": 0.0, "step": 55800 }, { "epoch": 494.0265486725664, "grad_norm": 2.1859380012756446e-06, "learning_rate": 1.6655276381909545e-05, "loss": 0.0, "step": 55825 }, { "epoch": 494.24778761061947, "grad_norm": 2.1724113139498513e-06, "learning_rate": 1.6645854271356783e-05, "loss": 0.0, "step": 55850 }, { "epoch": 494.46902654867256, "grad_norm": 2.2887822979100747e-06, "learning_rate": 1.6636432160804017e-05, "loss": 0.0, "step": 55875 }, { "epoch": 494.69026548672565, "grad_norm": 2.304637973793433e-06, "learning_rate": 1.6627010050251255e-05, "loss": 0.0, "step": 55900 }, { "epoch": 494.91150442477874, "grad_norm": 2.286943981744116e-06, "learning_rate": 1.661758793969849e-05, "loss": 0.0, "step": 55925 }, { "epoch": 495.13274336283183, "grad_norm": 2.531564177843393e-06, "learning_rate": 1.6608165829145728e-05, "loss": 0.0, "step": 55950 }, { "epoch": 495.353982300885, "grad_norm": 2.2124722818261944e-06, "learning_rate": 1.6598743718592962e-05, "loss": 0.0, "step": 55975 }, { "epoch": 495.57522123893807, "grad_norm": 2.3985323878150666e-06, "learning_rate": 1.65893216080402e-05, "loss": 0.0, "step": 56000 }, { "epoch": 495.79646017699116, "grad_norm": 2.2804726995673263e-06, "learning_rate": 1.6579899497487435e-05, "loss": 0.0, "step": 56025 }, { "epoch": 496.01769911504425, "grad_norm": 2.1945813841739437e-06, "learning_rate": 1.6570477386934673e-05, "loss": 0.0, "step": 56050 }, { "epoch": 496.23893805309734, "grad_norm": 2.2473509488918353e-06, "learning_rate": 1.6561055276381907e-05, "loss": 0.0, "step": 56075 }, { "epoch": 496.46017699115043, "grad_norm": 2.2894701032782905e-06, "learning_rate": 1.6551633165829145e-05, "loss": 0.0, "step": 56100 }, { "epoch": 496.6814159292035, "grad_norm": 2.148014800695819e-06, "learning_rate": 1.654221105527638e-05, "loss": 0.0, "step": 56125 }, { "epoch": 496.9026548672566, "grad_norm": 2.564495616752538e-06, "learning_rate": 1.6532788944723618e-05, "loss": 0.0, "step": 56150 }, { "epoch": 497.12389380530976, "grad_norm": 2.2099100078776246e-06, "learning_rate": 1.6523366834170852e-05, "loss": 0.0, "step": 56175 }, { "epoch": 497.34513274336285, "grad_norm": 2.3972922917891992e-06, "learning_rate": 1.651394472361809e-05, "loss": 0.0, "step": 56200 }, { "epoch": 497.56637168141594, "grad_norm": 2.457747086737072e-06, "learning_rate": 1.6504522613065325e-05, "loss": 0.0, "step": 56225 }, { "epoch": 497.78761061946904, "grad_norm": 2.2426966097555123e-06, "learning_rate": 1.6495100502512563e-05, "loss": 0.0, "step": 56250 }, { "epoch": 498.0088495575221, "grad_norm": 2.2918284230399877e-06, "learning_rate": 1.6485678391959797e-05, "loss": 0.0, "step": 56275 }, { "epoch": 498.2300884955752, "grad_norm": 2.17585784412222e-06, "learning_rate": 1.6476256281407032e-05, "loss": 0.0, "step": 56300 }, { "epoch": 498.4513274336283, "grad_norm": 2.161479415008216e-06, "learning_rate": 1.646683417085427e-05, "loss": 0.0, "step": 56325 }, { "epoch": 498.6725663716814, "grad_norm": 2.421582848910475e-06, "learning_rate": 1.6457412060301504e-05, "loss": 0.0, "step": 56350 }, { "epoch": 498.8938053097345, "grad_norm": 2.4185080746974563e-06, "learning_rate": 1.6447989949748742e-05, "loss": 0.0, "step": 56375 }, { "epoch": 499.11504424778764, "grad_norm": 2.1616513095068512e-06, "learning_rate": 1.643856783919598e-05, "loss": 0.0, "step": 56400 }, { "epoch": 499.3362831858407, "grad_norm": 2.2416020328819286e-06, "learning_rate": 1.6429145728643215e-05, "loss": 0.0, "step": 56425 }, { "epoch": 499.5575221238938, "grad_norm": 2.435425358271459e-06, "learning_rate": 1.6419723618090453e-05, "loss": 0.0, "step": 56450 }, { "epoch": 499.7787610619469, "grad_norm": 2.14709712054173e-06, "learning_rate": 1.6410301507537687e-05, "loss": 0.0, "step": 56475 }, { "epoch": 500.0, "grad_norm": 3.813337798419525e-06, "learning_rate": 1.640087939698492e-05, "loss": 0.0, "step": 56500 }, { "epoch": 500.2212389380531, "grad_norm": 2.2054202872823225e-06, "learning_rate": 1.639145728643216e-05, "loss": 0.0, "step": 56525 }, { "epoch": 500.4424778761062, "grad_norm": 2.623778300403501e-06, "learning_rate": 1.6382035175879394e-05, "loss": 0.0, "step": 56550 }, { "epoch": 500.6637168141593, "grad_norm": 2.288427367602708e-06, "learning_rate": 1.6372613065326632e-05, "loss": 0.0, "step": 56575 }, { "epoch": 500.88495575221236, "grad_norm": 2.2385988813766744e-06, "learning_rate": 1.636319095477387e-05, "loss": 0.0, "step": 56600 }, { "epoch": 501.1061946902655, "grad_norm": 2.1202388325036736e-06, "learning_rate": 1.6353768844221104e-05, "loss": 0.0, "step": 56625 }, { "epoch": 501.3274336283186, "grad_norm": 2.1544938135775737e-06, "learning_rate": 1.634434673366834e-05, "loss": 0.0, "step": 56650 }, { "epoch": 501.5486725663717, "grad_norm": 2.269000560772838e-06, "learning_rate": 1.6334924623115577e-05, "loss": 0.0, "step": 56675 }, { "epoch": 501.7699115044248, "grad_norm": 2.2459566935140174e-06, "learning_rate": 1.632550251256281e-05, "loss": 0.0, "step": 56700 }, { "epoch": 501.9911504424779, "grad_norm": 2.366734861425357e-06, "learning_rate": 1.631608040201005e-05, "loss": 0.0, "step": 56725 }, { "epoch": 502.21238938053096, "grad_norm": 2.2491008166980464e-06, "learning_rate": 1.6306658291457284e-05, "loss": 0.0, "step": 56750 }, { "epoch": 502.43362831858406, "grad_norm": 2.2520573565998347e-06, "learning_rate": 1.629723618090452e-05, "loss": 0.0, "step": 56775 }, { "epoch": 502.65486725663715, "grad_norm": 2.18295781451161e-06, "learning_rate": 1.628781407035176e-05, "loss": 0.0, "step": 56800 }, { "epoch": 502.87610619469024, "grad_norm": 2.468743559802533e-06, "learning_rate": 1.6278391959798994e-05, "loss": 0.0, "step": 56825 }, { "epoch": 503.0973451327434, "grad_norm": 2.408407226539566e-06, "learning_rate": 1.626896984924623e-05, "loss": 0.0, "step": 56850 }, { "epoch": 503.3185840707965, "grad_norm": 2.6439038265380077e-06, "learning_rate": 1.6259547738693467e-05, "loss": 0.0, "step": 56875 }, { "epoch": 503.53982300884957, "grad_norm": 2.210963430115953e-06, "learning_rate": 1.62501256281407e-05, "loss": 0.0, "step": 56900 }, { "epoch": 503.76106194690266, "grad_norm": 2.18524814954435e-06, "learning_rate": 1.624070351758794e-05, "loss": 0.0, "step": 56925 }, { "epoch": 503.98230088495575, "grad_norm": 2.3124639483285137e-06, "learning_rate": 1.6231281407035174e-05, "loss": 0.0, "step": 56950 }, { "epoch": 504.20353982300884, "grad_norm": 2.193597538280301e-06, "learning_rate": 1.622185929648241e-05, "loss": 0.0, "step": 56975 }, { "epoch": 504.42477876106193, "grad_norm": 2.225488287876942e-06, "learning_rate": 1.6212437185929646e-05, "loss": 0.0, "step": 57000 }, { "epoch": 504.646017699115, "grad_norm": 2.271610583193251e-06, "learning_rate": 1.6203015075376884e-05, "loss": 0.0, "step": 57025 }, { "epoch": 504.86725663716817, "grad_norm": 2.379915713390801e-06, "learning_rate": 1.619359296482412e-05, "loss": 0.0, "step": 57050 }, { "epoch": 505.08849557522126, "grad_norm": 2.09743461709877e-06, "learning_rate": 1.6184547738693465e-05, "loss": 0.0, "step": 57075 }, { "epoch": 505.30973451327435, "grad_norm": 2.8712115636153612e-06, "learning_rate": 1.6175125628140703e-05, "loss": 0.0, "step": 57100 }, { "epoch": 505.53097345132744, "grad_norm": 2.462386419210816e-06, "learning_rate": 1.6165703517587937e-05, "loss": 0.0, "step": 57125 }, { "epoch": 505.75221238938053, "grad_norm": 2.167316552004195e-06, "learning_rate": 1.6156281407035175e-05, "loss": 0.0, "step": 57150 }, { "epoch": 505.9734513274336, "grad_norm": 2.2049068775231717e-06, "learning_rate": 1.614685929648241e-05, "loss": 0.0, "step": 57175 }, { "epoch": 506.1946902654867, "grad_norm": 2.2350263861881103e-06, "learning_rate": 1.6137437185929648e-05, "loss": 0.0, "step": 57200 }, { "epoch": 506.4159292035398, "grad_norm": 2.2310214262688532e-06, "learning_rate": 1.6128015075376882e-05, "loss": 0.0, "step": 57225 }, { "epoch": 506.6371681415929, "grad_norm": 2.4185321763070533e-06, "learning_rate": 1.611859296482412e-05, "loss": 0.0, "step": 57250 }, { "epoch": 506.85840707964604, "grad_norm": 2.383060291322181e-06, "learning_rate": 1.6109170854271355e-05, "loss": 0.0, "step": 57275 }, { "epoch": 507.07964601769913, "grad_norm": 2.324021806998644e-06, "learning_rate": 1.6099748743718593e-05, "loss": 0.0, "step": 57300 }, { "epoch": 507.3008849557522, "grad_norm": 2.368271907471353e-06, "learning_rate": 1.6090326633165827e-05, "loss": 0.0, "step": 57325 }, { "epoch": 507.5221238938053, "grad_norm": 2.3462873741664225e-06, "learning_rate": 1.6080904522613062e-05, "loss": 0.0, "step": 57350 }, { "epoch": 507.7433628318584, "grad_norm": 2.2652407096757088e-06, "learning_rate": 1.60714824120603e-05, "loss": 0.0, "step": 57375 }, { "epoch": 507.9646017699115, "grad_norm": 2.4624571324238786e-06, "learning_rate": 1.6062060301507538e-05, "loss": 0.0, "step": 57400 }, { "epoch": 508.1858407079646, "grad_norm": 2.4905627924454166e-06, "learning_rate": 1.6052638190954772e-05, "loss": 0.0, "step": 57425 }, { "epoch": 508.4070796460177, "grad_norm": 2.4047565148066496e-06, "learning_rate": 1.604321608040201e-05, "loss": 0.0, "step": 57450 }, { "epoch": 508.62831858407077, "grad_norm": 2.4676767225173535e-06, "learning_rate": 1.6033793969849245e-05, "loss": 0.0, "step": 57475 }, { "epoch": 508.8495575221239, "grad_norm": 2.1416926756501198e-06, "learning_rate": 1.602437185929648e-05, "loss": 0.0, "step": 57500 }, { "epoch": 509.070796460177, "grad_norm": 2.3627662812941708e-06, "learning_rate": 1.6014949748743717e-05, "loss": 0.0, "step": 57525 }, { "epoch": 509.2920353982301, "grad_norm": 2.2780202471039956e-06, "learning_rate": 1.6005527638190952e-05, "loss": 0.0, "step": 57550 }, { "epoch": 509.5132743362832, "grad_norm": 2.1476726033142768e-06, "learning_rate": 1.599610552763819e-05, "loss": 0.0, "step": 57575 }, { "epoch": 509.7345132743363, "grad_norm": 2.186010988225462e-06, "learning_rate": 1.5986683417085428e-05, "loss": 0.0, "step": 57600 }, { "epoch": 509.95575221238937, "grad_norm": 2.1599473711830797e-06, "learning_rate": 1.5977261306532662e-05, "loss": 0.0, "step": 57625 }, { "epoch": 510.17699115044246, "grad_norm": 2.400791345280595e-06, "learning_rate": 1.59678391959799e-05, "loss": 0.0, "step": 57650 }, { "epoch": 510.39823008849555, "grad_norm": 2.2050048755772877e-06, "learning_rate": 1.5958417085427135e-05, "loss": 0.0, "step": 57675 }, { "epoch": 510.6194690265487, "grad_norm": 2.3849136141507188e-06, "learning_rate": 1.594899497487437e-05, "loss": 0.0, "step": 57700 }, { "epoch": 510.8407079646018, "grad_norm": 2.3569550648971926e-06, "learning_rate": 1.5939572864321607e-05, "loss": 0.0, "step": 57725 }, { "epoch": 511.0619469026549, "grad_norm": 2.2415824787458405e-06, "learning_rate": 1.593015075376884e-05, "loss": 0.0, "step": 57750 }, { "epoch": 511.283185840708, "grad_norm": 2.2757963051844854e-06, "learning_rate": 1.592072864321608e-05, "loss": 0.0, "step": 57775 }, { "epoch": 511.50442477876106, "grad_norm": 2.119940518241492e-06, "learning_rate": 1.5911306532663314e-05, "loss": 0.0, "step": 57800 }, { "epoch": 511.72566371681415, "grad_norm": 2.2797992187406635e-06, "learning_rate": 1.5901884422110552e-05, "loss": 0.0, "step": 57825 }, { "epoch": 511.94690265486724, "grad_norm": 2.1259679670038167e-06, "learning_rate": 1.5892462311557787e-05, "loss": 0.0, "step": 57850 }, { "epoch": 512.1681415929204, "grad_norm": 2.3415022951667197e-06, "learning_rate": 1.5883040201005024e-05, "loss": 0.0, "step": 57875 }, { "epoch": 512.3893805309734, "grad_norm": 2.384511617492535e-06, "learning_rate": 1.587361809045226e-05, "loss": 0.0, "step": 57900 }, { "epoch": 512.6106194690266, "grad_norm": 2.256380412291037e-06, "learning_rate": 1.5864195979899497e-05, "loss": 0.0, "step": 57925 }, { "epoch": 512.8318584070796, "grad_norm": 2.203285021096235e-06, "learning_rate": 1.585477386934673e-05, "loss": 0.0, "step": 57950 }, { "epoch": 513.0530973451328, "grad_norm": 2.3515131033491343e-06, "learning_rate": 1.584535175879397e-05, "loss": 0.0, "step": 57975 }, { "epoch": 513.2743362831858, "grad_norm": 2.1387556898844196e-06, "learning_rate": 1.5835929648241204e-05, "loss": 0.0, "step": 58000 }, { "epoch": 513.4955752212389, "grad_norm": 2.266461251565488e-06, "learning_rate": 1.5826507537688442e-05, "loss": 0.0, "step": 58025 }, { "epoch": 513.7168141592921, "grad_norm": 2.2025176349416142e-06, "learning_rate": 1.5817085427135676e-05, "loss": 0.0, "step": 58050 }, { "epoch": 513.9380530973451, "grad_norm": 2.2797883048042422e-06, "learning_rate": 1.5807663316582914e-05, "loss": 0.0, "step": 58075 }, { "epoch": 514.1592920353983, "grad_norm": 2.1867679151910124e-06, "learning_rate": 1.579824120603015e-05, "loss": 0.0, "step": 58100 }, { "epoch": 514.3805309734513, "grad_norm": 2.2326719317788957e-06, "learning_rate": 1.5788819095477387e-05, "loss": 0.0, "step": 58125 }, { "epoch": 514.6017699115044, "grad_norm": 2.189374754379969e-06, "learning_rate": 1.577939698492462e-05, "loss": 0.0, "step": 58150 }, { "epoch": 514.8230088495575, "grad_norm": 2.527415745134931e-06, "learning_rate": 1.5769974874371856e-05, "loss": 0.0, "step": 58175 }, { "epoch": 515.0442477876106, "grad_norm": 2.103259021168924e-06, "learning_rate": 1.5760552763819094e-05, "loss": 0.0, "step": 58200 }, { "epoch": 515.2654867256637, "grad_norm": 2.143930032616481e-06, "learning_rate": 1.5751130653266332e-05, "loss": 0.0, "step": 58225 }, { "epoch": 515.4867256637168, "grad_norm": 2.148665316781262e-06, "learning_rate": 1.5741708542713566e-05, "loss": 0.0, "step": 58250 }, { "epoch": 515.70796460177, "grad_norm": 2.193175532738678e-06, "learning_rate": 1.5732286432160804e-05, "loss": 0.0, "step": 58275 }, { "epoch": 515.929203539823, "grad_norm": 2.19213006857899e-06, "learning_rate": 1.572286432160804e-05, "loss": 0.0, "step": 58300 }, { "epoch": 516.1504424778761, "grad_norm": 2.039242417595233e-06, "learning_rate": 1.5713442211055277e-05, "loss": 0.0, "step": 58325 }, { "epoch": 516.3716814159292, "grad_norm": 2.0806487555091735e-06, "learning_rate": 1.570402010050251e-05, "loss": 0.0, "step": 58350 }, { "epoch": 516.5929203539823, "grad_norm": 2.325680725334678e-06, "learning_rate": 1.5694597989949746e-05, "loss": 0.0, "step": 58375 }, { "epoch": 516.8141592920354, "grad_norm": 2.5328290575998835e-06, "learning_rate": 1.5685175879396984e-05, "loss": 0.0, "step": 58400 }, { "epoch": 517.0353982300885, "grad_norm": 2.160409167117905e-06, "learning_rate": 1.5675753768844218e-05, "loss": 0.0, "step": 58425 }, { "epoch": 517.2566371681415, "grad_norm": 2.1411708530649776e-06, "learning_rate": 1.5666331658291456e-05, "loss": 0.0, "step": 58450 }, { "epoch": 517.4778761061947, "grad_norm": 2.3736286038911203e-06, "learning_rate": 1.5656909547738694e-05, "loss": 0.0, "step": 58475 }, { "epoch": 517.6991150442478, "grad_norm": 2.218063173131668e-06, "learning_rate": 1.564748743718593e-05, "loss": 0.0, "step": 58500 }, { "epoch": 517.9203539823009, "grad_norm": 2.130582515746937e-06, "learning_rate": 1.5638065326633163e-05, "loss": 0.0, "step": 58525 }, { "epoch": 518.141592920354, "grad_norm": 2.1940231818007305e-06, "learning_rate": 1.56286432160804e-05, "loss": 0.0, "step": 58550 }, { "epoch": 518.362831858407, "grad_norm": 2.4122100512613542e-06, "learning_rate": 1.5619221105527636e-05, "loss": 0.0, "step": 58575 }, { "epoch": 518.5840707964602, "grad_norm": 2.4846845008141827e-06, "learning_rate": 1.5609798994974873e-05, "loss": 0.0, "step": 58600 }, { "epoch": 518.8053097345132, "grad_norm": 2.2036381324141985e-06, "learning_rate": 1.5600376884422108e-05, "loss": 0.0, "step": 58625 }, { "epoch": 519.0265486725664, "grad_norm": 2.2525050553667825e-06, "learning_rate": 1.5590954773869346e-05, "loss": 0.0, "step": 58650 }, { "epoch": 519.2477876106195, "grad_norm": 2.0942813989677234e-06, "learning_rate": 1.5581532663316584e-05, "loss": 0.0, "step": 58675 }, { "epoch": 519.4690265486726, "grad_norm": 2.2243748389882967e-06, "learning_rate": 1.557211055276382e-05, "loss": 0.0, "step": 58700 }, { "epoch": 519.6902654867257, "grad_norm": 2.1014434423705097e-06, "learning_rate": 1.5562688442211053e-05, "loss": 0.0, "step": 58725 }, { "epoch": 519.9115044247787, "grad_norm": 2.2393073777493555e-06, "learning_rate": 1.555326633165829e-05, "loss": 0.0, "step": 58750 }, { "epoch": 520.1327433628319, "grad_norm": 2.0777383724635e-06, "learning_rate": 1.5543844221105525e-05, "loss": 0.0, "step": 58775 }, { "epoch": 520.3539823008849, "grad_norm": 2.6034640541183762e-06, "learning_rate": 1.5534422110552763e-05, "loss": 0.0, "step": 58800 }, { "epoch": 520.5752212389381, "grad_norm": 2.0831812435062602e-06, "learning_rate": 1.5524999999999998e-05, "loss": 0.0, "step": 58825 }, { "epoch": 520.7964601769911, "grad_norm": 2.136677721864544e-06, "learning_rate": 1.5515577889447232e-05, "loss": 0.0, "step": 58850 }, { "epoch": 521.0176991150443, "grad_norm": 2.1674488834833028e-06, "learning_rate": 1.550615577889447e-05, "loss": 0.0, "step": 58875 }, { "epoch": 521.2389380530974, "grad_norm": 2.1403648133855313e-06, "learning_rate": 1.5496733668341708e-05, "loss": 0.0, "step": 58900 }, { "epoch": 521.4601769911504, "grad_norm": 2.262939460706548e-06, "learning_rate": 1.5487311557788943e-05, "loss": 0.0, "step": 58925 }, { "epoch": 521.6814159292036, "grad_norm": 2.329650897081592e-06, "learning_rate": 1.547788944723618e-05, "loss": 0.0, "step": 58950 }, { "epoch": 521.9026548672566, "grad_norm": 2.1805572032462806e-06, "learning_rate": 1.5468467336683415e-05, "loss": 0.0, "step": 58975 }, { "epoch": 522.1238938053098, "grad_norm": 2.138223862857558e-06, "learning_rate": 1.545904522613065e-05, "loss": 0.0, "step": 59000 }, { "epoch": 522.3451327433628, "grad_norm": 2.077035105685354e-06, "learning_rate": 1.5449623115577888e-05, "loss": 0.0, "step": 59025 }, { "epoch": 522.566371681416, "grad_norm": 2.119351620422094e-06, "learning_rate": 1.5440201005025122e-05, "loss": 0.0, "step": 59050 }, { "epoch": 522.787610619469, "grad_norm": 2.1277608084346866e-06, "learning_rate": 1.543077889447236e-05, "loss": 0.0, "step": 59075 }, { "epoch": 523.0088495575221, "grad_norm": 2.2435103801399237e-06, "learning_rate": 1.5421733668341706e-05, "loss": 0.0, "step": 59100 }, { "epoch": 523.2300884955753, "grad_norm": 2.109457454935182e-06, "learning_rate": 1.5412311557788944e-05, "loss": 0.0, "step": 59125 }, { "epoch": 523.4513274336283, "grad_norm": 2.1871469471079763e-06, "learning_rate": 1.540288944723618e-05, "loss": 0.0, "step": 59150 }, { "epoch": 523.6725663716815, "grad_norm": 2.164952775274287e-06, "learning_rate": 1.5393467336683417e-05, "loss": 0.0, "step": 59175 }, { "epoch": 523.8938053097345, "grad_norm": 2.2691756385029294e-06, "learning_rate": 1.538404522613065e-05, "loss": 0.0, "step": 59200 }, { "epoch": 524.1150442477876, "grad_norm": 2.530915253373678e-06, "learning_rate": 1.5374623115577886e-05, "loss": 0.0, "step": 59225 }, { "epoch": 524.3362831858407, "grad_norm": 2.0736520127684344e-06, "learning_rate": 1.5365201005025124e-05, "loss": 0.0, "step": 59250 }, { "epoch": 524.5575221238938, "grad_norm": 2.1487687718035886e-06, "learning_rate": 1.5355778894472362e-05, "loss": 0.0, "step": 59275 }, { "epoch": 524.7787610619469, "grad_norm": 2.1183616354392143e-06, "learning_rate": 1.5346356783919596e-05, "loss": 0.0, "step": 59300 }, { "epoch": 525.0, "grad_norm": 3.7341492316045333e-06, "learning_rate": 1.5336934673366834e-05, "loss": 0.0, "step": 59325 }, { "epoch": 525.2212389380531, "grad_norm": 2.2728333988197846e-06, "learning_rate": 1.532751256281407e-05, "loss": 0.0, "step": 59350 }, { "epoch": 525.4424778761062, "grad_norm": 2.1280679902702104e-06, "learning_rate": 1.5318090452261303e-05, "loss": 0.0, "step": 59375 }, { "epoch": 525.6637168141593, "grad_norm": 2.2984590941632632e-06, "learning_rate": 1.530866834170854e-05, "loss": 0.0, "step": 59400 }, { "epoch": 525.8849557522124, "grad_norm": 2.1854207261640113e-06, "learning_rate": 1.5299246231155776e-05, "loss": 0.0, "step": 59425 }, { "epoch": 526.1061946902655, "grad_norm": 2.3425454855896533e-06, "learning_rate": 1.5289824120603014e-05, "loss": 0.0, "step": 59450 }, { "epoch": 526.3274336283185, "grad_norm": 2.1124915292602964e-06, "learning_rate": 1.528040201005025e-05, "loss": 0.0, "step": 59475 }, { "epoch": 526.5486725663717, "grad_norm": 2.2464760149887297e-06, "learning_rate": 1.5270979899497486e-05, "loss": 0.0, "step": 59500 }, { "epoch": 526.7699115044247, "grad_norm": 2.171899268432753e-06, "learning_rate": 1.5261557788944724e-05, "loss": 0.0, "step": 59525 }, { "epoch": 526.9911504424779, "grad_norm": 2.255975232401397e-06, "learning_rate": 1.5252135678391959e-05, "loss": 0.0, "step": 59550 }, { "epoch": 527.212389380531, "grad_norm": 2.0363463590911124e-06, "learning_rate": 1.5242713567839195e-05, "loss": 0.0, "step": 59575 }, { "epoch": 527.433628318584, "grad_norm": 2.167700813515694e-06, "learning_rate": 1.5233291457286431e-05, "loss": 0.0, "step": 59600 }, { "epoch": 527.6548672566372, "grad_norm": 2.0624515855161007e-06, "learning_rate": 1.5223869346733666e-05, "loss": 0.0, "step": 59625 }, { "epoch": 527.8761061946902, "grad_norm": 2.2426829673349857e-06, "learning_rate": 1.5214447236180902e-05, "loss": 0.0, "step": 59650 }, { "epoch": 528.0973451327434, "grad_norm": 2.1550199562625494e-06, "learning_rate": 1.520502512562814e-05, "loss": 0.0, "step": 59675 }, { "epoch": 528.3185840707964, "grad_norm": 2.2220683604246005e-06, "learning_rate": 1.5195603015075376e-05, "loss": 0.0, "step": 59700 }, { "epoch": 528.5398230088496, "grad_norm": 2.1761745756521123e-06, "learning_rate": 1.5186180904522612e-05, "loss": 0.0, "step": 59725 }, { "epoch": 528.7610619469026, "grad_norm": 2.047935140581103e-06, "learning_rate": 1.5176758793969849e-05, "loss": 0.0, "step": 59750 }, { "epoch": 528.9823008849557, "grad_norm": 2.144729705833015e-06, "learning_rate": 1.5167336683417085e-05, "loss": 0.0, "step": 59775 }, { "epoch": 529.2035398230089, "grad_norm": 2.17788169720734e-06, "learning_rate": 1.515791457286432e-05, "loss": 0.0, "step": 59800 }, { "epoch": 529.4247787610619, "grad_norm": 2.1259872937662294e-06, "learning_rate": 1.5148492462311556e-05, "loss": 0.0, "step": 59825 }, { "epoch": 529.6460176991151, "grad_norm": 2.2486324269266333e-06, "learning_rate": 1.5139070351758792e-05, "loss": 0.0, "step": 59850 }, { "epoch": 529.8672566371681, "grad_norm": 2.5968774934881367e-06, "learning_rate": 1.5129648241206028e-05, "loss": 0.0, "step": 59875 }, { "epoch": 530.0884955752213, "grad_norm": 2.685433173610363e-06, "learning_rate": 1.5120226130653266e-05, "loss": 0.0, "step": 59900 }, { "epoch": 530.3097345132743, "grad_norm": 2.180721139666275e-06, "learning_rate": 1.5110804020100502e-05, "loss": 0.0, "step": 59925 }, { "epoch": 530.5309734513274, "grad_norm": 2.1548758013523184e-06, "learning_rate": 1.5101381909547738e-05, "loss": 0.0, "step": 59950 }, { "epoch": 530.7522123893805, "grad_norm": 2.1548214590438874e-06, "learning_rate": 1.5091959798994973e-05, "loss": 0.0, "step": 59975 }, { "epoch": 530.9734513274336, "grad_norm": 2.1280141027091304e-06, "learning_rate": 1.5082537688442209e-05, "loss": 0.0, "step": 60000 }, { "epoch": 530.9734513274336, "eval_loss": 0.8207878470420837, "eval_runtime": 67.7959, "eval_samples_per_second": 212.152, "eval_steps_per_second": 1.667, "eval_wer": 20.84135324004231, "step": 60000 }, { "epoch": 530.9734513274336, "step": 60000, "total_flos": 3.760322979667968e+20, "train_loss": 0.007700834417891049, "train_runtime": 44653.923, "train_samples_per_second": 573.298, "train_steps_per_second": 2.239 } ], "logging_steps": 25, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 885, "save_steps": 5000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.760322979667968e+20, "train_batch_size": 256, "trial_name": null, "trial_params": null }