{ "best_global_step": 7500, "best_metric": 0.1822061687707901, "best_model_checkpoint": "./Wav2vec2-afr/checkpoint-7500", "epoch": 6.031374915151972, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015084093823063579, "grad_norm": 27.733379364013672, "learning_rate": 1.4999999999999999e-05, "loss": 20.2292, "step": 25 }, { "epoch": 0.030168187646127158, "grad_norm": 19.019153594970703, "learning_rate": 2.9999999999999997e-05, "loss": 11.6267, "step": 50 }, { "epoch": 0.04525228146919074, "grad_norm": 5.631908416748047, "learning_rate": 4.4999999999999996e-05, "loss": 4.8714, "step": 75 }, { "epoch": 0.060336375292254316, "grad_norm": 2.689542293548584, "learning_rate": 5.9999999999999995e-05, "loss": 3.5888, "step": 100 }, { "epoch": 0.0754204691153179, "grad_norm": 1.0974892377853394, "learning_rate": 7.5e-05, "loss": 3.2364, "step": 125 }, { "epoch": 0.09050456293838148, "grad_norm": 0.6232272386550903, "learning_rate": 8.999999999999999e-05, "loss": 3.0531, "step": 150 }, { "epoch": 0.10558865676144506, "grad_norm": 0.3990340828895569, "learning_rate": 0.00010499999999999999, "loss": 3.0197, "step": 175 }, { "epoch": 0.12067275058450863, "grad_norm": 0.6495853662490845, "learning_rate": 0.00011999999999999999, "loss": 2.9908, "step": 200 }, { "epoch": 0.1357568444075722, "grad_norm": 1.156081199645996, "learning_rate": 0.000135, "loss": 2.9629, "step": 225 }, { "epoch": 0.1508409382306358, "grad_norm": 1.0735247135162354, "learning_rate": 0.00015, "loss": 2.5531, "step": 250 }, { "epoch": 0.16592503205369938, "grad_norm": 0.6276996731758118, "learning_rate": 0.0001644, "loss": 1.7912, "step": 275 }, { "epoch": 0.18100912587676296, "grad_norm": 1.1443787813186646, "learning_rate": 0.00017939999999999997, "loss": 0.7469, "step": 300 }, { "epoch": 0.19609321969982654, "grad_norm": 0.47428029775619507, "learning_rate": 0.00019439999999999998, "loss": 0.7277, "step": 325 }, { "epoch": 0.21117731352289013, "grad_norm": 1.0143519639968872, "learning_rate": 0.00020939999999999997, "loss": 0.4411, "step": 350 }, { "epoch": 0.22626140734595368, "grad_norm": 0.36140990257263184, "learning_rate": 0.00022439999999999998, "loss": 0.4558, "step": 375 }, { "epoch": 0.24134550116901726, "grad_norm": 0.6368806958198547, "learning_rate": 0.0002394, "loss": 0.2993, "step": 400 }, { "epoch": 0.25642959499208084, "grad_norm": 0.5180590748786926, "learning_rate": 0.00025439999999999995, "loss": 0.4165, "step": 425 }, { "epoch": 0.2715136888151444, "grad_norm": 5.711215972900391, "learning_rate": 0.0002694, "loss": 0.2686, "step": 450 }, { "epoch": 0.286597782638208, "grad_norm": 1.2874069213867188, "learning_rate": 0.0002844, "loss": 0.4259, "step": 475 }, { "epoch": 0.3016818764612716, "grad_norm": 0.6762956380844116, "learning_rate": 0.00029939999999999996, "loss": 0.2638, "step": 500 }, { "epoch": 0.3016818764612716, "eval_loss": 0.32083866000175476, "eval_runtime": 754.2459, "eval_samples_per_second": 4.395, "eval_steps_per_second": 1.099, "eval_wer": 0.3275495330130109, "step": 500 }, { "epoch": 0.3167659702843352, "grad_norm": 0.3699875771999359, "learning_rate": 0.0002999272139102305, "loss": 0.4233, "step": 525 }, { "epoch": 0.33185006410739876, "grad_norm": 0.9410744905471802, "learning_rate": 0.00029985139506672056, "loss": 0.2435, "step": 550 }, { "epoch": 0.34693415793046234, "grad_norm": 0.7363893389701843, "learning_rate": 0.00029977557622321065, "loss": 0.4594, "step": 575 }, { "epoch": 0.3620182517535259, "grad_norm": 0.9007164239883423, "learning_rate": 0.00029969975737970074, "loss": 0.2262, "step": 600 }, { "epoch": 0.3771023455765895, "grad_norm": 0.4770212173461914, "learning_rate": 0.0002996239385361908, "loss": 0.4473, "step": 625 }, { "epoch": 0.3921864393996531, "grad_norm": 0.9380179643630981, "learning_rate": 0.00029954811969268096, "loss": 0.227, "step": 650 }, { "epoch": 0.40727053322271667, "grad_norm": 0.5483216643333435, "learning_rate": 0.000299472300849171, "loss": 0.3786, "step": 675 }, { "epoch": 0.42235462704578025, "grad_norm": 0.44440972805023193, "learning_rate": 0.0002993964820056611, "loss": 0.1966, "step": 700 }, { "epoch": 0.4374387208688438, "grad_norm": 0.6046245098114014, "learning_rate": 0.0002993206631621512, "loss": 0.3704, "step": 725 }, { "epoch": 0.45252281469190736, "grad_norm": 0.6455674767494202, "learning_rate": 0.0002992448443186413, "loss": 0.1906, "step": 750 }, { "epoch": 0.46760690851497094, "grad_norm": 0.35442134737968445, "learning_rate": 0.0002991690254751314, "loss": 0.3416, "step": 775 }, { "epoch": 0.4826910023380345, "grad_norm": 0.9801518321037292, "learning_rate": 0.0002990932066316215, "loss": 0.1892, "step": 800 }, { "epoch": 0.4977750961610981, "grad_norm": 0.4081840217113495, "learning_rate": 0.00029901738778811157, "loss": 0.3756, "step": 825 }, { "epoch": 0.5128591899841617, "grad_norm": 0.6348875164985657, "learning_rate": 0.00029894156894460166, "loss": 0.1788, "step": 850 }, { "epoch": 0.5279432838072253, "grad_norm": 0.2996980547904968, "learning_rate": 0.00029886575010109175, "loss": 0.3618, "step": 875 }, { "epoch": 0.5430273776302889, "grad_norm": 0.5273446440696716, "learning_rate": 0.00029878993125758183, "loss": 0.1717, "step": 900 }, { "epoch": 0.5581114714533525, "grad_norm": 0.7878848910331726, "learning_rate": 0.000298714112414072, "loss": 0.3516, "step": 925 }, { "epoch": 0.573195565276416, "grad_norm": 0.57522052526474, "learning_rate": 0.00029863829357056206, "loss": 0.1619, "step": 950 }, { "epoch": 0.5882796590994795, "grad_norm": 0.2759730815887451, "learning_rate": 0.00029856247472705215, "loss": 0.3349, "step": 975 }, { "epoch": 0.6033637529225432, "grad_norm": 0.8881000876426697, "learning_rate": 0.00029848665588354224, "loss": 0.1603, "step": 1000 }, { "epoch": 0.6033637529225432, "eval_loss": 0.24969196319580078, "eval_runtime": 757.505, "eval_samples_per_second": 4.376, "eval_steps_per_second": 1.094, "eval_wer": 0.2649138908384171, "step": 1000 }, { "epoch": 0.6184478467456067, "grad_norm": 0.6784160137176514, "learning_rate": 0.0002984108370400323, "loss": 0.3343, "step": 1025 }, { "epoch": 0.6335319405686703, "grad_norm": 1.6744931936264038, "learning_rate": 0.0002983350181965224, "loss": 0.1664, "step": 1050 }, { "epoch": 0.6486160343917339, "grad_norm": 0.35144105553627014, "learning_rate": 0.0002982591993530125, "loss": 0.3518, "step": 1075 }, { "epoch": 0.6637001282147975, "grad_norm": 0.7025427222251892, "learning_rate": 0.0002981833805095026, "loss": 0.1482, "step": 1100 }, { "epoch": 0.678784222037861, "grad_norm": 0.4477069675922394, "learning_rate": 0.0002981075616659927, "loss": 0.3463, "step": 1125 }, { "epoch": 0.6938683158609247, "grad_norm": 0.603905975818634, "learning_rate": 0.0002980317428224828, "loss": 0.1722, "step": 1150 }, { "epoch": 0.7089524096839882, "grad_norm": 0.2929205298423767, "learning_rate": 0.00029795592397897284, "loss": 0.3269, "step": 1175 }, { "epoch": 0.7240365035070518, "grad_norm": 0.6123032569885254, "learning_rate": 0.000297880105135463, "loss": 0.1573, "step": 1200 }, { "epoch": 0.7391205973301154, "grad_norm": 0.2819807529449463, "learning_rate": 0.00029780428629195307, "loss": 0.3203, "step": 1225 }, { "epoch": 0.754204691153179, "grad_norm": 0.5671049356460571, "learning_rate": 0.00029772846744844316, "loss": 0.1489, "step": 1250 }, { "epoch": 0.7692887849762425, "grad_norm": 0.7186427116394043, "learning_rate": 0.00029765264860493325, "loss": 0.3486, "step": 1275 }, { "epoch": 0.7843728787993062, "grad_norm": 0.5397343635559082, "learning_rate": 0.00029757682976142333, "loss": 0.1384, "step": 1300 }, { "epoch": 0.7994569726223697, "grad_norm": 0.3157232701778412, "learning_rate": 0.0002975010109179134, "loss": 0.3505, "step": 1325 }, { "epoch": 0.8145410664454333, "grad_norm": 0.714611828327179, "learning_rate": 0.00029742519207440356, "loss": 0.1502, "step": 1350 }, { "epoch": 0.8296251602684969, "grad_norm": 0.3562470078468323, "learning_rate": 0.0002973493732308936, "loss": 0.3845, "step": 1375 }, { "epoch": 0.8447092540915605, "grad_norm": 0.5777546167373657, "learning_rate": 0.00029727355438738374, "loss": 0.1541, "step": 1400 }, { "epoch": 0.859793347914624, "grad_norm": 0.49272066354751587, "learning_rate": 0.0002971977355438738, "loss": 0.3538, "step": 1425 }, { "epoch": 0.8748774417376876, "grad_norm": 0.8076097369194031, "learning_rate": 0.0002971219167003639, "loss": 0.1343, "step": 1450 }, { "epoch": 0.8899615355607512, "grad_norm": 0.43889355659484863, "learning_rate": 0.000297046097856854, "loss": 0.3096, "step": 1475 }, { "epoch": 0.9050456293838147, "grad_norm": 0.8419676423072815, "learning_rate": 0.0002969702790133441, "loss": 0.142, "step": 1500 }, { "epoch": 0.9050456293838147, "eval_loss": 0.21447643637657166, "eval_runtime": 715.0284, "eval_samples_per_second": 4.636, "eval_steps_per_second": 1.159, "eval_wer": 0.24875020245100685, "step": 1500 }, { "epoch": 0.9201297232068784, "grad_norm": 0.4135502278804779, "learning_rate": 0.00029689446016983417, "loss": 0.359, "step": 1525 }, { "epoch": 0.9352138170299419, "grad_norm": 0.5666655898094177, "learning_rate": 0.0002968186413263243, "loss": 0.1478, "step": 1550 }, { "epoch": 0.9502979108530055, "grad_norm": 0.30614542961120605, "learning_rate": 0.00029674282248281434, "loss": 0.3263, "step": 1575 }, { "epoch": 0.965382004676069, "grad_norm": 0.5047621130943298, "learning_rate": 0.0002966670036393045, "loss": 0.1369, "step": 1600 }, { "epoch": 0.9804660984991327, "grad_norm": 0.4826274514198303, "learning_rate": 0.00029659118479579457, "loss": 0.3709, "step": 1625 }, { "epoch": 0.9955501923221962, "grad_norm": 0.7504212260246277, "learning_rate": 0.00029651536595228466, "loss": 0.1389, "step": 1650 }, { "epoch": 1.0102571837996832, "grad_norm": 0.40745294094085693, "learning_rate": 0.00029643954710877475, "loss": 0.2744, "step": 1675 }, { "epoch": 1.0253412776227468, "grad_norm": 0.27026304602622986, "learning_rate": 0.00029636372826526483, "loss": 0.177, "step": 1700 }, { "epoch": 1.0404253714458105, "grad_norm": 0.4111769199371338, "learning_rate": 0.0002962879094217549, "loss": 0.2356, "step": 1725 }, { "epoch": 1.055509465268874, "grad_norm": 0.30931055545806885, "learning_rate": 0.000296212090578245, "loss": 0.1747, "step": 1750 }, { "epoch": 1.0705935590919375, "grad_norm": 0.3719274699687958, "learning_rate": 0.0002961362717347351, "loss": 0.245, "step": 1775 }, { "epoch": 1.0856776529150012, "grad_norm": 0.28636762499809265, "learning_rate": 0.0002960604528912252, "loss": 0.1781, "step": 1800 }, { "epoch": 1.1007617467380646, "grad_norm": 0.27731388807296753, "learning_rate": 0.0002959846340477153, "loss": 0.2435, "step": 1825 }, { "epoch": 1.1158458405611282, "grad_norm": 0.315758615732193, "learning_rate": 0.0002959088152042054, "loss": 0.1608, "step": 1850 }, { "epoch": 1.1309299343841919, "grad_norm": 0.7471879720687866, "learning_rate": 0.0002958329963606955, "loss": 0.2436, "step": 1875 }, { "epoch": 1.1460140282072555, "grad_norm": 0.3493207097053528, "learning_rate": 0.0002957571775171856, "loss": 0.165, "step": 1900 }, { "epoch": 1.1610981220303191, "grad_norm": 0.3202393651008606, "learning_rate": 0.00029568135867367567, "loss": 0.2448, "step": 1925 }, { "epoch": 1.1761822158533826, "grad_norm": 0.2840123176574707, "learning_rate": 0.00029560553983016576, "loss": 0.1583, "step": 1950 }, { "epoch": 1.1912663096764462, "grad_norm": 0.34918013215065, "learning_rate": 0.00029552972098665584, "loss": 0.246, "step": 1975 }, { "epoch": 1.2063504034995098, "grad_norm": 0.2470645010471344, "learning_rate": 0.00029545390214314593, "loss": 0.1645, "step": 2000 }, { "epoch": 1.2063504034995098, "eval_loss": 0.20230437815189362, "eval_runtime": 763.4911, "eval_samples_per_second": 4.342, "eval_steps_per_second": 1.086, "eval_wer": 0.22521189872050965, "step": 2000 }, { "epoch": 1.2214344973225733, "grad_norm": 0.5461482405662537, "learning_rate": 0.00029537808329963607, "loss": 0.2304, "step": 2025 }, { "epoch": 1.236518591145637, "grad_norm": 0.44666436314582825, "learning_rate": 0.00029530226445612616, "loss": 0.1632, "step": 2050 }, { "epoch": 1.2516026849687005, "grad_norm": 0.44556596875190735, "learning_rate": 0.0002952264456126162, "loss": 0.252, "step": 2075 }, { "epoch": 1.2666867787917642, "grad_norm": 0.44631072878837585, "learning_rate": 0.00029515062676910633, "loss": 0.1795, "step": 2100 }, { "epoch": 1.2817708726148276, "grad_norm": 0.2911534607410431, "learning_rate": 0.0002950748079255964, "loss": 0.2458, "step": 2125 }, { "epoch": 1.2968549664378912, "grad_norm": 0.34504878520965576, "learning_rate": 0.0002949989890820865, "loss": 0.2024, "step": 2150 }, { "epoch": 1.3119390602609549, "grad_norm": 0.295748770236969, "learning_rate": 0.000294926202992317, "loss": 0.6418, "step": 2175 }, { "epoch": 1.3270231540840185, "grad_norm": 0.2935297191143036, "learning_rate": 0.0002948503841488071, "loss": 0.1568, "step": 2200 }, { "epoch": 1.342107247907082, "grad_norm": 0.45815935730934143, "learning_rate": 0.0002947745653052972, "loss": 0.2491, "step": 2225 }, { "epoch": 1.3571913417301456, "grad_norm": 0.3507382273674011, "learning_rate": 0.00029469874646178727, "loss": 0.1683, "step": 2250 }, { "epoch": 1.3722754355532092, "grad_norm": 0.28953784704208374, "learning_rate": 0.00029462292761827736, "loss": 0.2649, "step": 2275 }, { "epoch": 1.3873595293762726, "grad_norm": 0.26776325702667236, "learning_rate": 0.00029454710877476744, "loss": 0.1511, "step": 2300 }, { "epoch": 1.4024436231993362, "grad_norm": 0.6518787741661072, "learning_rate": 0.0002944712899312576, "loss": 0.2438, "step": 2325 }, { "epoch": 1.4175277170223999, "grad_norm": 0.3653862476348877, "learning_rate": 0.0002943954710877476, "loss": 0.1677, "step": 2350 }, { "epoch": 1.4326118108454635, "grad_norm": 0.4537375569343567, "learning_rate": 0.00029431965224423776, "loss": 0.2619, "step": 2375 }, { "epoch": 1.4476959046685272, "grad_norm": 0.3677208721637726, "learning_rate": 0.00029424383340072785, "loss": 0.1564, "step": 2400 }, { "epoch": 1.4627799984915906, "grad_norm": 0.33267661929130554, "learning_rate": 0.00029416801455721793, "loss": 0.232, "step": 2425 }, { "epoch": 1.4778640923146542, "grad_norm": 0.32956621050834656, "learning_rate": 0.000294092195713708, "loss": 0.1841, "step": 2450 }, { "epoch": 1.4929481861377178, "grad_norm": 0.41168802976608276, "learning_rate": 0.0002940163768701981, "loss": 0.2432, "step": 2475 }, { "epoch": 1.5080322799607813, "grad_norm": 0.333852082490921, "learning_rate": 0.0002939405580266882, "loss": 0.1818, "step": 2500 }, { "epoch": 1.5080322799607813, "eval_loss": 0.19838476181030273, "eval_runtime": 747.6476, "eval_samples_per_second": 4.434, "eval_steps_per_second": 1.109, "eval_wer": 0.21765372779787293, "step": 2500 }, { "epoch": 1.523116373783845, "grad_norm": 0.35393306612968445, "learning_rate": 0.00029386473918317833, "loss": 0.2496, "step": 2525 }, { "epoch": 1.5382004676069085, "grad_norm": 0.23671747744083405, "learning_rate": 0.00029378892033966837, "loss": 0.1641, "step": 2550 }, { "epoch": 1.553284561429972, "grad_norm": 0.49785369634628296, "learning_rate": 0.0002937131014961585, "loss": 0.2304, "step": 2575 }, { "epoch": 1.5683686552530358, "grad_norm": 0.7474918365478516, "learning_rate": 0.0002936372826526486, "loss": 0.1914, "step": 2600 }, { "epoch": 1.5834527490760992, "grad_norm": 0.3910874128341675, "learning_rate": 0.0002935614638091387, "loss": 0.2516, "step": 2625 }, { "epoch": 1.5985368428991629, "grad_norm": 0.33777278661727905, "learning_rate": 0.00029348564496562877, "loss": 0.1778, "step": 2650 }, { "epoch": 1.6136209367222265, "grad_norm": 0.39339521527290344, "learning_rate": 0.00029340982612211886, "loss": 0.2651, "step": 2675 }, { "epoch": 1.62870503054529, "grad_norm": 0.33642980456352234, "learning_rate": 0.00029333400727860894, "loss": 0.1987, "step": 2700 }, { "epoch": 1.6437891243683536, "grad_norm": 0.4382663071155548, "learning_rate": 0.00029325818843509903, "loss": 0.3213, "step": 2725 }, { "epoch": 1.6588732181914172, "grad_norm": 0.29682841897010803, "learning_rate": 0.0002931823695915891, "loss": 0.1744, "step": 2750 }, { "epoch": 1.6739573120144806, "grad_norm": 0.5017745494842529, "learning_rate": 0.0002931065507480792, "loss": 0.2454, "step": 2775 }, { "epoch": 1.6890414058375445, "grad_norm": 0.31300872564315796, "learning_rate": 0.00029303073190456935, "loss": 0.1642, "step": 2800 }, { "epoch": 1.704125499660608, "grad_norm": 0.2737913429737091, "learning_rate": 0.0002929549130610594, "loss": 0.2304, "step": 2825 }, { "epoch": 1.7192095934836713, "grad_norm": 0.5299367904663086, "learning_rate": 0.0002928790942175495, "loss": 0.1446, "step": 2850 }, { "epoch": 1.7342936873067352, "grad_norm": 0.4113737642765045, "learning_rate": 0.0002928032753740396, "loss": 0.2751, "step": 2875 }, { "epoch": 1.7493777811297986, "grad_norm": 0.33324816823005676, "learning_rate": 0.0002927274565305297, "loss": 0.1635, "step": 2900 }, { "epoch": 1.7644618749528622, "grad_norm": 0.33196279406547546, "learning_rate": 0.0002926516376870198, "loss": 0.2426, "step": 2925 }, { "epoch": 1.7795459687759259, "grad_norm": 0.2307615876197815, "learning_rate": 0.00029257581884350987, "loss": 0.1548, "step": 2950 }, { "epoch": 1.7946300625989893, "grad_norm": 0.259033739566803, "learning_rate": 0.00029249999999999995, "loss": 0.2481, "step": 2975 }, { "epoch": 1.809714156422053, "grad_norm": 0.3014342784881592, "learning_rate": 0.0002924241811564901, "loss": 0.1827, "step": 3000 }, { "epoch": 1.809714156422053, "eval_loss": 0.1995258629322052, "eval_runtime": 720.0351, "eval_samples_per_second": 4.604, "eval_steps_per_second": 1.151, "eval_wer": 0.2203962641040868, "step": 3000 }, { "epoch": 1.8247982502451165, "grad_norm": 0.348734050989151, "learning_rate": 0.0002923483623129802, "loss": 0.2435, "step": 3025 }, { "epoch": 1.83988234406818, "grad_norm": 0.38749223947525024, "learning_rate": 0.00029227254346947027, "loss": 0.1839, "step": 3050 }, { "epoch": 1.8549664378912438, "grad_norm": 0.3979107141494751, "learning_rate": 0.00029219672462596036, "loss": 0.2518, "step": 3075 }, { "epoch": 1.8700505317143072, "grad_norm": 0.2685915231704712, "learning_rate": 0.00029212090578245044, "loss": 0.1711, "step": 3100 }, { "epoch": 1.8851346255373709, "grad_norm": 0.2596099078655243, "learning_rate": 0.00029204508693894053, "loss": 0.2576, "step": 3125 }, { "epoch": 1.9002187193604345, "grad_norm": 0.26872625946998596, "learning_rate": 0.0002919692680954306, "loss": 0.1484, "step": 3150 }, { "epoch": 1.915302813183498, "grad_norm": 0.32843342423439026, "learning_rate": 0.0002918934492519207, "loss": 0.2559, "step": 3175 }, { "epoch": 1.9303869070065616, "grad_norm": 0.26869305968284607, "learning_rate": 0.0002918176304084108, "loss": 0.1751, "step": 3200 }, { "epoch": 1.9454710008296252, "grad_norm": 0.2810923159122467, "learning_rate": 0.00029174181156490093, "loss": 0.2244, "step": 3225 }, { "epoch": 1.9605550946526886, "grad_norm": 0.3540550470352173, "learning_rate": 0.00029166599272139096, "loss": 0.1661, "step": 3250 }, { "epoch": 1.9756391884757525, "grad_norm": 0.23880811035633087, "learning_rate": 0.0002915901738778811, "loss": 0.2503, "step": 3275 }, { "epoch": 1.990723282298816, "grad_norm": 0.24448829889297485, "learning_rate": 0.0002915143550343712, "loss": 0.1501, "step": 3300 }, { "epoch": 2.005430273776303, "grad_norm": 0.931875467300415, "learning_rate": 0.0002914385361908613, "loss": 0.228, "step": 3325 }, { "epoch": 2.0205143675993664, "grad_norm": 0.22488653659820557, "learning_rate": 0.00029136271734735137, "loss": 0.2308, "step": 3350 }, { "epoch": 2.03559846142243, "grad_norm": 0.33369916677474976, "learning_rate": 0.00029128689850384145, "loss": 0.1449, "step": 3375 }, { "epoch": 2.0506825552454937, "grad_norm": 0.28679683804512024, "learning_rate": 0.00029121107966033154, "loss": 0.221, "step": 3400 }, { "epoch": 2.065766649068557, "grad_norm": 0.2800401449203491, "learning_rate": 0.0002911352608168217, "loss": 0.1486, "step": 3425 }, { "epoch": 2.080850742891621, "grad_norm": 0.36049047112464905, "learning_rate": 0.0002910594419733117, "loss": 0.2204, "step": 3450 }, { "epoch": 2.0959348367146844, "grad_norm": 0.39010685682296753, "learning_rate": 0.00029098362312980186, "loss": 0.1683, "step": 3475 }, { "epoch": 2.111018930537748, "grad_norm": 0.1714339703321457, "learning_rate": 0.00029090780428629194, "loss": 0.1859, "step": 3500 }, { "epoch": 2.111018930537748, "eval_loss": 0.19938839972019196, "eval_runtime": 719.6016, "eval_samples_per_second": 4.607, "eval_steps_per_second": 1.152, "eval_wer": 0.22404578092101712, "step": 3500 }, { "epoch": 2.1261030243608117, "grad_norm": 0.3100337088108063, "learning_rate": 0.00029083198544278203, "loss": 0.159, "step": 3525 }, { "epoch": 2.141187118183875, "grad_norm": 0.26142579317092896, "learning_rate": 0.0002907561665992721, "loss": 0.2408, "step": 3550 }, { "epoch": 2.1562712120069385, "grad_norm": 0.4712742269039154, "learning_rate": 0.0002906803477557622, "loss": 0.154, "step": 3575 }, { "epoch": 2.1713553058300024, "grad_norm": 0.2582917809486389, "learning_rate": 0.0002906045289122523, "loss": 0.2283, "step": 3600 }, { "epoch": 2.1864393996530658, "grad_norm": 0.2900999188423157, "learning_rate": 0.0002905287100687424, "loss": 0.1722, "step": 3625 }, { "epoch": 2.201523493476129, "grad_norm": 0.20985634624958038, "learning_rate": 0.00029045289122523246, "loss": 0.5346, "step": 3650 }, { "epoch": 2.216607587299193, "grad_norm": 0.32814761996269226, "learning_rate": 0.00029037707238172255, "loss": 0.1752, "step": 3675 }, { "epoch": 2.2316916811222565, "grad_norm": 0.22810375690460205, "learning_rate": 0.0002903012535382127, "loss": 0.2093, "step": 3700 }, { "epoch": 2.2467757749453203, "grad_norm": 0.4804486334323883, "learning_rate": 0.0002902254346947027, "loss": 0.1641, "step": 3725 }, { "epoch": 2.2618598687683837, "grad_norm": 0.2502540051937103, "learning_rate": 0.00029014961585119287, "loss": 0.2281, "step": 3750 }, { "epoch": 2.276943962591447, "grad_norm": 0.34445151686668396, "learning_rate": 0.00029007379700768295, "loss": 0.1683, "step": 3775 }, { "epoch": 2.292028056414511, "grad_norm": 0.16622678935527802, "learning_rate": 0.00028999797816417304, "loss": 0.237, "step": 3800 }, { "epoch": 2.3071121502375744, "grad_norm": 0.45780929923057556, "learning_rate": 0.00028992215932066313, "loss": 0.1715, "step": 3825 }, { "epoch": 2.3221962440606383, "grad_norm": 0.3053385615348816, "learning_rate": 0.0002898463404771532, "loss": 0.2236, "step": 3850 }, { "epoch": 2.3372803378837017, "grad_norm": 0.5077650547027588, "learning_rate": 0.0002897705216336433, "loss": 0.168, "step": 3875 }, { "epoch": 2.352364431706765, "grad_norm": 0.23254072666168213, "learning_rate": 0.00028969470279013344, "loss": 0.2139, "step": 3900 }, { "epoch": 2.367448525529829, "grad_norm": 0.5966396927833557, "learning_rate": 0.00028961888394662353, "loss": 0.1661, "step": 3925 }, { "epoch": 2.3825326193528924, "grad_norm": 0.2979792058467865, "learning_rate": 0.0002895430651031136, "loss": 0.2305, "step": 3950 }, { "epoch": 2.397616713175956, "grad_norm": 0.33389779925346375, "learning_rate": 0.0002894672462596037, "loss": 0.1607, "step": 3975 }, { "epoch": 2.4127008069990197, "grad_norm": 0.24503561854362488, "learning_rate": 0.0002893914274160938, "loss": 0.2488, "step": 4000 }, { "epoch": 2.4127008069990197, "eval_loss": 0.20397360622882843, "eval_runtime": 723.2916, "eval_samples_per_second": 4.583, "eval_steps_per_second": 1.146, "eval_wer": 0.2249527614317335, "step": 4000 }, { "epoch": 2.427784900822083, "grad_norm": 0.368486613035202, "learning_rate": 0.0002893156085725839, "loss": 0.1815, "step": 4025 }, { "epoch": 2.4428689946451465, "grad_norm": 0.4782474637031555, "learning_rate": 0.00028923978972907397, "loss": 0.2391, "step": 4050 }, { "epoch": 2.4579530884682104, "grad_norm": 0.3658357858657837, "learning_rate": 0.00028916397088556405, "loss": 0.1652, "step": 4075 }, { "epoch": 2.473037182291274, "grad_norm": 0.27112266421318054, "learning_rate": 0.00028908815204205414, "loss": 0.2343, "step": 4100 }, { "epoch": 2.488121276114337, "grad_norm": 0.3050464391708374, "learning_rate": 0.0002890123331985443, "loss": 0.1487, "step": 4125 }, { "epoch": 2.503205369937401, "grad_norm": 0.25800853967666626, "learning_rate": 0.0002889365143550343, "loss": 0.1978, "step": 4150 }, { "epoch": 2.5182894637604645, "grad_norm": 0.36631202697753906, "learning_rate": 0.00028886069551152445, "loss": 0.1668, "step": 4175 }, { "epoch": 2.5333735575835283, "grad_norm": 0.20563054084777832, "learning_rate": 0.00028878487666801454, "loss": 0.2048, "step": 4200 }, { "epoch": 2.5484576514065918, "grad_norm": 0.4122121334075928, "learning_rate": 0.00028870905782450463, "loss": 0.1579, "step": 4225 }, { "epoch": 2.563541745229655, "grad_norm": 0.27097514271736145, "learning_rate": 0.0002886332389809947, "loss": 0.2392, "step": 4250 }, { "epoch": 2.578625839052719, "grad_norm": 0.4393375813961029, "learning_rate": 0.0002885574201374848, "loss": 0.1461, "step": 4275 }, { "epoch": 2.5937099328757824, "grad_norm": 0.30553507804870605, "learning_rate": 0.0002884816012939749, "loss": 0.2319, "step": 4300 }, { "epoch": 2.6087940266988463, "grad_norm": 0.450008749961853, "learning_rate": 0.00028840578245046503, "loss": 0.1482, "step": 4325 }, { "epoch": 2.6238781205219097, "grad_norm": 0.2398987114429474, "learning_rate": 0.00028832996360695506, "loss": 0.2193, "step": 4350 }, { "epoch": 2.638962214344973, "grad_norm": 0.2716082036495209, "learning_rate": 0.0002882541447634452, "loss": 0.1716, "step": 4375 }, { "epoch": 2.654046308168037, "grad_norm": 0.18016381561756134, "learning_rate": 0.0002881783259199353, "loss": 0.2191, "step": 4400 }, { "epoch": 2.6691304019911004, "grad_norm": 0.24146826565265656, "learning_rate": 0.0002881025070764254, "loss": 0.1576, "step": 4425 }, { "epoch": 2.684214495814164, "grad_norm": 0.20057305693626404, "learning_rate": 0.00028802668823291547, "loss": 0.2222, "step": 4450 }, { "epoch": 2.6992985896372277, "grad_norm": 0.36306390166282654, "learning_rate": 0.00028795086938940555, "loss": 0.1662, "step": 4475 }, { "epoch": 2.714382683460291, "grad_norm": 0.2843382954597473, "learning_rate": 0.00028787505054589564, "loss": 0.2292, "step": 4500 }, { "epoch": 2.714382683460291, "eval_loss": 0.19914484024047852, "eval_runtime": 763.4779, "eval_samples_per_second": 4.342, "eval_steps_per_second": 1.086, "eval_wer": 0.22595691842574098, "step": 4500 }, { "epoch": 2.7294667772833545, "grad_norm": 0.5379898548126221, "learning_rate": 0.0002877992317023858, "loss": 0.1578, "step": 4525 }, { "epoch": 2.7445508711064184, "grad_norm": 0.18573738634586334, "learning_rate": 0.0002877234128588758, "loss": 0.2237, "step": 4550 }, { "epoch": 2.759634964929482, "grad_norm": 0.2831384241580963, "learning_rate": 0.0002876475940153659, "loss": 0.1606, "step": 4575 }, { "epoch": 2.774719058752545, "grad_norm": 0.5031735301017761, "learning_rate": 0.00028757177517185604, "loss": 0.1805, "step": 4600 }, { "epoch": 2.789803152575609, "grad_norm": 0.768892765045166, "learning_rate": 0.0002874959563283461, "loss": 0.157, "step": 4625 }, { "epoch": 2.8048872463986725, "grad_norm": 0.22896036505699158, "learning_rate": 0.0002874201374848362, "loss": 0.2106, "step": 4650 }, { "epoch": 2.8199713402217363, "grad_norm": 0.2187754064798355, "learning_rate": 0.0002873443186413263, "loss": 0.1605, "step": 4675 }, { "epoch": 2.8350554340447998, "grad_norm": 0.1761629283428192, "learning_rate": 0.0002872684997978164, "loss": 0.2186, "step": 4700 }, { "epoch": 2.850139527867863, "grad_norm": 0.3175676167011261, "learning_rate": 0.0002871926809543065, "loss": 0.1537, "step": 4725 }, { "epoch": 2.865223621690927, "grad_norm": 0.21728022396564484, "learning_rate": 0.00028711686211079656, "loss": 0.2119, "step": 4750 }, { "epoch": 2.8803077155139905, "grad_norm": 0.29456526041030884, "learning_rate": 0.00028704104326728665, "loss": 0.1556, "step": 4775 }, { "epoch": 2.8953918093370543, "grad_norm": 0.2309761792421341, "learning_rate": 0.0002869652244237768, "loss": 0.1805, "step": 4800 }, { "epoch": 2.9104759031601177, "grad_norm": 0.30479326844215393, "learning_rate": 0.0002868894055802669, "loss": 0.1529, "step": 4825 }, { "epoch": 2.925559996983181, "grad_norm": 0.1994062215089798, "learning_rate": 0.00028681358673675697, "loss": 0.192, "step": 4850 }, { "epoch": 2.940644090806245, "grad_norm": 0.3960685133934021, "learning_rate": 0.00028673776789324705, "loss": 0.1505, "step": 4875 }, { "epoch": 2.9557281846293084, "grad_norm": 0.18159142136573792, "learning_rate": 0.00028666194904973714, "loss": 0.1899, "step": 4900 }, { "epoch": 2.970812278452372, "grad_norm": 0.5303758978843689, "learning_rate": 0.0002865861302062272, "loss": 0.1899, "step": 4925 }, { "epoch": 2.9858963722754357, "grad_norm": 0.18483863770961761, "learning_rate": 0.0002865103113627173, "loss": 0.2267, "step": 4950 }, { "epoch": 3.0006033637529224, "grad_norm": 0.403163343667984, "learning_rate": 0.0002864344925192074, "loss": 0.1248, "step": 4975 }, { "epoch": 3.015687457575986, "grad_norm": 0.2946682274341583, "learning_rate": 0.0002863586736756975, "loss": 0.2489, "step": 5000 }, { "epoch": 3.015687457575986, "eval_loss": 0.1845189481973648, "eval_runtime": 767.7712, "eval_samples_per_second": 4.318, "eval_steps_per_second": 1.08, "eval_wer": 0.21365869459590778, "step": 5000 }, { "epoch": 3.0307715513990496, "grad_norm": 0.36335453391075134, "learning_rate": 0.00028628285483218763, "loss": 0.0881, "step": 5025 }, { "epoch": 3.0458556452221135, "grad_norm": 0.3755040764808655, "learning_rate": 0.00028620703598867766, "loss": 0.2487, "step": 5050 }, { "epoch": 3.060939739045177, "grad_norm": 0.38198986649513245, "learning_rate": 0.0002861312171451678, "loss": 0.0799, "step": 5075 }, { "epoch": 3.0760238328682403, "grad_norm": 0.17410704493522644, "learning_rate": 0.0002860553983016579, "loss": 0.2627, "step": 5100 }, { "epoch": 3.091107926691304, "grad_norm": 0.6871060132980347, "learning_rate": 0.000285979579458148, "loss": 0.0967, "step": 5125 }, { "epoch": 3.1061920205143676, "grad_norm": 0.40587660670280457, "learning_rate": 0.00028590376061463806, "loss": 0.2739, "step": 5150 }, { "epoch": 3.121276114337431, "grad_norm": 0.5106558799743652, "learning_rate": 0.00028582794177112815, "loss": 0.0817, "step": 5175 }, { "epoch": 3.136360208160495, "grad_norm": 0.5959634184837341, "learning_rate": 0.00028575212292761824, "loss": 0.2631, "step": 5200 }, { "epoch": 3.1514443019835583, "grad_norm": 0.32283464074134827, "learning_rate": 0.0002856763040841084, "loss": 0.0805, "step": 5225 }, { "epoch": 3.1665283958066217, "grad_norm": 0.4039979577064514, "learning_rate": 0.0002856004852405984, "loss": 0.2601, "step": 5250 }, { "epoch": 3.1816124896296856, "grad_norm": 0.9042678475379944, "learning_rate": 0.00028552466639708855, "loss": 0.0845, "step": 5275 }, { "epoch": 3.196696583452749, "grad_norm": 0.28807753324508667, "learning_rate": 0.00028544884755357864, "loss": 0.2692, "step": 5300 }, { "epoch": 3.211780677275813, "grad_norm": 0.2866358458995819, "learning_rate": 0.0002853730287100687, "loss": 0.081, "step": 5325 }, { "epoch": 3.2268647710988763, "grad_norm": 0.2690901756286621, "learning_rate": 0.0002852972098665588, "loss": 0.2499, "step": 5350 }, { "epoch": 3.2419488649219397, "grad_norm": 0.21377098560333252, "learning_rate": 0.0002852213910230489, "loss": 0.0802, "step": 5375 }, { "epoch": 3.2570329587450035, "grad_norm": 0.3079901933670044, "learning_rate": 0.000285145572179539, "loss": 0.2314, "step": 5400 }, { "epoch": 3.272117052568067, "grad_norm": 0.47138121724128723, "learning_rate": 0.00028506975333602913, "loss": 0.0883, "step": 5425 }, { "epoch": 3.2872011463911304, "grad_norm": 0.3404664695262909, "learning_rate": 0.00028499393449251916, "loss": 0.225, "step": 5450 }, { "epoch": 3.3022852402141942, "grad_norm": 0.3051985800266266, "learning_rate": 0.00028491811564900925, "loss": 0.0757, "step": 5475 }, { "epoch": 3.3173693340372576, "grad_norm": 0.27657219767570496, "learning_rate": 0.0002848453295592398, "loss": 0.4731, "step": 5500 }, { "epoch": 3.3173693340372576, "eval_loss": 0.185445174574852, "eval_runtime": 756.69, "eval_samples_per_second": 4.381, "eval_steps_per_second": 1.096, "eval_wer": 0.21210387086325108, "step": 5500 }, { "epoch": 3.3324534278603215, "grad_norm": 0.305276095867157, "learning_rate": 0.00028476951071572984, "loss": 0.0804, "step": 5525 }, { "epoch": 3.347537521683385, "grad_norm": 0.23126491904258728, "learning_rate": 0.00028469369187222, "loss": 0.2612, "step": 5550 }, { "epoch": 3.3626216155064483, "grad_norm": 0.29387226700782776, "learning_rate": 0.00028461787302871006, "loss": 0.085, "step": 5575 }, { "epoch": 3.377705709329512, "grad_norm": 0.7246792912483215, "learning_rate": 0.00028454205418520015, "loss": 0.2898, "step": 5600 }, { "epoch": 3.3927898031525756, "grad_norm": 0.42687901854515076, "learning_rate": 0.00028446623534169024, "loss": 0.101, "step": 5625 }, { "epoch": 3.407873896975639, "grad_norm": 0.2322624772787094, "learning_rate": 0.0002843904164981803, "loss": 0.2543, "step": 5650 }, { "epoch": 3.422957990798703, "grad_norm": 0.49676916003227234, "learning_rate": 0.0002843145976546704, "loss": 0.0832, "step": 5675 }, { "epoch": 3.4380420846217663, "grad_norm": 0.42384791374206543, "learning_rate": 0.0002842387788111605, "loss": 0.2517, "step": 5700 }, { "epoch": 3.4531261784448297, "grad_norm": 0.48723557591438293, "learning_rate": 0.0002841629599676506, "loss": 0.0974, "step": 5725 }, { "epoch": 3.4682102722678936, "grad_norm": 1.3422971963882446, "learning_rate": 0.00028408714112414067, "loss": 0.2576, "step": 5750 }, { "epoch": 3.483294366090957, "grad_norm": 0.5135483145713806, "learning_rate": 0.0002840113222806308, "loss": 0.0904, "step": 5775 }, { "epoch": 3.498378459914021, "grad_norm": 0.1537868082523346, "learning_rate": 0.00028393550343712085, "loss": 0.2522, "step": 5800 }, { "epoch": 3.5134625537370843, "grad_norm": 0.6072225570678711, "learning_rate": 0.000283859684593611, "loss": 0.1021, "step": 5825 }, { "epoch": 3.5285466475601477, "grad_norm": 0.2713959813117981, "learning_rate": 0.0002837838657501011, "loss": 0.2608, "step": 5850 }, { "epoch": 3.5436307413832115, "grad_norm": 0.4154447019100189, "learning_rate": 0.00028370804690659116, "loss": 0.0876, "step": 5875 }, { "epoch": 3.558714835206275, "grad_norm": 0.197547048330307, "learning_rate": 0.00028363222806308125, "loss": 0.2415, "step": 5900 }, { "epoch": 3.573798929029339, "grad_norm": 0.27300530672073364, "learning_rate": 0.00028355640921957134, "loss": 0.085, "step": 5925 }, { "epoch": 3.5888830228524022, "grad_norm": 0.22969838976860046, "learning_rate": 0.0002834805903760614, "loss": 0.2526, "step": 5950 }, { "epoch": 3.6039671166754657, "grad_norm": 0.22905798256397247, "learning_rate": 0.00028340477153255156, "loss": 0.0958, "step": 5975 }, { "epoch": 3.6190512104985295, "grad_norm": 0.13335692882537842, "learning_rate": 0.0002833289526890416, "loss": 0.2497, "step": 6000 }, { "epoch": 3.6190512104985295, "eval_loss": 0.18878242373466492, "eval_runtime": 739.3999, "eval_samples_per_second": 4.483, "eval_steps_per_second": 1.121, "eval_wer": 0.21019273335852723, "step": 6000 }, { "epoch": 3.634135304321593, "grad_norm": 1.2492620944976807, "learning_rate": 0.00028325313384553174, "loss": 0.0951, "step": 6025 }, { "epoch": 3.6492193981446563, "grad_norm": 0.19799137115478516, "learning_rate": 0.0002831773150020218, "loss": 0.2914, "step": 6050 }, { "epoch": 3.66430349196772, "grad_norm": 0.33943161368370056, "learning_rate": 0.0002831014961585119, "loss": 0.0908, "step": 6075 }, { "epoch": 3.6793875857907836, "grad_norm": 0.6077541708946228, "learning_rate": 0.000283025677315002, "loss": 0.2635, "step": 6100 }, { "epoch": 3.694471679613847, "grad_norm": 0.4308757781982422, "learning_rate": 0.0002829498584714921, "loss": 0.1054, "step": 6125 }, { "epoch": 3.709555773436911, "grad_norm": 0.27450037002563477, "learning_rate": 0.00028287403962798217, "loss": 0.2521, "step": 6150 }, { "epoch": 3.7246398672599743, "grad_norm": 0.41612792015075684, "learning_rate": 0.00028279822078447226, "loss": 0.0969, "step": 6175 }, { "epoch": 3.7397239610830377, "grad_norm": 0.818908154964447, "learning_rate": 0.00028272240194096235, "loss": 0.2574, "step": 6200 }, { "epoch": 3.7548080549061016, "grad_norm": 0.3212096393108368, "learning_rate": 0.00028264658309745243, "loss": 0.0874, "step": 6225 }, { "epoch": 3.769892148729165, "grad_norm": 0.4317522644996643, "learning_rate": 0.0002825707642539426, "loss": 0.2812, "step": 6250 }, { "epoch": 3.7849762425522284, "grad_norm": 0.3905152976512909, "learning_rate": 0.00028249494541043266, "loss": 0.0903, "step": 6275 }, { "epoch": 3.8000603363752923, "grad_norm": 0.22848589718341827, "learning_rate": 0.00028241912656692275, "loss": 0.292, "step": 6300 }, { "epoch": 3.8151444301983557, "grad_norm": 0.645900547504425, "learning_rate": 0.00028234330772341284, "loss": 0.0863, "step": 6325 }, { "epoch": 3.8302285240214196, "grad_norm": 0.3289954960346222, "learning_rate": 0.0002822674888799029, "loss": 0.2798, "step": 6350 }, { "epoch": 3.845312617844483, "grad_norm": 0.3951515853404999, "learning_rate": 0.000282191670036393, "loss": 0.0894, "step": 6375 }, { "epoch": 3.860396711667547, "grad_norm": 0.23933938145637512, "learning_rate": 0.00028211585119288315, "loss": 0.2836, "step": 6400 }, { "epoch": 3.8754808054906102, "grad_norm": 0.3123800456523895, "learning_rate": 0.0002820400323493732, "loss": 0.0841, "step": 6425 }, { "epoch": 3.8905648993136737, "grad_norm": 0.2620724141597748, "learning_rate": 0.0002819642135058633, "loss": 0.2695, "step": 6450 }, { "epoch": 3.9056489931367375, "grad_norm": 0.3551441431045532, "learning_rate": 0.0002818883946623534, "loss": 0.0887, "step": 6475 }, { "epoch": 3.920733086959801, "grad_norm": 0.2354227900505066, "learning_rate": 0.0002818125758188435, "loss": 0.2872, "step": 6500 }, { "epoch": 3.920733086959801, "eval_loss": 0.191518634557724, "eval_runtime": 727.3001, "eval_samples_per_second": 4.558, "eval_steps_per_second": 1.14, "eval_wer": 0.21967283917292016, "step": 6500 }, { "epoch": 3.9358171807828644, "grad_norm": 0.4430249333381653, "learning_rate": 0.0002817367569753336, "loss": 0.0916, "step": 6525 }, { "epoch": 3.950901274605928, "grad_norm": 0.3285467326641083, "learning_rate": 0.00028166093813182367, "loss": 0.2646, "step": 6550 }, { "epoch": 3.9659853684289916, "grad_norm": 0.27407094836235046, "learning_rate": 0.00028158511928831376, "loss": 0.0852, "step": 6575 }, { "epoch": 3.981069462252055, "grad_norm": 0.25445982813835144, "learning_rate": 0.00028150930044480385, "loss": 0.2673, "step": 6600 }, { "epoch": 3.996153556075119, "grad_norm": 0.4180232882499695, "learning_rate": 0.00028143348160129393, "loss": 0.0804, "step": 6625 }, { "epoch": 4.010860547552606, "grad_norm": 0.42869091033935547, "learning_rate": 0.000281357662757784, "loss": 0.2138, "step": 6650 }, { "epoch": 4.025944641375669, "grad_norm": 0.1404862254858017, "learning_rate": 0.00028128184391427416, "loss": 0.1138, "step": 6675 }, { "epoch": 4.041028735198733, "grad_norm": 0.3522794246673584, "learning_rate": 0.0002812060250707642, "loss": 0.4489, "step": 6700 }, { "epoch": 4.056112829021797, "grad_norm": 0.24887488782405853, "learning_rate": 0.00028113020622725434, "loss": 0.1131, "step": 6725 }, { "epoch": 4.07119692284486, "grad_norm": 0.45187073945999146, "learning_rate": 0.0002810543873837444, "loss": 0.1959, "step": 6750 }, { "epoch": 4.0862810166679235, "grad_norm": 0.30940550565719604, "learning_rate": 0.0002809785685402345, "loss": 0.1017, "step": 6775 }, { "epoch": 4.101365110490987, "grad_norm": 0.43294191360473633, "learning_rate": 0.0002809027496967246, "loss": 0.197, "step": 6800 }, { "epoch": 4.116449204314051, "grad_norm": 0.32438719272613525, "learning_rate": 0.0002808269308532147, "loss": 0.1136, "step": 6825 }, { "epoch": 4.131533298137114, "grad_norm": 0.38587555289268494, "learning_rate": 0.00028075111200970477, "loss": 0.1977, "step": 6850 }, { "epoch": 4.146617391960178, "grad_norm": 0.2455216646194458, "learning_rate": 0.0002806752931661949, "loss": 0.1072, "step": 6875 }, { "epoch": 4.161701485783242, "grad_norm": 0.356931209564209, "learning_rate": 0.00028059947432268494, "loss": 0.2118, "step": 6900 }, { "epoch": 4.176785579606305, "grad_norm": 0.8069139719009399, "learning_rate": 0.0002805236554791751, "loss": 0.1282, "step": 6925 }, { "epoch": 4.191869673429369, "grad_norm": 0.27101409435272217, "learning_rate": 0.00028044783663566517, "loss": 0.226, "step": 6950 }, { "epoch": 4.206953767252433, "grad_norm": 0.2601521909236908, "learning_rate": 0.00028037201779215526, "loss": 0.1372, "step": 6975 }, { "epoch": 4.222037861075496, "grad_norm": 0.5733149647712708, "learning_rate": 0.00028029619894864535, "loss": 0.248, "step": 7000 }, { "epoch": 4.222037861075496, "eval_loss": 0.19203022122383118, "eval_runtime": 763.6876, "eval_samples_per_second": 4.341, "eval_steps_per_second": 1.086, "eval_wer": 0.21993197646169627, "step": 7000 }, { "epoch": 4.2371219548985595, "grad_norm": 0.34575146436691284, "learning_rate": 0.00028022038010513543, "loss": 0.1382, "step": 7025 }, { "epoch": 4.252206048721623, "grad_norm": 0.40568241477012634, "learning_rate": 0.0002801445612616255, "loss": 0.2228, "step": 7050 }, { "epoch": 4.267290142544686, "grad_norm": 0.2740955054759979, "learning_rate": 0.0002800687424181156, "loss": 0.1264, "step": 7075 }, { "epoch": 4.28237423636775, "grad_norm": 0.33927688002586365, "learning_rate": 0.0002799929235746057, "loss": 0.2267, "step": 7100 }, { "epoch": 4.297458330190814, "grad_norm": 0.27805888652801514, "learning_rate": 0.0002799171047310958, "loss": 0.1333, "step": 7125 }, { "epoch": 4.312542424013877, "grad_norm": 0.36450818181037903, "learning_rate": 0.0002798412858875859, "loss": 0.1916, "step": 7150 }, { "epoch": 4.327626517836941, "grad_norm": 0.1994234174489975, "learning_rate": 0.000279765467044076, "loss": 0.1451, "step": 7175 }, { "epoch": 4.342710611660005, "grad_norm": 0.3419537842273712, "learning_rate": 0.0002796896482005661, "loss": 0.2135, "step": 7200 }, { "epoch": 4.357794705483068, "grad_norm": 0.252347856760025, "learning_rate": 0.0002796138293570562, "loss": 0.1289, "step": 7225 }, { "epoch": 4.3728787993061315, "grad_norm": 0.3467054069042206, "learning_rate": 0.00027953801051354627, "loss": 0.2341, "step": 7250 }, { "epoch": 4.387962893129195, "grad_norm": 0.24314385652542114, "learning_rate": 0.00027946219167003636, "loss": 0.1246, "step": 7275 }, { "epoch": 4.403046986952258, "grad_norm": 0.35150381922721863, "learning_rate": 0.0002793863728265265, "loss": 0.2159, "step": 7300 }, { "epoch": 4.418131080775322, "grad_norm": 0.44021642208099365, "learning_rate": 0.00027931055398301653, "loss": 0.1256, "step": 7325 }, { "epoch": 4.433215174598386, "grad_norm": 0.33157458901405334, "learning_rate": 0.00027923473513950667, "loss": 0.2206, "step": 7350 }, { "epoch": 4.44829926842145, "grad_norm": 0.292894184589386, "learning_rate": 0.00027915891629599676, "loss": 0.1199, "step": 7375 }, { "epoch": 4.463383362244513, "grad_norm": 0.5214021801948547, "learning_rate": 0.00027908309745248685, "loss": 0.1855, "step": 7400 }, { "epoch": 4.478467456067577, "grad_norm": 0.22543422877788544, "learning_rate": 0.00027900727860897693, "loss": 0.1206, "step": 7425 }, { "epoch": 4.493551549890641, "grad_norm": 0.33008188009262085, "learning_rate": 0.000278931459765467, "loss": 0.2273, "step": 7450 }, { "epoch": 4.508635643713704, "grad_norm": 0.25787121057510376, "learning_rate": 0.0002788556409219571, "loss": 0.1044, "step": 7475 }, { "epoch": 4.5237197375367675, "grad_norm": 0.26990142464637756, "learning_rate": 0.0002787798220784472, "loss": 0.2048, "step": 7500 }, { "epoch": 4.5237197375367675, "eval_loss": 0.1822061687707901, "eval_runtime": 772.5305, "eval_samples_per_second": 4.291, "eval_steps_per_second": 1.073, "eval_wer": 0.20550666738649248, "step": 7500 }, { "epoch": 4.538803831359831, "grad_norm": 0.2780537009239197, "learning_rate": 0.0002787040032349373, "loss": 0.1367, "step": 7525 }, { "epoch": 4.553887925182894, "grad_norm": 0.2901013493537903, "learning_rate": 0.00027862818439142737, "loss": 0.2171, "step": 7550 }, { "epoch": 4.568972019005958, "grad_norm": 0.7013656497001648, "learning_rate": 0.0002785523655479175, "loss": 0.1269, "step": 7575 }, { "epoch": 4.584056112829022, "grad_norm": 0.4125453531742096, "learning_rate": 0.00027847654670440754, "loss": 0.2193, "step": 7600 }, { "epoch": 4.599140206652085, "grad_norm": 0.17244857549667358, "learning_rate": 0.0002784007278608977, "loss": 0.1334, "step": 7625 }, { "epoch": 4.614224300475149, "grad_norm": 0.23291832208633423, "learning_rate": 0.00027832490901738777, "loss": 0.212, "step": 7650 }, { "epoch": 4.629308394298213, "grad_norm": 0.20680192112922668, "learning_rate": 0.00027824909017387786, "loss": 0.1127, "step": 7675 }, { "epoch": 4.644392488121277, "grad_norm": 0.3413441777229309, "learning_rate": 0.00027817327133036794, "loss": 0.1931, "step": 7700 }, { "epoch": 4.65947658194434, "grad_norm": 0.24499134719371796, "learning_rate": 0.00027809745248685803, "loss": 0.1148, "step": 7725 }, { "epoch": 4.674560675767403, "grad_norm": 0.4222862720489502, "learning_rate": 0.0002780216336433481, "loss": 0.2316, "step": 7750 }, { "epoch": 4.689644769590467, "grad_norm": 0.190853551030159, "learning_rate": 0.00027794581479983826, "loss": 0.1408, "step": 7775 }, { "epoch": 4.70472886341353, "grad_norm": 0.58743816614151, "learning_rate": 0.0002778699959563283, "loss": 0.1865, "step": 7800 }, { "epoch": 4.719812957236594, "grad_norm": 0.19071631133556366, "learning_rate": 0.00027779417711281843, "loss": 0.1304, "step": 7825 }, { "epoch": 4.734897051059658, "grad_norm": 0.7743087410926819, "learning_rate": 0.0002777183582693085, "loss": 0.203, "step": 7850 }, { "epoch": 4.749981144882721, "grad_norm": 0.21871432662010193, "learning_rate": 0.0002776425394257986, "loss": 0.099, "step": 7875 }, { "epoch": 4.765065238705785, "grad_norm": 0.3248477280139923, "learning_rate": 0.0002775667205822887, "loss": 0.2121, "step": 7900 }, { "epoch": 4.780149332528849, "grad_norm": 0.21200844645500183, "learning_rate": 0.0002774909017387788, "loss": 0.126, "step": 7925 }, { "epoch": 4.795233426351912, "grad_norm": 0.6078742742538452, "learning_rate": 0.00027741508289526887, "loss": 0.2062, "step": 7950 }, { "epoch": 4.8103175201749755, "grad_norm": 0.260425865650177, "learning_rate": 0.00027733926405175896, "loss": 0.1122, "step": 7975 }, { "epoch": 4.825401613998039, "grad_norm": 0.4660604000091553, "learning_rate": 0.00027726344520824904, "loss": 0.1977, "step": 8000 }, { "epoch": 4.825401613998039, "eval_loss": 0.1850125789642334, "eval_runtime": 772.6249, "eval_samples_per_second": 4.291, "eval_steps_per_second": 1.073, "eval_wer": 0.2094477136532959, "step": 8000 }, { "epoch": 4.840485707821102, "grad_norm": 0.2847197949886322, "learning_rate": 0.00027718762636473913, "loss": 0.1246, "step": 8025 }, { "epoch": 4.855569801644166, "grad_norm": 0.39191052317619324, "learning_rate": 0.00027711180752122927, "loss": 0.1994, "step": 8050 }, { "epoch": 4.87065389546723, "grad_norm": 0.19209399819374084, "learning_rate": 0.00027703598867771936, "loss": 0.1233, "step": 8075 }, { "epoch": 4.885737989290293, "grad_norm": 0.4371297359466553, "learning_rate": 0.00027696016983420944, "loss": 0.234, "step": 8100 }, { "epoch": 4.900822083113357, "grad_norm": 0.25683364272117615, "learning_rate": 0.00027688435099069953, "loss": 0.1182, "step": 8125 }, { "epoch": 4.915906176936421, "grad_norm": 0.42062830924987793, "learning_rate": 0.0002768085321471896, "loss": 0.2268, "step": 8150 }, { "epoch": 4.930990270759484, "grad_norm": 0.28874415159225464, "learning_rate": 0.0002767327133036797, "loss": 0.1134, "step": 8175 }, { "epoch": 4.946074364582548, "grad_norm": 0.28938642144203186, "learning_rate": 0.00027665689446016985, "loss": 0.2109, "step": 8200 }, { "epoch": 4.961158458405611, "grad_norm": 0.16918736696243286, "learning_rate": 0.0002765810756166599, "loss": 0.1132, "step": 8225 }, { "epoch": 4.976242552228674, "grad_norm": 0.3270578682422638, "learning_rate": 0.00027650525677315, "loss": 0.2191, "step": 8250 }, { "epoch": 4.991326646051738, "grad_norm": 0.8475662469863892, "learning_rate": 0.0002764294379296401, "loss": 0.146, "step": 8275 }, { "epoch": 5.006033637529225, "grad_norm": 0.3081296980381012, "learning_rate": 0.0002763536190861302, "loss": 0.2, "step": 8300 }, { "epoch": 5.021117731352289, "grad_norm": 0.17431728541851044, "learning_rate": 0.0002762778002426203, "loss": 0.17, "step": 8325 }, { "epoch": 5.036201825175352, "grad_norm": 0.40710902214050293, "learning_rate": 0.00027620198139911037, "loss": 0.1504, "step": 8350 }, { "epoch": 5.051285918998416, "grad_norm": 0.19659915566444397, "learning_rate": 0.00027612616255560046, "loss": 0.1745, "step": 8375 }, { "epoch": 5.06637001282148, "grad_norm": 0.3791191577911377, "learning_rate": 0.00027605034371209054, "loss": 0.1248, "step": 8400 }, { "epoch": 5.081454106644543, "grad_norm": 0.1905103623867035, "learning_rate": 0.00027597452486858063, "loss": 0.1933, "step": 8425 }, { "epoch": 5.096538200467607, "grad_norm": 0.28806644678115845, "learning_rate": 0.0002758987060250707, "loss": 0.1295, "step": 8450 }, { "epoch": 5.111622294290671, "grad_norm": 0.34333837032318115, "learning_rate": 0.00027582288718156086, "loss": 0.1778, "step": 8475 }, { "epoch": 5.1267063881137345, "grad_norm": 0.5322638154029846, "learning_rate": 0.0002757470683380509, "loss": 0.1459, "step": 8500 }, { "epoch": 5.1267063881137345, "eval_loss": 0.1905168741941452, "eval_runtime": 772.0187, "eval_samples_per_second": 4.294, "eval_steps_per_second": 1.074, "eval_wer": 0.21526750526372618, "step": 8500 }, { "epoch": 5.141790481936797, "grad_norm": 0.39731988310813904, "learning_rate": 0.00027567124949454103, "loss": 0.1731, "step": 8525 }, { "epoch": 5.156874575759861, "grad_norm": 0.760844349861145, "learning_rate": 0.0002755954306510311, "loss": 0.1466, "step": 8550 }, { "epoch": 5.171958669582925, "grad_norm": 0.8529795408248901, "learning_rate": 0.0002755196118075212, "loss": 0.1737, "step": 8575 }, { "epoch": 5.187042763405988, "grad_norm": 0.45475590229034424, "learning_rate": 0.0002754437929640113, "loss": 0.1452, "step": 8600 }, { "epoch": 5.202126857229052, "grad_norm": 0.26215818524360657, "learning_rate": 0.0002753710068742418, "loss": 0.4218, "step": 8625 }, { "epoch": 5.217210951052116, "grad_norm": 0.38315796852111816, "learning_rate": 0.0002752951880307319, "loss": 0.1261, "step": 8650 }, { "epoch": 5.232295044875179, "grad_norm": 0.41056641936302185, "learning_rate": 0.00027521936918722197, "loss": 0.162, "step": 8675 }, { "epoch": 5.247379138698243, "grad_norm": 0.22341406345367432, "learning_rate": 0.00027514355034371205, "loss": 0.1305, "step": 8700 }, { "epoch": 5.2624632325213065, "grad_norm": 0.2595282793045044, "learning_rate": 0.00027506773150020214, "loss": 0.1655, "step": 8725 }, { "epoch": 5.2775473263443695, "grad_norm": 0.2871881127357483, "learning_rate": 0.0002749919126566923, "loss": 0.1347, "step": 8750 }, { "epoch": 5.292631420167433, "grad_norm": 0.17933253943920135, "learning_rate": 0.0002749160938131823, "loss": 0.1715, "step": 8775 }, { "epoch": 5.307715513990497, "grad_norm": 0.5562332272529602, "learning_rate": 0.00027484027496967246, "loss": 0.1298, "step": 8800 }, { "epoch": 5.32279960781356, "grad_norm": 0.2567090392112732, "learning_rate": 0.00027476445612616254, "loss": 0.1673, "step": 8825 }, { "epoch": 5.337883701636624, "grad_norm": 0.3126806914806366, "learning_rate": 0.00027468863728265263, "loss": 0.126, "step": 8850 }, { "epoch": 5.352967795459688, "grad_norm": 0.16551165282726288, "learning_rate": 0.0002746128184391427, "loss": 0.1633, "step": 8875 }, { "epoch": 5.368051889282752, "grad_norm": 0.337634414434433, "learning_rate": 0.0002745369995956328, "loss": 0.1204, "step": 8900 }, { "epoch": 5.383135983105815, "grad_norm": 0.36297717690467834, "learning_rate": 0.0002744611807521229, "loss": 0.1927, "step": 8925 }, { "epoch": 5.398220076928879, "grad_norm": 0.3347633481025696, "learning_rate": 0.00027438536190861303, "loss": 0.1434, "step": 8950 }, { "epoch": 5.4133041707519425, "grad_norm": 0.2264309823513031, "learning_rate": 0.00027430954306510307, "loss": 0.2008, "step": 8975 }, { "epoch": 5.4283882645750055, "grad_norm": 0.263372004032135, "learning_rate": 0.0002742337242215932, "loss": 0.1471, "step": 9000 }, { "epoch": 5.4283882645750055, "eval_loss": 0.1863842010498047, "eval_runtime": 767.0229, "eval_samples_per_second": 4.322, "eval_steps_per_second": 1.081, "eval_wer": 0.20231064082492037, "step": 9000 }, { "epoch": 5.443472358398069, "grad_norm": 0.20563261210918427, "learning_rate": 0.0002741579053780833, "loss": 0.1721, "step": 9025 }, { "epoch": 5.458556452221133, "grad_norm": 0.26344984769821167, "learning_rate": 0.0002740820865345733, "loss": 0.1332, "step": 9050 }, { "epoch": 5.473640546044196, "grad_norm": 0.2505332827568054, "learning_rate": 0.00027400626769106347, "loss": 0.1638, "step": 9075 }, { "epoch": 5.48872463986726, "grad_norm": 0.5164260864257812, "learning_rate": 0.00027393044884755355, "loss": 0.1311, "step": 9100 }, { "epoch": 5.503808733690324, "grad_norm": 0.15487757325172424, "learning_rate": 0.00027385463000404364, "loss": 0.1715, "step": 9125 }, { "epoch": 5.518892827513387, "grad_norm": 0.46957388520240784, "learning_rate": 0.00027377881116053373, "loss": 0.1392, "step": 9150 }, { "epoch": 5.533976921336451, "grad_norm": 0.20295724272727966, "learning_rate": 0.0002737029923170238, "loss": 0.2009, "step": 9175 }, { "epoch": 5.5490610151595146, "grad_norm": 0.27826988697052, "learning_rate": 0.0002736271734735139, "loss": 0.1404, "step": 9200 }, { "epoch": 5.5641451089825775, "grad_norm": 0.2126350849866867, "learning_rate": 0.00027355135463000404, "loss": 0.1784, "step": 9225 }, { "epoch": 5.579229202805641, "grad_norm": 0.4118373692035675, "learning_rate": 0.00027347553578649413, "loss": 0.1309, "step": 9250 }, { "epoch": 5.594313296628705, "grad_norm": 0.16608783602714539, "learning_rate": 0.0002733997169429842, "loss": 0.1532, "step": 9275 }, { "epoch": 5.609397390451768, "grad_norm": 0.3244372308254242, "learning_rate": 0.0002733238980994743, "loss": 0.124, "step": 9300 }, { "epoch": 5.624481484274832, "grad_norm": 0.2650703489780426, "learning_rate": 0.0002732480792559644, "loss": 0.1924, "step": 9325 }, { "epoch": 5.639565578097896, "grad_norm": 0.4252435564994812, "learning_rate": 0.0002731722604124545, "loss": 0.1336, "step": 9350 }, { "epoch": 5.654649671920959, "grad_norm": 0.18568560481071472, "learning_rate": 0.00027309644156894457, "loss": 0.167, "step": 9375 }, { "epoch": 5.669733765744023, "grad_norm": 0.30477166175842285, "learning_rate": 0.00027302062272543465, "loss": 0.128, "step": 9400 }, { "epoch": 5.684817859567087, "grad_norm": 0.19481921195983887, "learning_rate": 0.0002729448038819248, "loss": 0.1736, "step": 9425 }, { "epoch": 5.69990195339015, "grad_norm": 0.5762272477149963, "learning_rate": 0.0002728689850384149, "loss": 0.1353, "step": 9450 }, { "epoch": 5.7149860472132135, "grad_norm": 0.9085726141929626, "learning_rate": 0.00027279316619490497, "loss": 0.1679, "step": 9475 }, { "epoch": 5.730070141036277, "grad_norm": 0.5637058615684509, "learning_rate": 0.00027271734735139505, "loss": 0.1528, "step": 9500 }, { "epoch": 5.730070141036277, "eval_loss": 0.1828906089067459, "eval_runtime": 770.1353, "eval_samples_per_second": 4.304, "eval_steps_per_second": 1.076, "eval_wer": 0.208940236462776, "step": 9500 }, { "epoch": 5.745154234859341, "grad_norm": 0.1850712150335312, "learning_rate": 0.00027264152850788514, "loss": 0.1782, "step": 9525 }, { "epoch": 5.760238328682404, "grad_norm": 0.4733022153377533, "learning_rate": 0.00027256570966437523, "loss": 0.136, "step": 9550 }, { "epoch": 5.775322422505468, "grad_norm": 0.2352697253227234, "learning_rate": 0.0002724898908208653, "loss": 0.1636, "step": 9575 }, { "epoch": 5.790406516328532, "grad_norm": 0.297294557094574, "learning_rate": 0.0002724140719773554, "loss": 0.1399, "step": 9600 }, { "epoch": 5.805490610151595, "grad_norm": 0.22856192290782928, "learning_rate": 0.0002723382531338455, "loss": 0.1672, "step": 9625 }, { "epoch": 5.820574703974659, "grad_norm": 0.19410184025764465, "learning_rate": 0.00027226243429033563, "loss": 0.1342, "step": 9650 }, { "epoch": 5.835658797797723, "grad_norm": 0.15539394319057465, "learning_rate": 0.00027218661544682566, "loss": 0.178, "step": 9675 }, { "epoch": 5.8507428916207855, "grad_norm": 0.27372846007347107, "learning_rate": 0.0002721107966033158, "loss": 0.1359, "step": 9700 }, { "epoch": 5.865826985443849, "grad_norm": 0.1481178253889084, "learning_rate": 0.0002720349777598059, "loss": 0.1484, "step": 9725 }, { "epoch": 5.880911079266913, "grad_norm": 0.22347645461559296, "learning_rate": 0.000271959158916296, "loss": 0.1372, "step": 9750 }, { "epoch": 5.895995173089977, "grad_norm": 0.13955964148044586, "learning_rate": 0.00027188334007278607, "loss": 0.1506, "step": 9775 }, { "epoch": 5.91107926691304, "grad_norm": 0.44418036937713623, "learning_rate": 0.00027180752122927615, "loss": 0.129, "step": 9800 }, { "epoch": 5.926163360736104, "grad_norm": 0.18400181829929352, "learning_rate": 0.00027173170238576624, "loss": 0.1812, "step": 9825 }, { "epoch": 5.941247454559168, "grad_norm": 0.3210029602050781, "learning_rate": 0.0002716558835422564, "loss": 0.1279, "step": 9850 }, { "epoch": 5.956331548382231, "grad_norm": 0.4363233745098114, "learning_rate": 0.0002715800646987464, "loss": 0.1578, "step": 9875 }, { "epoch": 5.971415642205295, "grad_norm": 0.9395078420639038, "learning_rate": 0.00027150424585523655, "loss": 0.1398, "step": 9900 }, { "epoch": 5.9864997360283585, "grad_norm": 0.16781263053417206, "learning_rate": 0.00027142842701172664, "loss": 0.1687, "step": 9925 }, { "epoch": 6.001206727505845, "grad_norm": 0.5314217209815979, "learning_rate": 0.0002713526081682167, "loss": 0.1023, "step": 9950 }, { "epoch": 6.016290821328909, "grad_norm": 0.503394365310669, "learning_rate": 0.0002712767893247068, "loss": 0.2136, "step": 9975 }, { "epoch": 6.031374915151972, "grad_norm": 0.38936060667037964, "learning_rate": 0.0002712009704811969, "loss": 0.0668, "step": 10000 }, { "epoch": 6.031374915151972, "eval_loss": 0.19079850614070892, "eval_runtime": 765.219, "eval_samples_per_second": 4.332, "eval_steps_per_second": 1.083, "eval_wer": 0.20498839280894024, "step": 10000 }, { "epoch": 6.031374915151972, "step": 10000, "total_flos": 7.129096476605158e+19, "train_loss": 0.34351529302597045, "train_runtime": 124569.2108, "train_samples_per_second": 12.772, "train_steps_per_second": 0.798 } ], "logging_steps": 25, "max_steps": 99420, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.129096476605158e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }