| { | |
| "best_global_step": 7500, | |
| "best_metric": 0.1822061687707901, | |
| "best_model_checkpoint": "./Wav2vec2-afr/checkpoint-7500", | |
| "epoch": 6.031374915151972, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015084093823063579, | |
| "grad_norm": 27.733379364013672, | |
| "learning_rate": 1.4999999999999999e-05, | |
| "loss": 20.2292, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.030168187646127158, | |
| "grad_norm": 19.019153594970703, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 11.6267, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04525228146919074, | |
| "grad_norm": 5.631908416748047, | |
| "learning_rate": 4.4999999999999996e-05, | |
| "loss": 4.8714, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.060336375292254316, | |
| "grad_norm": 2.689542293548584, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 3.5888, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0754204691153179, | |
| "grad_norm": 1.0974892377853394, | |
| "learning_rate": 7.5e-05, | |
| "loss": 3.2364, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.09050456293838148, | |
| "grad_norm": 0.6232272386550903, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 3.0531, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10558865676144506, | |
| "grad_norm": 0.3990340828895569, | |
| "learning_rate": 0.00010499999999999999, | |
| "loss": 3.0197, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.12067275058450863, | |
| "grad_norm": 0.6495853662490845, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 2.9908, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1357568444075722, | |
| "grad_norm": 1.156081199645996, | |
| "learning_rate": 0.000135, | |
| "loss": 2.9629, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1508409382306358, | |
| "grad_norm": 1.0735247135162354, | |
| "learning_rate": 0.00015, | |
| "loss": 2.5531, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.16592503205369938, | |
| "grad_norm": 0.6276996731758118, | |
| "learning_rate": 0.0001644, | |
| "loss": 1.7912, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18100912587676296, | |
| "grad_norm": 1.1443787813186646, | |
| "learning_rate": 0.00017939999999999997, | |
| "loss": 0.7469, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19609321969982654, | |
| "grad_norm": 0.47428029775619507, | |
| "learning_rate": 0.00019439999999999998, | |
| "loss": 0.7277, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.21117731352289013, | |
| "grad_norm": 1.0143519639968872, | |
| "learning_rate": 0.00020939999999999997, | |
| "loss": 0.4411, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22626140734595368, | |
| "grad_norm": 0.36140990257263184, | |
| "learning_rate": 0.00022439999999999998, | |
| "loss": 0.4558, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.24134550116901726, | |
| "grad_norm": 0.6368806958198547, | |
| "learning_rate": 0.0002394, | |
| "loss": 0.2993, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.25642959499208084, | |
| "grad_norm": 0.5180590748786926, | |
| "learning_rate": 0.00025439999999999995, | |
| "loss": 0.4165, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2715136888151444, | |
| "grad_norm": 5.711215972900391, | |
| "learning_rate": 0.0002694, | |
| "loss": 0.2686, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.286597782638208, | |
| "grad_norm": 1.2874069213867188, | |
| "learning_rate": 0.0002844, | |
| "loss": 0.4259, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3016818764612716, | |
| "grad_norm": 0.6762956380844116, | |
| "learning_rate": 0.00029939999999999996, | |
| "loss": 0.2638, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3016818764612716, | |
| "eval_loss": 0.32083866000175476, | |
| "eval_runtime": 754.2459, | |
| "eval_samples_per_second": 4.395, | |
| "eval_steps_per_second": 1.099, | |
| "eval_wer": 0.3275495330130109, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3167659702843352, | |
| "grad_norm": 0.3699875771999359, | |
| "learning_rate": 0.0002999272139102305, | |
| "loss": 0.4233, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.33185006410739876, | |
| "grad_norm": 0.9410744905471802, | |
| "learning_rate": 0.00029985139506672056, | |
| "loss": 0.2435, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.34693415793046234, | |
| "grad_norm": 0.7363893389701843, | |
| "learning_rate": 0.00029977557622321065, | |
| "loss": 0.4594, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3620182517535259, | |
| "grad_norm": 0.9007164239883423, | |
| "learning_rate": 0.00029969975737970074, | |
| "loss": 0.2262, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3771023455765895, | |
| "grad_norm": 0.4770212173461914, | |
| "learning_rate": 0.0002996239385361908, | |
| "loss": 0.4473, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3921864393996531, | |
| "grad_norm": 0.9380179643630981, | |
| "learning_rate": 0.00029954811969268096, | |
| "loss": 0.227, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.40727053322271667, | |
| "grad_norm": 0.5483216643333435, | |
| "learning_rate": 0.000299472300849171, | |
| "loss": 0.3786, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.42235462704578025, | |
| "grad_norm": 0.44440972805023193, | |
| "learning_rate": 0.0002993964820056611, | |
| "loss": 0.1966, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4374387208688438, | |
| "grad_norm": 0.6046245098114014, | |
| "learning_rate": 0.0002993206631621512, | |
| "loss": 0.3704, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.45252281469190736, | |
| "grad_norm": 0.6455674767494202, | |
| "learning_rate": 0.0002992448443186413, | |
| "loss": 0.1906, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.46760690851497094, | |
| "grad_norm": 0.35442134737968445, | |
| "learning_rate": 0.0002991690254751314, | |
| "loss": 0.3416, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4826910023380345, | |
| "grad_norm": 0.9801518321037292, | |
| "learning_rate": 0.0002990932066316215, | |
| "loss": 0.1892, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4977750961610981, | |
| "grad_norm": 0.4081840217113495, | |
| "learning_rate": 0.00029901738778811157, | |
| "loss": 0.3756, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5128591899841617, | |
| "grad_norm": 0.6348875164985657, | |
| "learning_rate": 0.00029894156894460166, | |
| "loss": 0.1788, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5279432838072253, | |
| "grad_norm": 0.2996980547904968, | |
| "learning_rate": 0.00029886575010109175, | |
| "loss": 0.3618, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5430273776302889, | |
| "grad_norm": 0.5273446440696716, | |
| "learning_rate": 0.00029878993125758183, | |
| "loss": 0.1717, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5581114714533525, | |
| "grad_norm": 0.7878848910331726, | |
| "learning_rate": 0.000298714112414072, | |
| "loss": 0.3516, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.573195565276416, | |
| "grad_norm": 0.57522052526474, | |
| "learning_rate": 0.00029863829357056206, | |
| "loss": 0.1619, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5882796590994795, | |
| "grad_norm": 0.2759730815887451, | |
| "learning_rate": 0.00029856247472705215, | |
| "loss": 0.3349, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6033637529225432, | |
| "grad_norm": 0.8881000876426697, | |
| "learning_rate": 0.00029848665588354224, | |
| "loss": 0.1603, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6033637529225432, | |
| "eval_loss": 0.24969196319580078, | |
| "eval_runtime": 757.505, | |
| "eval_samples_per_second": 4.376, | |
| "eval_steps_per_second": 1.094, | |
| "eval_wer": 0.2649138908384171, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6184478467456067, | |
| "grad_norm": 0.6784160137176514, | |
| "learning_rate": 0.0002984108370400323, | |
| "loss": 0.3343, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6335319405686703, | |
| "grad_norm": 1.6744931936264038, | |
| "learning_rate": 0.0002983350181965224, | |
| "loss": 0.1664, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6486160343917339, | |
| "grad_norm": 0.35144105553627014, | |
| "learning_rate": 0.0002982591993530125, | |
| "loss": 0.3518, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6637001282147975, | |
| "grad_norm": 0.7025427222251892, | |
| "learning_rate": 0.0002981833805095026, | |
| "loss": 0.1482, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.678784222037861, | |
| "grad_norm": 0.4477069675922394, | |
| "learning_rate": 0.0002981075616659927, | |
| "loss": 0.3463, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6938683158609247, | |
| "grad_norm": 0.603905975818634, | |
| "learning_rate": 0.0002980317428224828, | |
| "loss": 0.1722, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7089524096839882, | |
| "grad_norm": 0.2929205298423767, | |
| "learning_rate": 0.00029795592397897284, | |
| "loss": 0.3269, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.7240365035070518, | |
| "grad_norm": 0.6123032569885254, | |
| "learning_rate": 0.000297880105135463, | |
| "loss": 0.1573, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7391205973301154, | |
| "grad_norm": 0.2819807529449463, | |
| "learning_rate": 0.00029780428629195307, | |
| "loss": 0.3203, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.754204691153179, | |
| "grad_norm": 0.5671049356460571, | |
| "learning_rate": 0.00029772846744844316, | |
| "loss": 0.1489, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7692887849762425, | |
| "grad_norm": 0.7186427116394043, | |
| "learning_rate": 0.00029765264860493325, | |
| "loss": 0.3486, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.7843728787993062, | |
| "grad_norm": 0.5397343635559082, | |
| "learning_rate": 0.00029757682976142333, | |
| "loss": 0.1384, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7994569726223697, | |
| "grad_norm": 0.3157232701778412, | |
| "learning_rate": 0.0002975010109179134, | |
| "loss": 0.3505, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.8145410664454333, | |
| "grad_norm": 0.714611828327179, | |
| "learning_rate": 0.00029742519207440356, | |
| "loss": 0.1502, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8296251602684969, | |
| "grad_norm": 0.3562470078468323, | |
| "learning_rate": 0.0002973493732308936, | |
| "loss": 0.3845, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.8447092540915605, | |
| "grad_norm": 0.5777546167373657, | |
| "learning_rate": 0.00029727355438738374, | |
| "loss": 0.1541, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.859793347914624, | |
| "grad_norm": 0.49272066354751587, | |
| "learning_rate": 0.0002971977355438738, | |
| "loss": 0.3538, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.8748774417376876, | |
| "grad_norm": 0.8076097369194031, | |
| "learning_rate": 0.0002971219167003639, | |
| "loss": 0.1343, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.8899615355607512, | |
| "grad_norm": 0.43889355659484863, | |
| "learning_rate": 0.000297046097856854, | |
| "loss": 0.3096, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.9050456293838147, | |
| "grad_norm": 0.8419676423072815, | |
| "learning_rate": 0.0002969702790133441, | |
| "loss": 0.142, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9050456293838147, | |
| "eval_loss": 0.21447643637657166, | |
| "eval_runtime": 715.0284, | |
| "eval_samples_per_second": 4.636, | |
| "eval_steps_per_second": 1.159, | |
| "eval_wer": 0.24875020245100685, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9201297232068784, | |
| "grad_norm": 0.4135502278804779, | |
| "learning_rate": 0.00029689446016983417, | |
| "loss": 0.359, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9352138170299419, | |
| "grad_norm": 0.5666655898094177, | |
| "learning_rate": 0.0002968186413263243, | |
| "loss": 0.1478, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.9502979108530055, | |
| "grad_norm": 0.30614542961120605, | |
| "learning_rate": 0.00029674282248281434, | |
| "loss": 0.3263, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.965382004676069, | |
| "grad_norm": 0.5047621130943298, | |
| "learning_rate": 0.0002966670036393045, | |
| "loss": 0.1369, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.9804660984991327, | |
| "grad_norm": 0.4826274514198303, | |
| "learning_rate": 0.00029659118479579457, | |
| "loss": 0.3709, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.9955501923221962, | |
| "grad_norm": 0.7504212260246277, | |
| "learning_rate": 0.00029651536595228466, | |
| "loss": 0.1389, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.0102571837996832, | |
| "grad_norm": 0.40745294094085693, | |
| "learning_rate": 0.00029643954710877475, | |
| "loss": 0.2744, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.0253412776227468, | |
| "grad_norm": 0.27026304602622986, | |
| "learning_rate": 0.00029636372826526483, | |
| "loss": 0.177, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.0404253714458105, | |
| "grad_norm": 0.4111769199371338, | |
| "learning_rate": 0.0002962879094217549, | |
| "loss": 0.2356, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.055509465268874, | |
| "grad_norm": 0.30931055545806885, | |
| "learning_rate": 0.000296212090578245, | |
| "loss": 0.1747, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.0705935590919375, | |
| "grad_norm": 0.3719274699687958, | |
| "learning_rate": 0.0002961362717347351, | |
| "loss": 0.245, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.0856776529150012, | |
| "grad_norm": 0.28636762499809265, | |
| "learning_rate": 0.0002960604528912252, | |
| "loss": 0.1781, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.1007617467380646, | |
| "grad_norm": 0.27731388807296753, | |
| "learning_rate": 0.0002959846340477153, | |
| "loss": 0.2435, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 1.1158458405611282, | |
| "grad_norm": 0.315758615732193, | |
| "learning_rate": 0.0002959088152042054, | |
| "loss": 0.1608, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.1309299343841919, | |
| "grad_norm": 0.7471879720687866, | |
| "learning_rate": 0.0002958329963606955, | |
| "loss": 0.2436, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.1460140282072555, | |
| "grad_norm": 0.3493207097053528, | |
| "learning_rate": 0.0002957571775171856, | |
| "loss": 0.165, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.1610981220303191, | |
| "grad_norm": 0.3202393651008606, | |
| "learning_rate": 0.00029568135867367567, | |
| "loss": 0.2448, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.1761822158533826, | |
| "grad_norm": 0.2840123176574707, | |
| "learning_rate": 0.00029560553983016576, | |
| "loss": 0.1583, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.1912663096764462, | |
| "grad_norm": 0.34918013215065, | |
| "learning_rate": 0.00029552972098665584, | |
| "loss": 0.246, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.2063504034995098, | |
| "grad_norm": 0.2470645010471344, | |
| "learning_rate": 0.00029545390214314593, | |
| "loss": 0.1645, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.2063504034995098, | |
| "eval_loss": 0.20230437815189362, | |
| "eval_runtime": 763.4911, | |
| "eval_samples_per_second": 4.342, | |
| "eval_steps_per_second": 1.086, | |
| "eval_wer": 0.22521189872050965, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.2214344973225733, | |
| "grad_norm": 0.5461482405662537, | |
| "learning_rate": 0.00029537808329963607, | |
| "loss": 0.2304, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.236518591145637, | |
| "grad_norm": 0.44666436314582825, | |
| "learning_rate": 0.00029530226445612616, | |
| "loss": 0.1632, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.2516026849687005, | |
| "grad_norm": 0.44556596875190735, | |
| "learning_rate": 0.0002952264456126162, | |
| "loss": 0.252, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.2666867787917642, | |
| "grad_norm": 0.44631072878837585, | |
| "learning_rate": 0.00029515062676910633, | |
| "loss": 0.1795, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.2817708726148276, | |
| "grad_norm": 0.2911534607410431, | |
| "learning_rate": 0.0002950748079255964, | |
| "loss": 0.2458, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.2968549664378912, | |
| "grad_norm": 0.34504878520965576, | |
| "learning_rate": 0.0002949989890820865, | |
| "loss": 0.2024, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.3119390602609549, | |
| "grad_norm": 0.295748770236969, | |
| "learning_rate": 0.000294926202992317, | |
| "loss": 0.6418, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.3270231540840185, | |
| "grad_norm": 0.2935297191143036, | |
| "learning_rate": 0.0002948503841488071, | |
| "loss": 0.1568, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.342107247907082, | |
| "grad_norm": 0.45815935730934143, | |
| "learning_rate": 0.0002947745653052972, | |
| "loss": 0.2491, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.3571913417301456, | |
| "grad_norm": 0.3507382273674011, | |
| "learning_rate": 0.00029469874646178727, | |
| "loss": 0.1683, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.3722754355532092, | |
| "grad_norm": 0.28953784704208374, | |
| "learning_rate": 0.00029462292761827736, | |
| "loss": 0.2649, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.3873595293762726, | |
| "grad_norm": 0.26776325702667236, | |
| "learning_rate": 0.00029454710877476744, | |
| "loss": 0.1511, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.4024436231993362, | |
| "grad_norm": 0.6518787741661072, | |
| "learning_rate": 0.0002944712899312576, | |
| "loss": 0.2438, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.4175277170223999, | |
| "grad_norm": 0.3653862476348877, | |
| "learning_rate": 0.0002943954710877476, | |
| "loss": 0.1677, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.4326118108454635, | |
| "grad_norm": 0.4537375569343567, | |
| "learning_rate": 0.00029431965224423776, | |
| "loss": 0.2619, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.4476959046685272, | |
| "grad_norm": 0.3677208721637726, | |
| "learning_rate": 0.00029424383340072785, | |
| "loss": 0.1564, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.4627799984915906, | |
| "grad_norm": 0.33267661929130554, | |
| "learning_rate": 0.00029416801455721793, | |
| "loss": 0.232, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.4778640923146542, | |
| "grad_norm": 0.32956621050834656, | |
| "learning_rate": 0.000294092195713708, | |
| "loss": 0.1841, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.4929481861377178, | |
| "grad_norm": 0.41168802976608276, | |
| "learning_rate": 0.0002940163768701981, | |
| "loss": 0.2432, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.5080322799607813, | |
| "grad_norm": 0.333852082490921, | |
| "learning_rate": 0.0002939405580266882, | |
| "loss": 0.1818, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.5080322799607813, | |
| "eval_loss": 0.19838476181030273, | |
| "eval_runtime": 747.6476, | |
| "eval_samples_per_second": 4.434, | |
| "eval_steps_per_second": 1.109, | |
| "eval_wer": 0.21765372779787293, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.523116373783845, | |
| "grad_norm": 0.35393306612968445, | |
| "learning_rate": 0.00029386473918317833, | |
| "loss": 0.2496, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.5382004676069085, | |
| "grad_norm": 0.23671747744083405, | |
| "learning_rate": 0.00029378892033966837, | |
| "loss": 0.1641, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.553284561429972, | |
| "grad_norm": 0.49785369634628296, | |
| "learning_rate": 0.0002937131014961585, | |
| "loss": 0.2304, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.5683686552530358, | |
| "grad_norm": 0.7474918365478516, | |
| "learning_rate": 0.0002936372826526486, | |
| "loss": 0.1914, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.5834527490760992, | |
| "grad_norm": 0.3910874128341675, | |
| "learning_rate": 0.0002935614638091387, | |
| "loss": 0.2516, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.5985368428991629, | |
| "grad_norm": 0.33777278661727905, | |
| "learning_rate": 0.00029348564496562877, | |
| "loss": 0.1778, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.6136209367222265, | |
| "grad_norm": 0.39339521527290344, | |
| "learning_rate": 0.00029340982612211886, | |
| "loss": 0.2651, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.62870503054529, | |
| "grad_norm": 0.33642980456352234, | |
| "learning_rate": 0.00029333400727860894, | |
| "loss": 0.1987, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.6437891243683536, | |
| "grad_norm": 0.4382663071155548, | |
| "learning_rate": 0.00029325818843509903, | |
| "loss": 0.3213, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.6588732181914172, | |
| "grad_norm": 0.29682841897010803, | |
| "learning_rate": 0.0002931823695915891, | |
| "loss": 0.1744, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.6739573120144806, | |
| "grad_norm": 0.5017745494842529, | |
| "learning_rate": 0.0002931065507480792, | |
| "loss": 0.2454, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.6890414058375445, | |
| "grad_norm": 0.31300872564315796, | |
| "learning_rate": 0.00029303073190456935, | |
| "loss": 0.1642, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.704125499660608, | |
| "grad_norm": 0.2737913429737091, | |
| "learning_rate": 0.0002929549130610594, | |
| "loss": 0.2304, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.7192095934836713, | |
| "grad_norm": 0.5299367904663086, | |
| "learning_rate": 0.0002928790942175495, | |
| "loss": 0.1446, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.7342936873067352, | |
| "grad_norm": 0.4113737642765045, | |
| "learning_rate": 0.0002928032753740396, | |
| "loss": 0.2751, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.7493777811297986, | |
| "grad_norm": 0.33324816823005676, | |
| "learning_rate": 0.0002927274565305297, | |
| "loss": 0.1635, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.7644618749528622, | |
| "grad_norm": 0.33196279406547546, | |
| "learning_rate": 0.0002926516376870198, | |
| "loss": 0.2426, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.7795459687759259, | |
| "grad_norm": 0.2307615876197815, | |
| "learning_rate": 0.00029257581884350987, | |
| "loss": 0.1548, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.7946300625989893, | |
| "grad_norm": 0.259033739566803, | |
| "learning_rate": 0.00029249999999999995, | |
| "loss": 0.2481, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.809714156422053, | |
| "grad_norm": 0.3014342784881592, | |
| "learning_rate": 0.0002924241811564901, | |
| "loss": 0.1827, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.809714156422053, | |
| "eval_loss": 0.1995258629322052, | |
| "eval_runtime": 720.0351, | |
| "eval_samples_per_second": 4.604, | |
| "eval_steps_per_second": 1.151, | |
| "eval_wer": 0.2203962641040868, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.8247982502451165, | |
| "grad_norm": 0.348734050989151, | |
| "learning_rate": 0.0002923483623129802, | |
| "loss": 0.2435, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.83988234406818, | |
| "grad_norm": 0.38749223947525024, | |
| "learning_rate": 0.00029227254346947027, | |
| "loss": 0.1839, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.8549664378912438, | |
| "grad_norm": 0.3979107141494751, | |
| "learning_rate": 0.00029219672462596036, | |
| "loss": 0.2518, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.8700505317143072, | |
| "grad_norm": 0.2685915231704712, | |
| "learning_rate": 0.00029212090578245044, | |
| "loss": 0.1711, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.8851346255373709, | |
| "grad_norm": 0.2596099078655243, | |
| "learning_rate": 0.00029204508693894053, | |
| "loss": 0.2576, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.9002187193604345, | |
| "grad_norm": 0.26872625946998596, | |
| "learning_rate": 0.0002919692680954306, | |
| "loss": 0.1484, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.915302813183498, | |
| "grad_norm": 0.32843342423439026, | |
| "learning_rate": 0.0002918934492519207, | |
| "loss": 0.2559, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.9303869070065616, | |
| "grad_norm": 0.26869305968284607, | |
| "learning_rate": 0.0002918176304084108, | |
| "loss": 0.1751, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.9454710008296252, | |
| "grad_norm": 0.2810923159122467, | |
| "learning_rate": 0.00029174181156490093, | |
| "loss": 0.2244, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.9605550946526886, | |
| "grad_norm": 0.3540550470352173, | |
| "learning_rate": 0.00029166599272139096, | |
| "loss": 0.1661, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.9756391884757525, | |
| "grad_norm": 0.23880811035633087, | |
| "learning_rate": 0.0002915901738778811, | |
| "loss": 0.2503, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.990723282298816, | |
| "grad_norm": 0.24448829889297485, | |
| "learning_rate": 0.0002915143550343712, | |
| "loss": 0.1501, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.005430273776303, | |
| "grad_norm": 0.931875467300415, | |
| "learning_rate": 0.0002914385361908613, | |
| "loss": 0.228, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 2.0205143675993664, | |
| "grad_norm": 0.22488653659820557, | |
| "learning_rate": 0.00029136271734735137, | |
| "loss": 0.2308, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.03559846142243, | |
| "grad_norm": 0.33369916677474976, | |
| "learning_rate": 0.00029128689850384145, | |
| "loss": 0.1449, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 2.0506825552454937, | |
| "grad_norm": 0.28679683804512024, | |
| "learning_rate": 0.00029121107966033154, | |
| "loss": 0.221, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.065766649068557, | |
| "grad_norm": 0.2800401449203491, | |
| "learning_rate": 0.0002911352608168217, | |
| "loss": 0.1486, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 2.080850742891621, | |
| "grad_norm": 0.36049047112464905, | |
| "learning_rate": 0.0002910594419733117, | |
| "loss": 0.2204, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.0959348367146844, | |
| "grad_norm": 0.39010685682296753, | |
| "learning_rate": 0.00029098362312980186, | |
| "loss": 0.1683, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 2.111018930537748, | |
| "grad_norm": 0.1714339703321457, | |
| "learning_rate": 0.00029090780428629194, | |
| "loss": 0.1859, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.111018930537748, | |
| "eval_loss": 0.19938839972019196, | |
| "eval_runtime": 719.6016, | |
| "eval_samples_per_second": 4.607, | |
| "eval_steps_per_second": 1.152, | |
| "eval_wer": 0.22404578092101712, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.1261030243608117, | |
| "grad_norm": 0.3100337088108063, | |
| "learning_rate": 0.00029083198544278203, | |
| "loss": 0.159, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 2.141187118183875, | |
| "grad_norm": 0.26142579317092896, | |
| "learning_rate": 0.0002907561665992721, | |
| "loss": 0.2408, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.1562712120069385, | |
| "grad_norm": 0.4712742269039154, | |
| "learning_rate": 0.0002906803477557622, | |
| "loss": 0.154, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 2.1713553058300024, | |
| "grad_norm": 0.2582917809486389, | |
| "learning_rate": 0.0002906045289122523, | |
| "loss": 0.2283, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.1864393996530658, | |
| "grad_norm": 0.2900999188423157, | |
| "learning_rate": 0.0002905287100687424, | |
| "loss": 0.1722, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 2.201523493476129, | |
| "grad_norm": 0.20985634624958038, | |
| "learning_rate": 0.00029045289122523246, | |
| "loss": 0.5346, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.216607587299193, | |
| "grad_norm": 0.32814761996269226, | |
| "learning_rate": 0.00029037707238172255, | |
| "loss": 0.1752, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 2.2316916811222565, | |
| "grad_norm": 0.22810375690460205, | |
| "learning_rate": 0.0002903012535382127, | |
| "loss": 0.2093, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.2467757749453203, | |
| "grad_norm": 0.4804486334323883, | |
| "learning_rate": 0.0002902254346947027, | |
| "loss": 0.1641, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.2618598687683837, | |
| "grad_norm": 0.2502540051937103, | |
| "learning_rate": 0.00029014961585119287, | |
| "loss": 0.2281, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.276943962591447, | |
| "grad_norm": 0.34445151686668396, | |
| "learning_rate": 0.00029007379700768295, | |
| "loss": 0.1683, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.292028056414511, | |
| "grad_norm": 0.16622678935527802, | |
| "learning_rate": 0.00028999797816417304, | |
| "loss": 0.237, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.3071121502375744, | |
| "grad_norm": 0.45780929923057556, | |
| "learning_rate": 0.00028992215932066313, | |
| "loss": 0.1715, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.3221962440606383, | |
| "grad_norm": 0.3053385615348816, | |
| "learning_rate": 0.0002898463404771532, | |
| "loss": 0.2236, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.3372803378837017, | |
| "grad_norm": 0.5077650547027588, | |
| "learning_rate": 0.0002897705216336433, | |
| "loss": 0.168, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.352364431706765, | |
| "grad_norm": 0.23254072666168213, | |
| "learning_rate": 0.00028969470279013344, | |
| "loss": 0.2139, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.367448525529829, | |
| "grad_norm": 0.5966396927833557, | |
| "learning_rate": 0.00028961888394662353, | |
| "loss": 0.1661, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.3825326193528924, | |
| "grad_norm": 0.2979792058467865, | |
| "learning_rate": 0.0002895430651031136, | |
| "loss": 0.2305, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.397616713175956, | |
| "grad_norm": 0.33389779925346375, | |
| "learning_rate": 0.0002894672462596037, | |
| "loss": 0.1607, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.4127008069990197, | |
| "grad_norm": 0.24503561854362488, | |
| "learning_rate": 0.0002893914274160938, | |
| "loss": 0.2488, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.4127008069990197, | |
| "eval_loss": 0.20397360622882843, | |
| "eval_runtime": 723.2916, | |
| "eval_samples_per_second": 4.583, | |
| "eval_steps_per_second": 1.146, | |
| "eval_wer": 0.2249527614317335, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.427784900822083, | |
| "grad_norm": 0.368486613035202, | |
| "learning_rate": 0.0002893156085725839, | |
| "loss": 0.1815, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.4428689946451465, | |
| "grad_norm": 0.4782474637031555, | |
| "learning_rate": 0.00028923978972907397, | |
| "loss": 0.2391, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.4579530884682104, | |
| "grad_norm": 0.3658357858657837, | |
| "learning_rate": 0.00028916397088556405, | |
| "loss": 0.1652, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.473037182291274, | |
| "grad_norm": 0.27112266421318054, | |
| "learning_rate": 0.00028908815204205414, | |
| "loss": 0.2343, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.488121276114337, | |
| "grad_norm": 0.3050464391708374, | |
| "learning_rate": 0.0002890123331985443, | |
| "loss": 0.1487, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.503205369937401, | |
| "grad_norm": 0.25800853967666626, | |
| "learning_rate": 0.0002889365143550343, | |
| "loss": 0.1978, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.5182894637604645, | |
| "grad_norm": 0.36631202697753906, | |
| "learning_rate": 0.00028886069551152445, | |
| "loss": 0.1668, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.5333735575835283, | |
| "grad_norm": 0.20563054084777832, | |
| "learning_rate": 0.00028878487666801454, | |
| "loss": 0.2048, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.5484576514065918, | |
| "grad_norm": 0.4122121334075928, | |
| "learning_rate": 0.00028870905782450463, | |
| "loss": 0.1579, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.563541745229655, | |
| "grad_norm": 0.27097514271736145, | |
| "learning_rate": 0.0002886332389809947, | |
| "loss": 0.2392, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.578625839052719, | |
| "grad_norm": 0.4393375813961029, | |
| "learning_rate": 0.0002885574201374848, | |
| "loss": 0.1461, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.5937099328757824, | |
| "grad_norm": 0.30553507804870605, | |
| "learning_rate": 0.0002884816012939749, | |
| "loss": 0.2319, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.6087940266988463, | |
| "grad_norm": 0.450008749961853, | |
| "learning_rate": 0.00028840578245046503, | |
| "loss": 0.1482, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.6238781205219097, | |
| "grad_norm": 0.2398987114429474, | |
| "learning_rate": 0.00028832996360695506, | |
| "loss": 0.2193, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.638962214344973, | |
| "grad_norm": 0.2716082036495209, | |
| "learning_rate": 0.0002882541447634452, | |
| "loss": 0.1716, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 2.654046308168037, | |
| "grad_norm": 0.18016381561756134, | |
| "learning_rate": 0.0002881783259199353, | |
| "loss": 0.2191, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.6691304019911004, | |
| "grad_norm": 0.24146826565265656, | |
| "learning_rate": 0.0002881025070764254, | |
| "loss": 0.1576, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 2.684214495814164, | |
| "grad_norm": 0.20057305693626404, | |
| "learning_rate": 0.00028802668823291547, | |
| "loss": 0.2222, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.6992985896372277, | |
| "grad_norm": 0.36306390166282654, | |
| "learning_rate": 0.00028795086938940555, | |
| "loss": 0.1662, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 2.714382683460291, | |
| "grad_norm": 0.2843382954597473, | |
| "learning_rate": 0.00028787505054589564, | |
| "loss": 0.2292, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.714382683460291, | |
| "eval_loss": 0.19914484024047852, | |
| "eval_runtime": 763.4779, | |
| "eval_samples_per_second": 4.342, | |
| "eval_steps_per_second": 1.086, | |
| "eval_wer": 0.22595691842574098, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.7294667772833545, | |
| "grad_norm": 0.5379898548126221, | |
| "learning_rate": 0.0002877992317023858, | |
| "loss": 0.1578, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 2.7445508711064184, | |
| "grad_norm": 0.18573738634586334, | |
| "learning_rate": 0.0002877234128588758, | |
| "loss": 0.2237, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.759634964929482, | |
| "grad_norm": 0.2831384241580963, | |
| "learning_rate": 0.0002876475940153659, | |
| "loss": 0.1606, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 2.774719058752545, | |
| "grad_norm": 0.5031735301017761, | |
| "learning_rate": 0.00028757177517185604, | |
| "loss": 0.1805, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.789803152575609, | |
| "grad_norm": 0.768892765045166, | |
| "learning_rate": 0.0002874959563283461, | |
| "loss": 0.157, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 2.8048872463986725, | |
| "grad_norm": 0.22896036505699158, | |
| "learning_rate": 0.0002874201374848362, | |
| "loss": 0.2106, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.8199713402217363, | |
| "grad_norm": 0.2187754064798355, | |
| "learning_rate": 0.0002873443186413263, | |
| "loss": 0.1605, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 2.8350554340447998, | |
| "grad_norm": 0.1761629283428192, | |
| "learning_rate": 0.0002872684997978164, | |
| "loss": 0.2186, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.850139527867863, | |
| "grad_norm": 0.3175676167011261, | |
| "learning_rate": 0.0002871926809543065, | |
| "loss": 0.1537, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 2.865223621690927, | |
| "grad_norm": 0.21728022396564484, | |
| "learning_rate": 0.00028711686211079656, | |
| "loss": 0.2119, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.8803077155139905, | |
| "grad_norm": 0.29456526041030884, | |
| "learning_rate": 0.00028704104326728665, | |
| "loss": 0.1556, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 2.8953918093370543, | |
| "grad_norm": 0.2309761792421341, | |
| "learning_rate": 0.0002869652244237768, | |
| "loss": 0.1805, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.9104759031601177, | |
| "grad_norm": 0.30479326844215393, | |
| "learning_rate": 0.0002868894055802669, | |
| "loss": 0.1529, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 2.925559996983181, | |
| "grad_norm": 0.1994062215089798, | |
| "learning_rate": 0.00028681358673675697, | |
| "loss": 0.192, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.940644090806245, | |
| "grad_norm": 0.3960685133934021, | |
| "learning_rate": 0.00028673776789324705, | |
| "loss": 0.1505, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 2.9557281846293084, | |
| "grad_norm": 0.18159142136573792, | |
| "learning_rate": 0.00028666194904973714, | |
| "loss": 0.1899, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.970812278452372, | |
| "grad_norm": 0.5303758978843689, | |
| "learning_rate": 0.0002865861302062272, | |
| "loss": 0.1899, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 2.9858963722754357, | |
| "grad_norm": 0.18483863770961761, | |
| "learning_rate": 0.0002865103113627173, | |
| "loss": 0.2267, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.0006033637529224, | |
| "grad_norm": 0.403163343667984, | |
| "learning_rate": 0.0002864344925192074, | |
| "loss": 0.1248, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 3.015687457575986, | |
| "grad_norm": 0.2946682274341583, | |
| "learning_rate": 0.0002863586736756975, | |
| "loss": 0.2489, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.015687457575986, | |
| "eval_loss": 0.1845189481973648, | |
| "eval_runtime": 767.7712, | |
| "eval_samples_per_second": 4.318, | |
| "eval_steps_per_second": 1.08, | |
| "eval_wer": 0.21365869459590778, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.0307715513990496, | |
| "grad_norm": 0.36335453391075134, | |
| "learning_rate": 0.00028628285483218763, | |
| "loss": 0.0881, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 3.0458556452221135, | |
| "grad_norm": 0.3755040764808655, | |
| "learning_rate": 0.00028620703598867766, | |
| "loss": 0.2487, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.060939739045177, | |
| "grad_norm": 0.38198986649513245, | |
| "learning_rate": 0.0002861312171451678, | |
| "loss": 0.0799, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 3.0760238328682403, | |
| "grad_norm": 0.17410704493522644, | |
| "learning_rate": 0.0002860553983016579, | |
| "loss": 0.2627, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.091107926691304, | |
| "grad_norm": 0.6871060132980347, | |
| "learning_rate": 0.000285979579458148, | |
| "loss": 0.0967, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 3.1061920205143676, | |
| "grad_norm": 0.40587660670280457, | |
| "learning_rate": 0.00028590376061463806, | |
| "loss": 0.2739, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.121276114337431, | |
| "grad_norm": 0.5106558799743652, | |
| "learning_rate": 0.00028582794177112815, | |
| "loss": 0.0817, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 3.136360208160495, | |
| "grad_norm": 0.5959634184837341, | |
| "learning_rate": 0.00028575212292761824, | |
| "loss": 0.2631, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.1514443019835583, | |
| "grad_norm": 0.32283464074134827, | |
| "learning_rate": 0.0002856763040841084, | |
| "loss": 0.0805, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 3.1665283958066217, | |
| "grad_norm": 0.4039979577064514, | |
| "learning_rate": 0.0002856004852405984, | |
| "loss": 0.2601, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.1816124896296856, | |
| "grad_norm": 0.9042678475379944, | |
| "learning_rate": 0.00028552466639708855, | |
| "loss": 0.0845, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 3.196696583452749, | |
| "grad_norm": 0.28807753324508667, | |
| "learning_rate": 0.00028544884755357864, | |
| "loss": 0.2692, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.211780677275813, | |
| "grad_norm": 0.2866358458995819, | |
| "learning_rate": 0.0002853730287100687, | |
| "loss": 0.081, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 3.2268647710988763, | |
| "grad_norm": 0.2690901756286621, | |
| "learning_rate": 0.0002852972098665588, | |
| "loss": 0.2499, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.2419488649219397, | |
| "grad_norm": 0.21377098560333252, | |
| "learning_rate": 0.0002852213910230489, | |
| "loss": 0.0802, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 3.2570329587450035, | |
| "grad_norm": 0.3079901933670044, | |
| "learning_rate": 0.000285145572179539, | |
| "loss": 0.2314, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.272117052568067, | |
| "grad_norm": 0.47138121724128723, | |
| "learning_rate": 0.00028506975333602913, | |
| "loss": 0.0883, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 3.2872011463911304, | |
| "grad_norm": 0.3404664695262909, | |
| "learning_rate": 0.00028499393449251916, | |
| "loss": 0.225, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.3022852402141942, | |
| "grad_norm": 0.3051985800266266, | |
| "learning_rate": 0.00028491811564900925, | |
| "loss": 0.0757, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 3.3173693340372576, | |
| "grad_norm": 0.27657219767570496, | |
| "learning_rate": 0.0002848453295592398, | |
| "loss": 0.4731, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.3173693340372576, | |
| "eval_loss": 0.185445174574852, | |
| "eval_runtime": 756.69, | |
| "eval_samples_per_second": 4.381, | |
| "eval_steps_per_second": 1.096, | |
| "eval_wer": 0.21210387086325108, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.3324534278603215, | |
| "grad_norm": 0.305276095867157, | |
| "learning_rate": 0.00028476951071572984, | |
| "loss": 0.0804, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 3.347537521683385, | |
| "grad_norm": 0.23126491904258728, | |
| "learning_rate": 0.00028469369187222, | |
| "loss": 0.2612, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.3626216155064483, | |
| "grad_norm": 0.29387226700782776, | |
| "learning_rate": 0.00028461787302871006, | |
| "loss": 0.085, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 3.377705709329512, | |
| "grad_norm": 0.7246792912483215, | |
| "learning_rate": 0.00028454205418520015, | |
| "loss": 0.2898, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.3927898031525756, | |
| "grad_norm": 0.42687901854515076, | |
| "learning_rate": 0.00028446623534169024, | |
| "loss": 0.101, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 3.407873896975639, | |
| "grad_norm": 0.2322624772787094, | |
| "learning_rate": 0.0002843904164981803, | |
| "loss": 0.2543, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.422957990798703, | |
| "grad_norm": 0.49676916003227234, | |
| "learning_rate": 0.0002843145976546704, | |
| "loss": 0.0832, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 3.4380420846217663, | |
| "grad_norm": 0.42384791374206543, | |
| "learning_rate": 0.0002842387788111605, | |
| "loss": 0.2517, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.4531261784448297, | |
| "grad_norm": 0.48723557591438293, | |
| "learning_rate": 0.0002841629599676506, | |
| "loss": 0.0974, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 3.4682102722678936, | |
| "grad_norm": 1.3422971963882446, | |
| "learning_rate": 0.00028408714112414067, | |
| "loss": 0.2576, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.483294366090957, | |
| "grad_norm": 0.5135483145713806, | |
| "learning_rate": 0.0002840113222806308, | |
| "loss": 0.0904, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 3.498378459914021, | |
| "grad_norm": 0.1537868082523346, | |
| "learning_rate": 0.00028393550343712085, | |
| "loss": 0.2522, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.5134625537370843, | |
| "grad_norm": 0.6072225570678711, | |
| "learning_rate": 0.000283859684593611, | |
| "loss": 0.1021, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 3.5285466475601477, | |
| "grad_norm": 0.2713959813117981, | |
| "learning_rate": 0.0002837838657501011, | |
| "loss": 0.2608, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.5436307413832115, | |
| "grad_norm": 0.4154447019100189, | |
| "learning_rate": 0.00028370804690659116, | |
| "loss": 0.0876, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 3.558714835206275, | |
| "grad_norm": 0.197547048330307, | |
| "learning_rate": 0.00028363222806308125, | |
| "loss": 0.2415, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.573798929029339, | |
| "grad_norm": 0.27300530672073364, | |
| "learning_rate": 0.00028355640921957134, | |
| "loss": 0.085, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 3.5888830228524022, | |
| "grad_norm": 0.22969838976860046, | |
| "learning_rate": 0.0002834805903760614, | |
| "loss": 0.2526, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.6039671166754657, | |
| "grad_norm": 0.22905798256397247, | |
| "learning_rate": 0.00028340477153255156, | |
| "loss": 0.0958, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 3.6190512104985295, | |
| "grad_norm": 0.13335692882537842, | |
| "learning_rate": 0.0002833289526890416, | |
| "loss": 0.2497, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.6190512104985295, | |
| "eval_loss": 0.18878242373466492, | |
| "eval_runtime": 739.3999, | |
| "eval_samples_per_second": 4.483, | |
| "eval_steps_per_second": 1.121, | |
| "eval_wer": 0.21019273335852723, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.634135304321593, | |
| "grad_norm": 1.2492620944976807, | |
| "learning_rate": 0.00028325313384553174, | |
| "loss": 0.0951, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 3.6492193981446563, | |
| "grad_norm": 0.19799137115478516, | |
| "learning_rate": 0.0002831773150020218, | |
| "loss": 0.2914, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.66430349196772, | |
| "grad_norm": 0.33943161368370056, | |
| "learning_rate": 0.0002831014961585119, | |
| "loss": 0.0908, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 3.6793875857907836, | |
| "grad_norm": 0.6077541708946228, | |
| "learning_rate": 0.000283025677315002, | |
| "loss": 0.2635, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.694471679613847, | |
| "grad_norm": 0.4308757781982422, | |
| "learning_rate": 0.0002829498584714921, | |
| "loss": 0.1054, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 3.709555773436911, | |
| "grad_norm": 0.27450037002563477, | |
| "learning_rate": 0.00028287403962798217, | |
| "loss": 0.2521, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.7246398672599743, | |
| "grad_norm": 0.41612792015075684, | |
| "learning_rate": 0.00028279822078447226, | |
| "loss": 0.0969, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 3.7397239610830377, | |
| "grad_norm": 0.818908154964447, | |
| "learning_rate": 0.00028272240194096235, | |
| "loss": 0.2574, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.7548080549061016, | |
| "grad_norm": 0.3212096393108368, | |
| "learning_rate": 0.00028264658309745243, | |
| "loss": 0.0874, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 3.769892148729165, | |
| "grad_norm": 0.4317522644996643, | |
| "learning_rate": 0.0002825707642539426, | |
| "loss": 0.2812, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.7849762425522284, | |
| "grad_norm": 0.3905152976512909, | |
| "learning_rate": 0.00028249494541043266, | |
| "loss": 0.0903, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 3.8000603363752923, | |
| "grad_norm": 0.22848589718341827, | |
| "learning_rate": 0.00028241912656692275, | |
| "loss": 0.292, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.8151444301983557, | |
| "grad_norm": 0.645900547504425, | |
| "learning_rate": 0.00028234330772341284, | |
| "loss": 0.0863, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 3.8302285240214196, | |
| "grad_norm": 0.3289954960346222, | |
| "learning_rate": 0.0002822674888799029, | |
| "loss": 0.2798, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.845312617844483, | |
| "grad_norm": 0.3951515853404999, | |
| "learning_rate": 0.000282191670036393, | |
| "loss": 0.0894, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 3.860396711667547, | |
| "grad_norm": 0.23933938145637512, | |
| "learning_rate": 0.00028211585119288315, | |
| "loss": 0.2836, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.8754808054906102, | |
| "grad_norm": 0.3123800456523895, | |
| "learning_rate": 0.0002820400323493732, | |
| "loss": 0.0841, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 3.8905648993136737, | |
| "grad_norm": 0.2620724141597748, | |
| "learning_rate": 0.0002819642135058633, | |
| "loss": 0.2695, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.9056489931367375, | |
| "grad_norm": 0.3551441431045532, | |
| "learning_rate": 0.0002818883946623534, | |
| "loss": 0.0887, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 3.920733086959801, | |
| "grad_norm": 0.2354227900505066, | |
| "learning_rate": 0.0002818125758188435, | |
| "loss": 0.2872, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.920733086959801, | |
| "eval_loss": 0.191518634557724, | |
| "eval_runtime": 727.3001, | |
| "eval_samples_per_second": 4.558, | |
| "eval_steps_per_second": 1.14, | |
| "eval_wer": 0.21967283917292016, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.9358171807828644, | |
| "grad_norm": 0.4430249333381653, | |
| "learning_rate": 0.0002817367569753336, | |
| "loss": 0.0916, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 3.950901274605928, | |
| "grad_norm": 0.3285467326641083, | |
| "learning_rate": 0.00028166093813182367, | |
| "loss": 0.2646, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 3.9659853684289916, | |
| "grad_norm": 0.27407094836235046, | |
| "learning_rate": 0.00028158511928831376, | |
| "loss": 0.0852, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 3.981069462252055, | |
| "grad_norm": 0.25445982813835144, | |
| "learning_rate": 0.00028150930044480385, | |
| "loss": 0.2673, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.996153556075119, | |
| "grad_norm": 0.4180232882499695, | |
| "learning_rate": 0.00028143348160129393, | |
| "loss": 0.0804, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 4.010860547552606, | |
| "grad_norm": 0.42869091033935547, | |
| "learning_rate": 0.000281357662757784, | |
| "loss": 0.2138, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.025944641375669, | |
| "grad_norm": 0.1404862254858017, | |
| "learning_rate": 0.00028128184391427416, | |
| "loss": 0.1138, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 4.041028735198733, | |
| "grad_norm": 0.3522794246673584, | |
| "learning_rate": 0.0002812060250707642, | |
| "loss": 0.4489, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.056112829021797, | |
| "grad_norm": 0.24887488782405853, | |
| "learning_rate": 0.00028113020622725434, | |
| "loss": 0.1131, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 4.07119692284486, | |
| "grad_norm": 0.45187073945999146, | |
| "learning_rate": 0.0002810543873837444, | |
| "loss": 0.1959, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.0862810166679235, | |
| "grad_norm": 0.30940550565719604, | |
| "learning_rate": 0.0002809785685402345, | |
| "loss": 0.1017, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 4.101365110490987, | |
| "grad_norm": 0.43294191360473633, | |
| "learning_rate": 0.0002809027496967246, | |
| "loss": 0.197, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.116449204314051, | |
| "grad_norm": 0.32438719272613525, | |
| "learning_rate": 0.0002808269308532147, | |
| "loss": 0.1136, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 4.131533298137114, | |
| "grad_norm": 0.38587555289268494, | |
| "learning_rate": 0.00028075111200970477, | |
| "loss": 0.1977, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.146617391960178, | |
| "grad_norm": 0.2455216646194458, | |
| "learning_rate": 0.0002806752931661949, | |
| "loss": 0.1072, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 4.161701485783242, | |
| "grad_norm": 0.356931209564209, | |
| "learning_rate": 0.00028059947432268494, | |
| "loss": 0.2118, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.176785579606305, | |
| "grad_norm": 0.8069139719009399, | |
| "learning_rate": 0.0002805236554791751, | |
| "loss": 0.1282, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 4.191869673429369, | |
| "grad_norm": 0.27101409435272217, | |
| "learning_rate": 0.00028044783663566517, | |
| "loss": 0.226, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.206953767252433, | |
| "grad_norm": 0.2601521909236908, | |
| "learning_rate": 0.00028037201779215526, | |
| "loss": 0.1372, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 4.222037861075496, | |
| "grad_norm": 0.5733149647712708, | |
| "learning_rate": 0.00028029619894864535, | |
| "loss": 0.248, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.222037861075496, | |
| "eval_loss": 0.19203022122383118, | |
| "eval_runtime": 763.6876, | |
| "eval_samples_per_second": 4.341, | |
| "eval_steps_per_second": 1.086, | |
| "eval_wer": 0.21993197646169627, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.2371219548985595, | |
| "grad_norm": 0.34575146436691284, | |
| "learning_rate": 0.00028022038010513543, | |
| "loss": 0.1382, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 4.252206048721623, | |
| "grad_norm": 0.40568241477012634, | |
| "learning_rate": 0.0002801445612616255, | |
| "loss": 0.2228, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.267290142544686, | |
| "grad_norm": 0.2740955054759979, | |
| "learning_rate": 0.0002800687424181156, | |
| "loss": 0.1264, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 4.28237423636775, | |
| "grad_norm": 0.33927688002586365, | |
| "learning_rate": 0.0002799929235746057, | |
| "loss": 0.2267, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.297458330190814, | |
| "grad_norm": 0.27805888652801514, | |
| "learning_rate": 0.0002799171047310958, | |
| "loss": 0.1333, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 4.312542424013877, | |
| "grad_norm": 0.36450818181037903, | |
| "learning_rate": 0.0002798412858875859, | |
| "loss": 0.1916, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.327626517836941, | |
| "grad_norm": 0.1994234174489975, | |
| "learning_rate": 0.000279765467044076, | |
| "loss": 0.1451, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 4.342710611660005, | |
| "grad_norm": 0.3419537842273712, | |
| "learning_rate": 0.0002796896482005661, | |
| "loss": 0.2135, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.357794705483068, | |
| "grad_norm": 0.252347856760025, | |
| "learning_rate": 0.0002796138293570562, | |
| "loss": 0.1289, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 4.3728787993061315, | |
| "grad_norm": 0.3467054069042206, | |
| "learning_rate": 0.00027953801051354627, | |
| "loss": 0.2341, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 4.387962893129195, | |
| "grad_norm": 0.24314385652542114, | |
| "learning_rate": 0.00027946219167003636, | |
| "loss": 0.1246, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 4.403046986952258, | |
| "grad_norm": 0.35150381922721863, | |
| "learning_rate": 0.0002793863728265265, | |
| "loss": 0.2159, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.418131080775322, | |
| "grad_norm": 0.44021642208099365, | |
| "learning_rate": 0.00027931055398301653, | |
| "loss": 0.1256, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 4.433215174598386, | |
| "grad_norm": 0.33157458901405334, | |
| "learning_rate": 0.00027923473513950667, | |
| "loss": 0.2206, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 4.44829926842145, | |
| "grad_norm": 0.292894184589386, | |
| "learning_rate": 0.00027915891629599676, | |
| "loss": 0.1199, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 4.463383362244513, | |
| "grad_norm": 0.5214021801948547, | |
| "learning_rate": 0.00027908309745248685, | |
| "loss": 0.1855, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.478467456067577, | |
| "grad_norm": 0.22543422877788544, | |
| "learning_rate": 0.00027900727860897693, | |
| "loss": 0.1206, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 4.493551549890641, | |
| "grad_norm": 0.33008188009262085, | |
| "learning_rate": 0.000278931459765467, | |
| "loss": 0.2273, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 4.508635643713704, | |
| "grad_norm": 0.25787121057510376, | |
| "learning_rate": 0.0002788556409219571, | |
| "loss": 0.1044, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 4.5237197375367675, | |
| "grad_norm": 0.26990142464637756, | |
| "learning_rate": 0.0002787798220784472, | |
| "loss": 0.2048, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.5237197375367675, | |
| "eval_loss": 0.1822061687707901, | |
| "eval_runtime": 772.5305, | |
| "eval_samples_per_second": 4.291, | |
| "eval_steps_per_second": 1.073, | |
| "eval_wer": 0.20550666738649248, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.538803831359831, | |
| "grad_norm": 0.2780537009239197, | |
| "learning_rate": 0.0002787040032349373, | |
| "loss": 0.1367, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 4.553887925182894, | |
| "grad_norm": 0.2901013493537903, | |
| "learning_rate": 0.00027862818439142737, | |
| "loss": 0.2171, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 4.568972019005958, | |
| "grad_norm": 0.7013656497001648, | |
| "learning_rate": 0.0002785523655479175, | |
| "loss": 0.1269, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 4.584056112829022, | |
| "grad_norm": 0.4125453531742096, | |
| "learning_rate": 0.00027847654670440754, | |
| "loss": 0.2193, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 4.599140206652085, | |
| "grad_norm": 0.17244857549667358, | |
| "learning_rate": 0.0002784007278608977, | |
| "loss": 0.1334, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 4.614224300475149, | |
| "grad_norm": 0.23291832208633423, | |
| "learning_rate": 0.00027832490901738777, | |
| "loss": 0.212, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 4.629308394298213, | |
| "grad_norm": 0.20680192112922668, | |
| "learning_rate": 0.00027824909017387786, | |
| "loss": 0.1127, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 4.644392488121277, | |
| "grad_norm": 0.3413441777229309, | |
| "learning_rate": 0.00027817327133036794, | |
| "loss": 0.1931, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 4.65947658194434, | |
| "grad_norm": 0.24499134719371796, | |
| "learning_rate": 0.00027809745248685803, | |
| "loss": 0.1148, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 4.674560675767403, | |
| "grad_norm": 0.4222862720489502, | |
| "learning_rate": 0.0002780216336433481, | |
| "loss": 0.2316, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 4.689644769590467, | |
| "grad_norm": 0.190853551030159, | |
| "learning_rate": 0.00027794581479983826, | |
| "loss": 0.1408, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 4.70472886341353, | |
| "grad_norm": 0.58743816614151, | |
| "learning_rate": 0.0002778699959563283, | |
| "loss": 0.1865, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 4.719812957236594, | |
| "grad_norm": 0.19071631133556366, | |
| "learning_rate": 0.00027779417711281843, | |
| "loss": 0.1304, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 4.734897051059658, | |
| "grad_norm": 0.7743087410926819, | |
| "learning_rate": 0.0002777183582693085, | |
| "loss": 0.203, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 4.749981144882721, | |
| "grad_norm": 0.21871432662010193, | |
| "learning_rate": 0.0002776425394257986, | |
| "loss": 0.099, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 4.765065238705785, | |
| "grad_norm": 0.3248477280139923, | |
| "learning_rate": 0.0002775667205822887, | |
| "loss": 0.2121, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 4.780149332528849, | |
| "grad_norm": 0.21200844645500183, | |
| "learning_rate": 0.0002774909017387788, | |
| "loss": 0.126, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 4.795233426351912, | |
| "grad_norm": 0.6078742742538452, | |
| "learning_rate": 0.00027741508289526887, | |
| "loss": 0.2062, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 4.8103175201749755, | |
| "grad_norm": 0.260425865650177, | |
| "learning_rate": 0.00027733926405175896, | |
| "loss": 0.1122, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 4.825401613998039, | |
| "grad_norm": 0.4660604000091553, | |
| "learning_rate": 0.00027726344520824904, | |
| "loss": 0.1977, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.825401613998039, | |
| "eval_loss": 0.1850125789642334, | |
| "eval_runtime": 772.6249, | |
| "eval_samples_per_second": 4.291, | |
| "eval_steps_per_second": 1.073, | |
| "eval_wer": 0.2094477136532959, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.840485707821102, | |
| "grad_norm": 0.2847197949886322, | |
| "learning_rate": 0.00027718762636473913, | |
| "loss": 0.1246, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 4.855569801644166, | |
| "grad_norm": 0.39191052317619324, | |
| "learning_rate": 0.00027711180752122927, | |
| "loss": 0.1994, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 4.87065389546723, | |
| "grad_norm": 0.19209399819374084, | |
| "learning_rate": 0.00027703598867771936, | |
| "loss": 0.1233, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 4.885737989290293, | |
| "grad_norm": 0.4371297359466553, | |
| "learning_rate": 0.00027696016983420944, | |
| "loss": 0.234, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.900822083113357, | |
| "grad_norm": 0.25683364272117615, | |
| "learning_rate": 0.00027688435099069953, | |
| "loss": 0.1182, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 4.915906176936421, | |
| "grad_norm": 0.42062830924987793, | |
| "learning_rate": 0.0002768085321471896, | |
| "loss": 0.2268, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 4.930990270759484, | |
| "grad_norm": 0.28874415159225464, | |
| "learning_rate": 0.0002767327133036797, | |
| "loss": 0.1134, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 4.946074364582548, | |
| "grad_norm": 0.28938642144203186, | |
| "learning_rate": 0.00027665689446016985, | |
| "loss": 0.2109, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.961158458405611, | |
| "grad_norm": 0.16918736696243286, | |
| "learning_rate": 0.0002765810756166599, | |
| "loss": 0.1132, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 4.976242552228674, | |
| "grad_norm": 0.3270578682422638, | |
| "learning_rate": 0.00027650525677315, | |
| "loss": 0.2191, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 4.991326646051738, | |
| "grad_norm": 0.8475662469863892, | |
| "learning_rate": 0.0002764294379296401, | |
| "loss": 0.146, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 5.006033637529225, | |
| "grad_norm": 0.3081296980381012, | |
| "learning_rate": 0.0002763536190861302, | |
| "loss": 0.2, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 5.021117731352289, | |
| "grad_norm": 0.17431728541851044, | |
| "learning_rate": 0.0002762778002426203, | |
| "loss": 0.17, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 5.036201825175352, | |
| "grad_norm": 0.40710902214050293, | |
| "learning_rate": 0.00027620198139911037, | |
| "loss": 0.1504, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 5.051285918998416, | |
| "grad_norm": 0.19659915566444397, | |
| "learning_rate": 0.00027612616255560046, | |
| "loss": 0.1745, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 5.06637001282148, | |
| "grad_norm": 0.3791191577911377, | |
| "learning_rate": 0.00027605034371209054, | |
| "loss": 0.1248, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 5.081454106644543, | |
| "grad_norm": 0.1905103623867035, | |
| "learning_rate": 0.00027597452486858063, | |
| "loss": 0.1933, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 5.096538200467607, | |
| "grad_norm": 0.28806644678115845, | |
| "learning_rate": 0.0002758987060250707, | |
| "loss": 0.1295, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 5.111622294290671, | |
| "grad_norm": 0.34333837032318115, | |
| "learning_rate": 0.00027582288718156086, | |
| "loss": 0.1778, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 5.1267063881137345, | |
| "grad_norm": 0.5322638154029846, | |
| "learning_rate": 0.0002757470683380509, | |
| "loss": 0.1459, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.1267063881137345, | |
| "eval_loss": 0.1905168741941452, | |
| "eval_runtime": 772.0187, | |
| "eval_samples_per_second": 4.294, | |
| "eval_steps_per_second": 1.074, | |
| "eval_wer": 0.21526750526372618, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.141790481936797, | |
| "grad_norm": 0.39731988310813904, | |
| "learning_rate": 0.00027567124949454103, | |
| "loss": 0.1731, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 5.156874575759861, | |
| "grad_norm": 0.760844349861145, | |
| "learning_rate": 0.0002755954306510311, | |
| "loss": 0.1466, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 5.171958669582925, | |
| "grad_norm": 0.8529795408248901, | |
| "learning_rate": 0.0002755196118075212, | |
| "loss": 0.1737, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 5.187042763405988, | |
| "grad_norm": 0.45475590229034424, | |
| "learning_rate": 0.0002754437929640113, | |
| "loss": 0.1452, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 5.202126857229052, | |
| "grad_norm": 0.26215818524360657, | |
| "learning_rate": 0.0002753710068742418, | |
| "loss": 0.4218, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 5.217210951052116, | |
| "grad_norm": 0.38315796852111816, | |
| "learning_rate": 0.0002752951880307319, | |
| "loss": 0.1261, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 5.232295044875179, | |
| "grad_norm": 0.41056641936302185, | |
| "learning_rate": 0.00027521936918722197, | |
| "loss": 0.162, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 5.247379138698243, | |
| "grad_norm": 0.22341406345367432, | |
| "learning_rate": 0.00027514355034371205, | |
| "loss": 0.1305, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 5.2624632325213065, | |
| "grad_norm": 0.2595282793045044, | |
| "learning_rate": 0.00027506773150020214, | |
| "loss": 0.1655, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 5.2775473263443695, | |
| "grad_norm": 0.2871881127357483, | |
| "learning_rate": 0.0002749919126566923, | |
| "loss": 0.1347, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 5.292631420167433, | |
| "grad_norm": 0.17933253943920135, | |
| "learning_rate": 0.0002749160938131823, | |
| "loss": 0.1715, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 5.307715513990497, | |
| "grad_norm": 0.5562332272529602, | |
| "learning_rate": 0.00027484027496967246, | |
| "loss": 0.1298, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 5.32279960781356, | |
| "grad_norm": 0.2567090392112732, | |
| "learning_rate": 0.00027476445612616254, | |
| "loss": 0.1673, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 5.337883701636624, | |
| "grad_norm": 0.3126806914806366, | |
| "learning_rate": 0.00027468863728265263, | |
| "loss": 0.126, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 5.352967795459688, | |
| "grad_norm": 0.16551165282726288, | |
| "learning_rate": 0.0002746128184391427, | |
| "loss": 0.1633, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 5.368051889282752, | |
| "grad_norm": 0.337634414434433, | |
| "learning_rate": 0.0002745369995956328, | |
| "loss": 0.1204, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 5.383135983105815, | |
| "grad_norm": 0.36297717690467834, | |
| "learning_rate": 0.0002744611807521229, | |
| "loss": 0.1927, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 5.398220076928879, | |
| "grad_norm": 0.3347633481025696, | |
| "learning_rate": 0.00027438536190861303, | |
| "loss": 0.1434, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 5.4133041707519425, | |
| "grad_norm": 0.2264309823513031, | |
| "learning_rate": 0.00027430954306510307, | |
| "loss": 0.2008, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 5.4283882645750055, | |
| "grad_norm": 0.263372004032135, | |
| "learning_rate": 0.0002742337242215932, | |
| "loss": 0.1471, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.4283882645750055, | |
| "eval_loss": 0.1863842010498047, | |
| "eval_runtime": 767.0229, | |
| "eval_samples_per_second": 4.322, | |
| "eval_steps_per_second": 1.081, | |
| "eval_wer": 0.20231064082492037, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.443472358398069, | |
| "grad_norm": 0.20563261210918427, | |
| "learning_rate": 0.0002741579053780833, | |
| "loss": 0.1721, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 5.458556452221133, | |
| "grad_norm": 0.26344984769821167, | |
| "learning_rate": 0.0002740820865345733, | |
| "loss": 0.1332, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 5.473640546044196, | |
| "grad_norm": 0.2505332827568054, | |
| "learning_rate": 0.00027400626769106347, | |
| "loss": 0.1638, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 5.48872463986726, | |
| "grad_norm": 0.5164260864257812, | |
| "learning_rate": 0.00027393044884755355, | |
| "loss": 0.1311, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 5.503808733690324, | |
| "grad_norm": 0.15487757325172424, | |
| "learning_rate": 0.00027385463000404364, | |
| "loss": 0.1715, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 5.518892827513387, | |
| "grad_norm": 0.46957388520240784, | |
| "learning_rate": 0.00027377881116053373, | |
| "loss": 0.1392, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 5.533976921336451, | |
| "grad_norm": 0.20295724272727966, | |
| "learning_rate": 0.0002737029923170238, | |
| "loss": 0.2009, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 5.5490610151595146, | |
| "grad_norm": 0.27826988697052, | |
| "learning_rate": 0.0002736271734735139, | |
| "loss": 0.1404, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 5.5641451089825775, | |
| "grad_norm": 0.2126350849866867, | |
| "learning_rate": 0.00027355135463000404, | |
| "loss": 0.1784, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 5.579229202805641, | |
| "grad_norm": 0.4118373692035675, | |
| "learning_rate": 0.00027347553578649413, | |
| "loss": 0.1309, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 5.594313296628705, | |
| "grad_norm": 0.16608783602714539, | |
| "learning_rate": 0.0002733997169429842, | |
| "loss": 0.1532, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 5.609397390451768, | |
| "grad_norm": 0.3244372308254242, | |
| "learning_rate": 0.0002733238980994743, | |
| "loss": 0.124, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 5.624481484274832, | |
| "grad_norm": 0.2650703489780426, | |
| "learning_rate": 0.0002732480792559644, | |
| "loss": 0.1924, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 5.639565578097896, | |
| "grad_norm": 0.4252435564994812, | |
| "learning_rate": 0.0002731722604124545, | |
| "loss": 0.1336, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 5.654649671920959, | |
| "grad_norm": 0.18568560481071472, | |
| "learning_rate": 0.00027309644156894457, | |
| "loss": 0.167, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 5.669733765744023, | |
| "grad_norm": 0.30477166175842285, | |
| "learning_rate": 0.00027302062272543465, | |
| "loss": 0.128, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 5.684817859567087, | |
| "grad_norm": 0.19481921195983887, | |
| "learning_rate": 0.0002729448038819248, | |
| "loss": 0.1736, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 5.69990195339015, | |
| "grad_norm": 0.5762272477149963, | |
| "learning_rate": 0.0002728689850384149, | |
| "loss": 0.1353, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 5.7149860472132135, | |
| "grad_norm": 0.9085726141929626, | |
| "learning_rate": 0.00027279316619490497, | |
| "loss": 0.1679, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 5.730070141036277, | |
| "grad_norm": 0.5637058615684509, | |
| "learning_rate": 0.00027271734735139505, | |
| "loss": 0.1528, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.730070141036277, | |
| "eval_loss": 0.1828906089067459, | |
| "eval_runtime": 770.1353, | |
| "eval_samples_per_second": 4.304, | |
| "eval_steps_per_second": 1.076, | |
| "eval_wer": 0.208940236462776, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.745154234859341, | |
| "grad_norm": 0.1850712150335312, | |
| "learning_rate": 0.00027264152850788514, | |
| "loss": 0.1782, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 5.760238328682404, | |
| "grad_norm": 0.4733022153377533, | |
| "learning_rate": 0.00027256570966437523, | |
| "loss": 0.136, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 5.775322422505468, | |
| "grad_norm": 0.2352697253227234, | |
| "learning_rate": 0.0002724898908208653, | |
| "loss": 0.1636, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 5.790406516328532, | |
| "grad_norm": 0.297294557094574, | |
| "learning_rate": 0.0002724140719773554, | |
| "loss": 0.1399, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 5.805490610151595, | |
| "grad_norm": 0.22856192290782928, | |
| "learning_rate": 0.0002723382531338455, | |
| "loss": 0.1672, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 5.820574703974659, | |
| "grad_norm": 0.19410184025764465, | |
| "learning_rate": 0.00027226243429033563, | |
| "loss": 0.1342, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 5.835658797797723, | |
| "grad_norm": 0.15539394319057465, | |
| "learning_rate": 0.00027218661544682566, | |
| "loss": 0.178, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 5.8507428916207855, | |
| "grad_norm": 0.27372846007347107, | |
| "learning_rate": 0.0002721107966033158, | |
| "loss": 0.1359, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 5.865826985443849, | |
| "grad_norm": 0.1481178253889084, | |
| "learning_rate": 0.0002720349777598059, | |
| "loss": 0.1484, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 5.880911079266913, | |
| "grad_norm": 0.22347645461559296, | |
| "learning_rate": 0.000271959158916296, | |
| "loss": 0.1372, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 5.895995173089977, | |
| "grad_norm": 0.13955964148044586, | |
| "learning_rate": 0.00027188334007278607, | |
| "loss": 0.1506, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 5.91107926691304, | |
| "grad_norm": 0.44418036937713623, | |
| "learning_rate": 0.00027180752122927615, | |
| "loss": 0.129, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 5.926163360736104, | |
| "grad_norm": 0.18400181829929352, | |
| "learning_rate": 0.00027173170238576624, | |
| "loss": 0.1812, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 5.941247454559168, | |
| "grad_norm": 0.3210029602050781, | |
| "learning_rate": 0.0002716558835422564, | |
| "loss": 0.1279, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 5.956331548382231, | |
| "grad_norm": 0.4363233745098114, | |
| "learning_rate": 0.0002715800646987464, | |
| "loss": 0.1578, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 5.971415642205295, | |
| "grad_norm": 0.9395078420639038, | |
| "learning_rate": 0.00027150424585523655, | |
| "loss": 0.1398, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 5.9864997360283585, | |
| "grad_norm": 0.16781263053417206, | |
| "learning_rate": 0.00027142842701172664, | |
| "loss": 0.1687, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 6.001206727505845, | |
| "grad_norm": 0.5314217209815979, | |
| "learning_rate": 0.0002713526081682167, | |
| "loss": 0.1023, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 6.016290821328909, | |
| "grad_norm": 0.503394365310669, | |
| "learning_rate": 0.0002712767893247068, | |
| "loss": 0.2136, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 6.031374915151972, | |
| "grad_norm": 0.38936060667037964, | |
| "learning_rate": 0.0002712009704811969, | |
| "loss": 0.0668, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.031374915151972, | |
| "eval_loss": 0.19079850614070892, | |
| "eval_runtime": 765.219, | |
| "eval_samples_per_second": 4.332, | |
| "eval_steps_per_second": 1.083, | |
| "eval_wer": 0.20498839280894024, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.031374915151972, | |
| "step": 10000, | |
| "total_flos": 7.129096476605158e+19, | |
| "train_loss": 0.34351529302597045, | |
| "train_runtime": 124569.2108, | |
| "train_samples_per_second": 12.772, | |
| "train_steps_per_second": 0.798 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 99420, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 60, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.129096476605158e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |