| { |
| "best_global_step": 5000, |
| "best_metric": 0.20783165635834974, |
| "best_model_checkpoint": "/home/florent/milo/models/whisper-mg-v1/checkpoint-5000", |
| "epoch": 3.946285069787114, |
| "eval_steps": 1000, |
| "global_step": 7000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014098406880022557, |
| "grad_norm": 84.86420440673828, |
| "learning_rate": 4.800000000000001e-07, |
| "loss": 17.227703857421876, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.028196813760045115, |
| "grad_norm": 66.53484344482422, |
| "learning_rate": 9.800000000000001e-07, |
| "loss": 13.832662353515625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.042295220640067674, |
| "grad_norm": 43.68435287475586, |
| "learning_rate": 1.48e-06, |
| "loss": 10.33947021484375, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05639362752009023, |
| "grad_norm": 34.214447021484375, |
| "learning_rate": 1.98e-06, |
| "loss": 7.374788818359375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07049203440011279, |
| "grad_norm": 27.010826110839844, |
| "learning_rate": 2.4800000000000004e-06, |
| "loss": 5.627406005859375, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08459044128013535, |
| "grad_norm": 24.344482421875, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 4.657888793945313, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0986888481601579, |
| "grad_norm": 24.166223526000977, |
| "learning_rate": 3.48e-06, |
| "loss": 3.687208251953125, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11278725504018046, |
| "grad_norm": 23.202571868896484, |
| "learning_rate": 3.980000000000001e-06, |
| "loss": 2.8217697143554688, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.126885661920203, |
| "grad_norm": 21.142698287963867, |
| "learning_rate": 4.48e-06, |
| "loss": 2.384236602783203, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14098406880022557, |
| "grad_norm": 21.08614730834961, |
| "learning_rate": 4.980000000000001e-06, |
| "loss": 2.1562757873535157, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15508247568024813, |
| "grad_norm": 20.73036003112793, |
| "learning_rate": 5.480000000000001e-06, |
| "loss": 2.0236280822753905, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1691808825602707, |
| "grad_norm": 21.132606506347656, |
| "learning_rate": 5.98e-06, |
| "loss": 1.865457763671875, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18327928944029326, |
| "grad_norm": 20.846128463745117, |
| "learning_rate": 6.480000000000001e-06, |
| "loss": 1.7576261901855468, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1973776963203158, |
| "grad_norm": 18.715116500854492, |
| "learning_rate": 6.98e-06, |
| "loss": 1.6428683471679688, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21147610320033836, |
| "grad_norm": 18.342042922973633, |
| "learning_rate": 7.48e-06, |
| "loss": 1.6423768615722656, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.22557451008036092, |
| "grad_norm": 19.184682846069336, |
| "learning_rate": 7.980000000000002e-06, |
| "loss": 1.6796397399902343, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.23967291696038348, |
| "grad_norm": 15.670419692993164, |
| "learning_rate": 8.48e-06, |
| "loss": 1.5983003234863282, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.253771323840406, |
| "grad_norm": 17.033227920532227, |
| "learning_rate": 8.98e-06, |
| "loss": 1.5592161560058593, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2678697307204286, |
| "grad_norm": 17.4881649017334, |
| "learning_rate": 9.48e-06, |
| "loss": 1.5372105407714844, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.28196813760045114, |
| "grad_norm": 14.954177856445312, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 1.3610572814941406, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2960665444804737, |
| "grad_norm": 14.768590927124023, |
| "learning_rate": 9.986078886310906e-06, |
| "loss": 1.4396812438964843, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.31016495136049627, |
| "grad_norm": 14.442098617553711, |
| "learning_rate": 9.971577726218099e-06, |
| "loss": 1.3178257751464844, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.32426335824051883, |
| "grad_norm": 15.411622047424316, |
| "learning_rate": 9.957076566125291e-06, |
| "loss": 1.4055836486816407, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3383617651205414, |
| "grad_norm": 15.339015007019043, |
| "learning_rate": 9.942575406032482e-06, |
| "loss": 1.263025665283203, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.35246017200056395, |
| "grad_norm": 12.730027198791504, |
| "learning_rate": 9.928074245939677e-06, |
| "loss": 1.3695268249511718, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.3665585788805865, |
| "grad_norm": 15.123625755310059, |
| "learning_rate": 9.913573085846868e-06, |
| "loss": 1.2718246459960938, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.380656985760609, |
| "grad_norm": 15.623430252075195, |
| "learning_rate": 9.899071925754062e-06, |
| "loss": 1.2624960327148438, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3947553926406316, |
| "grad_norm": 14.083202362060547, |
| "learning_rate": 9.884570765661253e-06, |
| "loss": 1.312706756591797, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.40885379952065415, |
| "grad_norm": 14.791698455810547, |
| "learning_rate": 9.870069605568446e-06, |
| "loss": 1.1517729187011718, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4229522064006767, |
| "grad_norm": 16.083740234375, |
| "learning_rate": 9.855568445475639e-06, |
| "loss": 1.3111384582519532, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4370506132806993, |
| "grad_norm": 12.33295726776123, |
| "learning_rate": 9.841067285382831e-06, |
| "loss": 1.1583899688720702, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.45114902016072184, |
| "grad_norm": 15.303627014160156, |
| "learning_rate": 9.826566125290024e-06, |
| "loss": 1.1577481079101561, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4652474270407444, |
| "grad_norm": 14.345124244689941, |
| "learning_rate": 9.812064965197217e-06, |
| "loss": 1.1901895904541016, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.47934583392076696, |
| "grad_norm": 14.913029670715332, |
| "learning_rate": 9.79756380510441e-06, |
| "loss": 1.1890618896484375, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4934442408007895, |
| "grad_norm": 13.710859298706055, |
| "learning_rate": 9.783062645011602e-06, |
| "loss": 1.1438979339599609, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.507542647680812, |
| "grad_norm": 13.514805793762207, |
| "learning_rate": 9.768561484918795e-06, |
| "loss": 1.1654217529296875, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5216410545608346, |
| "grad_norm": 12.146404266357422, |
| "learning_rate": 9.754060324825988e-06, |
| "loss": 1.1279725646972656, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5357394614408572, |
| "grad_norm": 14.747977256774902, |
| "learning_rate": 9.73955916473318e-06, |
| "loss": 1.0921778106689453, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5498378683208798, |
| "grad_norm": 12.012781143188477, |
| "learning_rate": 9.725058004640371e-06, |
| "loss": 1.089789581298828, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5639362752009023, |
| "grad_norm": 14.096403121948242, |
| "learning_rate": 9.710556844547566e-06, |
| "loss": 1.09536865234375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5639362752009023, |
| "eval_loss": 0.3027215600013733, |
| "eval_runtime": 1414.4639, |
| "eval_samples_per_second": 2.191, |
| "eval_steps_per_second": 0.274, |
| "eval_wer": 0.25130644178215644, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5780346820809249, |
| "grad_norm": 12.812657356262207, |
| "learning_rate": 9.696055684454757e-06, |
| "loss": 1.073106155395508, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5921330889609474, |
| "grad_norm": 12.071333885192871, |
| "learning_rate": 9.68155452436195e-06, |
| "loss": 1.0373800659179688, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.60623149584097, |
| "grad_norm": 14.140727043151855, |
| "learning_rate": 9.667053364269142e-06, |
| "loss": 1.0855345916748047, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.6203299027209925, |
| "grad_norm": 12.884740829467773, |
| "learning_rate": 9.652552204176335e-06, |
| "loss": 1.0462813568115235, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.634428309601015, |
| "grad_norm": 12.38447093963623, |
| "learning_rate": 9.638051044083528e-06, |
| "loss": 1.0593311309814453, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6485267164810377, |
| "grad_norm": 13.830496788024902, |
| "learning_rate": 9.62354988399072e-06, |
| "loss": 1.0979310607910155, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6626251233610602, |
| "grad_norm": 14.614953994750977, |
| "learning_rate": 9.609048723897913e-06, |
| "loss": 1.1127804565429686, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6767235302410828, |
| "grad_norm": 13.682169914245605, |
| "learning_rate": 9.594547563805106e-06, |
| "loss": 1.0035169982910157, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6908219371211053, |
| "grad_norm": 13.420117378234863, |
| "learning_rate": 9.580046403712297e-06, |
| "loss": 1.0495610046386719, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.7049203440011279, |
| "grad_norm": 12.9420166015625, |
| "learning_rate": 9.565545243619491e-06, |
| "loss": 1.060257339477539, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7190187508811504, |
| "grad_norm": 12.879499435424805, |
| "learning_rate": 9.551044083526682e-06, |
| "loss": 1.0332550811767578, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.733117157761173, |
| "grad_norm": 13.018166542053223, |
| "learning_rate": 9.536542923433877e-06, |
| "loss": 1.0661671447753907, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7472155646411955, |
| "grad_norm": 13.178157806396484, |
| "learning_rate": 9.522041763341068e-06, |
| "loss": 0.9954322052001953, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.761313971521218, |
| "grad_norm": 15.271596908569336, |
| "learning_rate": 9.50754060324826e-06, |
| "loss": 1.0291824340820312, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7754123784012407, |
| "grad_norm": 17.327726364135742, |
| "learning_rate": 9.493039443155453e-06, |
| "loss": 1.0398453521728515, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7895107852812632, |
| "grad_norm": 13.380585670471191, |
| "learning_rate": 9.478538283062646e-06, |
| "loss": 0.9851990509033203, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8036091921612858, |
| "grad_norm": 11.947903633117676, |
| "learning_rate": 9.464037122969838e-06, |
| "loss": 0.9933275604248046, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.8177075990413083, |
| "grad_norm": 11.313209533691406, |
| "learning_rate": 9.449535962877031e-06, |
| "loss": 0.95019775390625, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8318060059213309, |
| "grad_norm": 9.134963035583496, |
| "learning_rate": 9.435034802784224e-06, |
| "loss": 0.9632527160644532, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.8459044128013534, |
| "grad_norm": 11.307052612304688, |
| "learning_rate": 9.420533642691417e-06, |
| "loss": 1.0088771820068358, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.860002819681376, |
| "grad_norm": 11.490585327148438, |
| "learning_rate": 9.406032482598608e-06, |
| "loss": 0.975683822631836, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.8741012265613985, |
| "grad_norm": 11.09744930267334, |
| "learning_rate": 9.391531322505802e-06, |
| "loss": 0.9516730499267578, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8881996334414212, |
| "grad_norm": 12.852828979492188, |
| "learning_rate": 9.377030162412993e-06, |
| "loss": 0.9338680267333984, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.9022980403214437, |
| "grad_norm": 13.335673332214355, |
| "learning_rate": 9.362529002320186e-06, |
| "loss": 0.9313024139404297, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9163964472014662, |
| "grad_norm": 11.356801986694336, |
| "learning_rate": 9.348027842227378e-06, |
| "loss": 1.0073654174804687, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.9304948540814888, |
| "grad_norm": 13.629708290100098, |
| "learning_rate": 9.333526682134571e-06, |
| "loss": 1.007170639038086, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9445932609615113, |
| "grad_norm": 9.606148719787598, |
| "learning_rate": 9.319025522041764e-06, |
| "loss": 0.9286507415771484, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.9586916678415339, |
| "grad_norm": 9.82172679901123, |
| "learning_rate": 9.304524361948957e-06, |
| "loss": 0.9336747741699218, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9727900747215564, |
| "grad_norm": 11.669585227966309, |
| "learning_rate": 9.29002320185615e-06, |
| "loss": 0.9786383819580078, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.986888481601579, |
| "grad_norm": 11.385014533996582, |
| "learning_rate": 9.275522041763342e-06, |
| "loss": 0.9959779357910157, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.0005639362752008, |
| "grad_norm": 11.361459732055664, |
| "learning_rate": 9.261020881670535e-06, |
| "loss": 0.9319418334960937, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.0146623431552235, |
| "grad_norm": 10.459379196166992, |
| "learning_rate": 9.246519721577727e-06, |
| "loss": 0.7039449310302734, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.028760750035246, |
| "grad_norm": 8.165692329406738, |
| "learning_rate": 9.23201856148492e-06, |
| "loss": 0.7685092926025391, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.0428591569152685, |
| "grad_norm": 9.223814010620117, |
| "learning_rate": 9.217517401392111e-06, |
| "loss": 0.7643940734863282, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.056957563795291, |
| "grad_norm": 12.819178581237793, |
| "learning_rate": 9.203016241299306e-06, |
| "loss": 0.7673802185058594, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.0710559706753138, |
| "grad_norm": 8.873549461364746, |
| "learning_rate": 9.188515081206497e-06, |
| "loss": 0.7162540435791016, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0851543775553363, |
| "grad_norm": 11.80384349822998, |
| "learning_rate": 9.174013921113691e-06, |
| "loss": 0.7582861328125, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.0992527844353588, |
| "grad_norm": 10.77530288696289, |
| "learning_rate": 9.159512761020882e-06, |
| "loss": 0.7159561920166015, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.1133511913153813, |
| "grad_norm": 11.646292686462402, |
| "learning_rate": 9.145011600928075e-06, |
| "loss": 0.7990260314941406, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.1274495981954038, |
| "grad_norm": 11.254467010498047, |
| "learning_rate": 9.130510440835267e-06, |
| "loss": 0.7786456298828125, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1274495981954038, |
| "eval_loss": 0.2605888545513153, |
| "eval_runtime": 1451.9513, |
| "eval_samples_per_second": 2.134, |
| "eval_steps_per_second": 0.267, |
| "eval_wer": 0.22206013129365215, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1415480050754265, |
| "grad_norm": 8.868310928344727, |
| "learning_rate": 9.11600928074246e-06, |
| "loss": 0.7087932586669922, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.155646411955449, |
| "grad_norm": 11.000504493713379, |
| "learning_rate": 9.101508120649653e-06, |
| "loss": 0.7248979949951172, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.1697448188354715, |
| "grad_norm": 12.025858879089355, |
| "learning_rate": 9.087006960556846e-06, |
| "loss": 0.7824922943115235, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.1838432257154943, |
| "grad_norm": 12.042730331420898, |
| "learning_rate": 9.072505800464038e-06, |
| "loss": 0.8172999572753906, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1979416325955168, |
| "grad_norm": 9.696090698242188, |
| "learning_rate": 9.058004640371231e-06, |
| "loss": 0.8125002288818359, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.2120400394755393, |
| "grad_norm": 10.16943359375, |
| "learning_rate": 9.043503480278422e-06, |
| "loss": 0.76618408203125, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.2261384463555618, |
| "grad_norm": 9.093573570251465, |
| "learning_rate": 9.029002320185616e-06, |
| "loss": 0.7104488372802734, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.2402368532355843, |
| "grad_norm": 10.546662330627441, |
| "learning_rate": 9.014501160092808e-06, |
| "loss": 0.7818730926513672, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.254335260115607, |
| "grad_norm": 12.367358207702637, |
| "learning_rate": 9e-06, |
| "loss": 0.8542975616455079, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.2684336669956295, |
| "grad_norm": 10.952892303466797, |
| "learning_rate": 8.985498839907193e-06, |
| "loss": 0.7449074554443359, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.282532073875652, |
| "grad_norm": 8.846220970153809, |
| "learning_rate": 8.970997679814386e-06, |
| "loss": 0.7692269897460937, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.2966304807556746, |
| "grad_norm": 11.092299461364746, |
| "learning_rate": 8.956496519721578e-06, |
| "loss": 0.770924072265625, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.310728887635697, |
| "grad_norm": 11.600993156433105, |
| "learning_rate": 8.941995359628771e-06, |
| "loss": 0.8001760864257812, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.3248272945157198, |
| "grad_norm": 9.988462448120117, |
| "learning_rate": 8.927494199535964e-06, |
| "loss": 0.736719970703125, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.3389257013957423, |
| "grad_norm": 11.111364364624023, |
| "learning_rate": 8.912993039443157e-06, |
| "loss": 0.7216539001464843, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.3530241082757648, |
| "grad_norm": 12.4348726272583, |
| "learning_rate": 8.898491879350348e-06, |
| "loss": 0.8057781219482422, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.3671225151557875, |
| "grad_norm": 11.37057876586914, |
| "learning_rate": 8.883990719257542e-06, |
| "loss": 0.7377596282958985, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.3812209220358098, |
| "grad_norm": 8.443863868713379, |
| "learning_rate": 8.869489559164733e-06, |
| "loss": 0.764500732421875, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3953193289158325, |
| "grad_norm": 11.640331268310547, |
| "learning_rate": 8.854988399071927e-06, |
| "loss": 0.7854479217529297, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.409417735795855, |
| "grad_norm": 9.915708541870117, |
| "learning_rate": 8.840487238979118e-06, |
| "loss": 0.7995922088623046, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4235161426758776, |
| "grad_norm": 12.385272979736328, |
| "learning_rate": 8.825986078886311e-06, |
| "loss": 0.7540443420410157, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.4376145495559003, |
| "grad_norm": 10.713929176330566, |
| "learning_rate": 8.811484918793504e-06, |
| "loss": 0.7403028869628906, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.4517129564359228, |
| "grad_norm": 10.878042221069336, |
| "learning_rate": 8.796983758700697e-06, |
| "loss": 0.7737533569335937, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.4658113633159453, |
| "grad_norm": 9.902074813842773, |
| "learning_rate": 8.78248259860789e-06, |
| "loss": 0.752545166015625, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.4799097701959678, |
| "grad_norm": 9.259173393249512, |
| "learning_rate": 8.767981438515082e-06, |
| "loss": 0.7878742980957031, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.4940081770759903, |
| "grad_norm": 11.374395370483398, |
| "learning_rate": 8.753480278422275e-06, |
| "loss": 0.7678947448730469, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.508106583956013, |
| "grad_norm": 10.379016876220703, |
| "learning_rate": 8.738979118329467e-06, |
| "loss": 0.7748676300048828, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.5222049908360356, |
| "grad_norm": 11.049686431884766, |
| "learning_rate": 8.72447795823666e-06, |
| "loss": 0.7766862487792969, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.536303397716058, |
| "grad_norm": 10.903966903686523, |
| "learning_rate": 8.709976798143853e-06, |
| "loss": 0.7612065124511719, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.5504018045960808, |
| "grad_norm": 11.366527557373047, |
| "learning_rate": 8.695475638051046e-06, |
| "loss": 0.7193534088134765, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.564500211476103, |
| "grad_norm": 9.587455749511719, |
| "learning_rate": 8.680974477958237e-06, |
| "loss": 0.6888518524169922, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.5785986183561258, |
| "grad_norm": 10.296839714050293, |
| "learning_rate": 8.666473317865431e-06, |
| "loss": 0.7584939575195313, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.5926970252361483, |
| "grad_norm": 13.031759262084961, |
| "learning_rate": 8.651972157772622e-06, |
| "loss": 0.8102165985107422, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.6067954321161708, |
| "grad_norm": 10.904520988464355, |
| "learning_rate": 8.637470997679815e-06, |
| "loss": 0.7805465698242188, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.6208938389961935, |
| "grad_norm": 9.31521224975586, |
| "learning_rate": 8.622969837587007e-06, |
| "loss": 0.7300022125244141, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.6349922458762158, |
| "grad_norm": 11.240145683288574, |
| "learning_rate": 8.6084686774942e-06, |
| "loss": 0.783562240600586, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.6490906527562386, |
| "grad_norm": 10.595625877380371, |
| "learning_rate": 8.593967517401393e-06, |
| "loss": 0.7286166381835938, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.663189059636261, |
| "grad_norm": 9.539634704589844, |
| "learning_rate": 8.579466357308586e-06, |
| "loss": 0.7374664306640625, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.6772874665162836, |
| "grad_norm": 11.489903450012207, |
| "learning_rate": 8.564965197215778e-06, |
| "loss": 0.7891261291503906, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.6913858733963063, |
| "grad_norm": 11.152668952941895, |
| "learning_rate": 8.550464037122971e-06, |
| "loss": 0.750862808227539, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6913858733963063, |
| "eval_loss": 0.2470168024301529, |
| "eval_runtime": 1517.4032, |
| "eval_samples_per_second": 2.042, |
| "eval_steps_per_second": 0.256, |
| "eval_wer": 0.21129884793317413, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.7054842802763288, |
| "grad_norm": 10.490551948547363, |
| "learning_rate": 8.535962877030162e-06, |
| "loss": 0.7084814453125, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.7195826871563513, |
| "grad_norm": 11.868510246276855, |
| "learning_rate": 8.521461716937356e-06, |
| "loss": 0.7457318115234375, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.733681094036374, |
| "grad_norm": 9.93341064453125, |
| "learning_rate": 8.506960556844547e-06, |
| "loss": 0.7481878662109375, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.7477795009163963, |
| "grad_norm": 9.801637649536133, |
| "learning_rate": 8.492459396751742e-06, |
| "loss": 0.7577263641357422, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.761877907796419, |
| "grad_norm": 9.962018013000488, |
| "learning_rate": 8.477958236658933e-06, |
| "loss": 0.7176610565185547, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.7759763146764416, |
| "grad_norm": 10.016621589660645, |
| "learning_rate": 8.463457076566126e-06, |
| "loss": 0.8129973602294922, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.790074721556464, |
| "grad_norm": 10.95247745513916, |
| "learning_rate": 8.448955916473318e-06, |
| "loss": 0.76224609375, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.8041731284364868, |
| "grad_norm": 10.412908554077148, |
| "learning_rate": 8.434454756380511e-06, |
| "loss": 0.72124267578125, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.818271535316509, |
| "grad_norm": 10.183904647827148, |
| "learning_rate": 8.419953596287704e-06, |
| "loss": 0.7625586700439453, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.8323699421965318, |
| "grad_norm": 10.891714096069336, |
| "learning_rate": 8.405452436194896e-06, |
| "loss": 0.719871826171875, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.8464683490765543, |
| "grad_norm": 10.172575950622559, |
| "learning_rate": 8.390951276102089e-06, |
| "loss": 0.753576889038086, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.8605667559565768, |
| "grad_norm": 9.48585033416748, |
| "learning_rate": 8.376450116009282e-06, |
| "loss": 0.7430252838134765, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.8746651628365996, |
| "grad_norm": 11.499151229858398, |
| "learning_rate": 8.361948955916473e-06, |
| "loss": 0.7010813903808594, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.888763569716622, |
| "grad_norm": 9.432136535644531, |
| "learning_rate": 8.347447795823667e-06, |
| "loss": 0.71728759765625, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.9028619765966446, |
| "grad_norm": 8.735350608825684, |
| "learning_rate": 8.332946635730858e-06, |
| "loss": 0.8144132232666016, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.916960383476667, |
| "grad_norm": 9.190115928649902, |
| "learning_rate": 8.318445475638051e-06, |
| "loss": 0.7769715881347656, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.9310587903566896, |
| "grad_norm": 8.803107261657715, |
| "learning_rate": 8.303944315545245e-06, |
| "loss": 0.7007711791992187, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.9451571972367123, |
| "grad_norm": 10.293452262878418, |
| "learning_rate": 8.289443155452436e-06, |
| "loss": 0.7087128448486328, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.9592556041167348, |
| "grad_norm": 11.334561347961426, |
| "learning_rate": 8.27494199535963e-06, |
| "loss": 0.7811639404296875, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.9733540109967573, |
| "grad_norm": 9.957331657409668, |
| "learning_rate": 8.260440835266822e-06, |
| "loss": 0.7865208435058594, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.98745241787678, |
| "grad_norm": 12.995558738708496, |
| "learning_rate": 8.245939675174015e-06, |
| "loss": 0.7895949554443359, |
| "step": 3525 |
| }, |
| { |
| "epoch": 2.0011278725504016, |
| "grad_norm": 9.675638198852539, |
| "learning_rate": 8.231438515081207e-06, |
| "loss": 0.721746826171875, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.0152262794304243, |
| "grad_norm": 18.80123519897461, |
| "learning_rate": 8.2169373549884e-06, |
| "loss": 0.557889175415039, |
| "step": 3575 |
| }, |
| { |
| "epoch": 2.029324686310447, |
| "grad_norm": 9.540290832519531, |
| "learning_rate": 8.202436194895593e-06, |
| "loss": 0.5269033432006835, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.0434230931904693, |
| "grad_norm": 8.743149757385254, |
| "learning_rate": 8.187935034802785e-06, |
| "loss": 0.5314161300659179, |
| "step": 3625 |
| }, |
| { |
| "epoch": 2.057521500070492, |
| "grad_norm": 8.326909065246582, |
| "learning_rate": 8.173433874709976e-06, |
| "loss": 0.5158148193359375, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.071619906950515, |
| "grad_norm": 8.55147933959961, |
| "learning_rate": 8.158932714617171e-06, |
| "loss": 0.543358497619629, |
| "step": 3675 |
| }, |
| { |
| "epoch": 2.085718313830537, |
| "grad_norm": 9.927057266235352, |
| "learning_rate": 8.144431554524362e-06, |
| "loss": 0.5452922058105468, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.09981672071056, |
| "grad_norm": 7.827384948730469, |
| "learning_rate": 8.129930394431556e-06, |
| "loss": 0.5253535079956054, |
| "step": 3725 |
| }, |
| { |
| "epoch": 2.113915127590582, |
| "grad_norm": 9.569947242736816, |
| "learning_rate": 8.115429234338747e-06, |
| "loss": 0.5380427169799805, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.128013534470605, |
| "grad_norm": 8.02529525756836, |
| "learning_rate": 8.10092807424594e-06, |
| "loss": 0.50870849609375, |
| "step": 3775 |
| }, |
| { |
| "epoch": 2.1421119413506275, |
| "grad_norm": 10.682024002075195, |
| "learning_rate": 8.086426914153133e-06, |
| "loss": 0.5357696914672851, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.15621034823065, |
| "grad_norm": 7.825737476348877, |
| "learning_rate": 8.071925754060325e-06, |
| "loss": 0.5354624176025391, |
| "step": 3825 |
| }, |
| { |
| "epoch": 2.1703087551106726, |
| "grad_norm": 8.716205596923828, |
| "learning_rate": 8.057424593967518e-06, |
| "loss": 0.5709107208251953, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.184407161990695, |
| "grad_norm": 8.497817993164062, |
| "learning_rate": 8.042923433874711e-06, |
| "loss": 0.559893798828125, |
| "step": 3875 |
| }, |
| { |
| "epoch": 2.1985055688707176, |
| "grad_norm": 10.781414985656738, |
| "learning_rate": 8.028422273781904e-06, |
| "loss": 0.5544267272949219, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.2126039757507403, |
| "grad_norm": 10.681407928466797, |
| "learning_rate": 8.013921113689096e-06, |
| "loss": 0.533585090637207, |
| "step": 3925 |
| }, |
| { |
| "epoch": 2.2267023826307626, |
| "grad_norm": 9.05926513671875, |
| "learning_rate": 7.999419953596287e-06, |
| "loss": 0.5441395950317383, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.2408007895107853, |
| "grad_norm": 10.925969123840332, |
| "learning_rate": 7.984918793503482e-06, |
| "loss": 0.5743826675415039, |
| "step": 3975 |
| }, |
| { |
| "epoch": 2.2548991963908076, |
| "grad_norm": 9.930057525634766, |
| "learning_rate": 7.970417633410673e-06, |
| "loss": 0.5230157089233398, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.2548991963908076, |
| "eval_loss": 0.25168365240097046, |
| "eval_runtime": 1521.1061, |
| "eval_samples_per_second": 2.037, |
| "eval_steps_per_second": 0.255, |
| "eval_wer": 0.20968015907115237, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.2689976032708303, |
| "grad_norm": 11.271224021911621, |
| "learning_rate": 7.955916473317865e-06, |
| "loss": 0.5519426727294922, |
| "step": 4025 |
| }, |
| { |
| "epoch": 2.283096010150853, |
| "grad_norm": 9.23180103302002, |
| "learning_rate": 7.941415313225058e-06, |
| "loss": 0.5422761917114258, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.2971944170308753, |
| "grad_norm": 8.428093910217285, |
| "learning_rate": 7.926914153132251e-06, |
| "loss": 0.5573156356811524, |
| "step": 4075 |
| }, |
| { |
| "epoch": 2.311292823910898, |
| "grad_norm": 9.771265029907227, |
| "learning_rate": 7.912412993039444e-06, |
| "loss": 0.5444869995117188, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.325391230790921, |
| "grad_norm": 8.109742164611816, |
| "learning_rate": 7.897911832946636e-06, |
| "loss": 0.565984001159668, |
| "step": 4125 |
| }, |
| { |
| "epoch": 2.339489637670943, |
| "grad_norm": 10.035289764404297, |
| "learning_rate": 7.883410672853829e-06, |
| "loss": 0.5919873809814453, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.353588044550966, |
| "grad_norm": 7.981551647186279, |
| "learning_rate": 7.868909512761022e-06, |
| "loss": 0.5728730392456055, |
| "step": 4175 |
| }, |
| { |
| "epoch": 2.3676864514309885, |
| "grad_norm": 9.20626163482666, |
| "learning_rate": 7.854408352668213e-06, |
| "loss": 0.5345810317993164, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.381784858311011, |
| "grad_norm": 10.04966926574707, |
| "learning_rate": 7.839907192575407e-06, |
| "loss": 0.5514390563964844, |
| "step": 4225 |
| }, |
| { |
| "epoch": 2.3958832651910336, |
| "grad_norm": 9.066740989685059, |
| "learning_rate": 7.8254060324826e-06, |
| "loss": 0.5442893600463867, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.409981672071056, |
| "grad_norm": 8.016524314880371, |
| "learning_rate": 7.810904872389791e-06, |
| "loss": 0.5711633682250976, |
| "step": 4275 |
| }, |
| { |
| "epoch": 2.4240800789510786, |
| "grad_norm": 9.421856880187988, |
| "learning_rate": 7.796403712296985e-06, |
| "loss": 0.5826901626586914, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.4381784858311013, |
| "grad_norm": 9.801589965820312, |
| "learning_rate": 7.781902552204176e-06, |
| "loss": 0.5632424545288086, |
| "step": 4325 |
| }, |
| { |
| "epoch": 2.4522768927111236, |
| "grad_norm": 7.847035884857178, |
| "learning_rate": 7.76740139211137e-06, |
| "loss": 0.5748075485229492, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.4663752995911463, |
| "grad_norm": 8.85059928894043, |
| "learning_rate": 7.752900232018562e-06, |
| "loss": 0.5258598327636719, |
| "step": 4375 |
| }, |
| { |
| "epoch": 2.4804737064711686, |
| "grad_norm": 9.43190860748291, |
| "learning_rate": 7.738399071925755e-06, |
| "loss": 0.5532307815551758, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.4945721133511913, |
| "grad_norm": 8.644095420837402, |
| "learning_rate": 7.723897911832947e-06, |
| "loss": 0.5246799850463867, |
| "step": 4425 |
| }, |
| { |
| "epoch": 2.508670520231214, |
| "grad_norm": 10.166682243347168, |
| "learning_rate": 7.70939675174014e-06, |
| "loss": 0.5615069198608399, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.5227689271112363, |
| "grad_norm": 8.29073429107666, |
| "learning_rate": 7.694895591647333e-06, |
| "loss": 0.5665618133544922, |
| "step": 4475 |
| }, |
| { |
| "epoch": 2.536867333991259, |
| "grad_norm": 11.046154975891113, |
| "learning_rate": 7.680394431554525e-06, |
| "loss": 0.5597576904296875, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.5509657408712814, |
| "grad_norm": 8.011266708374023, |
| "learning_rate": 7.665893271461718e-06, |
| "loss": 0.5483290863037109, |
| "step": 4525 |
| }, |
| { |
| "epoch": 2.565064147751304, |
| "grad_norm": 8.506767272949219, |
| "learning_rate": 7.65139211136891e-06, |
| "loss": 0.5561467361450195, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.579162554631327, |
| "grad_norm": 8.871068954467773, |
| "learning_rate": 7.636890951276102e-06, |
| "loss": 0.581519546508789, |
| "step": 4575 |
| }, |
| { |
| "epoch": 2.593260961511349, |
| "grad_norm": 7.879359722137451, |
| "learning_rate": 7.622389791183295e-06, |
| "loss": 0.5407870483398437, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.607359368391372, |
| "grad_norm": 6.408371448516846, |
| "learning_rate": 7.607888631090487e-06, |
| "loss": 0.5560993576049804, |
| "step": 4625 |
| }, |
| { |
| "epoch": 2.621457775271394, |
| "grad_norm": 8.178828239440918, |
| "learning_rate": 7.593387470997681e-06, |
| "loss": 0.5660905838012695, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.635556182151417, |
| "grad_norm": 8.56795597076416, |
| "learning_rate": 7.578886310904873e-06, |
| "loss": 0.5393736648559571, |
| "step": 4675 |
| }, |
| { |
| "epoch": 2.6496545890314396, |
| "grad_norm": 8.56303596496582, |
| "learning_rate": 7.564385150812066e-06, |
| "loss": 0.5549613952636718, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.663752995911462, |
| "grad_norm": 8.995079040527344, |
| "learning_rate": 7.549883990719258e-06, |
| "loss": 0.5543619537353516, |
| "step": 4725 |
| }, |
| { |
| "epoch": 2.6778514027914846, |
| "grad_norm": 9.455941200256348, |
| "learning_rate": 7.535382830626451e-06, |
| "loss": 0.5422988510131836, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.691949809671507, |
| "grad_norm": 9.779569625854492, |
| "learning_rate": 7.520881670533643e-06, |
| "loss": 0.5424752044677734, |
| "step": 4775 |
| }, |
| { |
| "epoch": 2.7060482165515296, |
| "grad_norm": 9.737711906433105, |
| "learning_rate": 7.506380510440836e-06, |
| "loss": 0.5491796112060547, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.7201466234315523, |
| "grad_norm": 10.25398063659668, |
| "learning_rate": 7.491879350348028e-06, |
| "loss": 0.5705435180664062, |
| "step": 4825 |
| }, |
| { |
| "epoch": 2.734245030311575, |
| "grad_norm": 9.573110580444336, |
| "learning_rate": 7.477378190255221e-06, |
| "loss": 0.5880419540405274, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.7483434371915973, |
| "grad_norm": 11.062814712524414, |
| "learning_rate": 7.4628770301624135e-06, |
| "loss": 0.556083984375, |
| "step": 4875 |
| }, |
| { |
| "epoch": 2.7624418440716196, |
| "grad_norm": 9.247238159179688, |
| "learning_rate": 7.448375870069606e-06, |
| "loss": 0.5655975341796875, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.7765402509516424, |
| "grad_norm": 9.007148742675781, |
| "learning_rate": 7.433874709976798e-06, |
| "loss": 0.5616880035400391, |
| "step": 4925 |
| }, |
| { |
| "epoch": 2.790638657831665, |
| "grad_norm": 8.514342308044434, |
| "learning_rate": 7.419373549883992e-06, |
| "loss": 0.5538288116455078, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.804737064711688, |
| "grad_norm": 9.440685272216797, |
| "learning_rate": 7.4048723897911835e-06, |
| "loss": 0.5948199081420898, |
| "step": 4975 |
| }, |
| { |
| "epoch": 2.81883547159171, |
| "grad_norm": 6.956933975219727, |
| "learning_rate": 7.390371229698376e-06, |
| "loss": 0.5486139678955078, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.81883547159171, |
| "eval_loss": 0.24652095139026642, |
| "eval_runtime": 1532.9367, |
| "eval_samples_per_second": 2.022, |
| "eval_steps_per_second": 0.253, |
| "eval_wer": 0.20783165635834974, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.832933878471733, |
| "grad_norm": 9.058638572692871, |
| "learning_rate": 7.375870069605568e-06, |
| "loss": 0.5798805999755859, |
| "step": 5025 |
| }, |
| { |
| "epoch": 2.847032285351755, |
| "grad_norm": 7.628641605377197, |
| "learning_rate": 7.361368909512762e-06, |
| "loss": 0.5301958847045899, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.861130692231778, |
| "grad_norm": 8.487007141113281, |
| "learning_rate": 7.346867749419954e-06, |
| "loss": 0.566702880859375, |
| "step": 5075 |
| }, |
| { |
| "epoch": 2.8752290991118006, |
| "grad_norm": 9.008893013000488, |
| "learning_rate": 7.332366589327147e-06, |
| "loss": 0.564128532409668, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.889327505991823, |
| "grad_norm": 11.27266788482666, |
| "learning_rate": 7.31786542923434e-06, |
| "loss": 0.5648199462890625, |
| "step": 5125 |
| }, |
| { |
| "epoch": 2.9034259128718456, |
| "grad_norm": 9.269525527954102, |
| "learning_rate": 7.303364269141532e-06, |
| "loss": 0.5541753005981446, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.917524319751868, |
| "grad_norm": 9.549739837646484, |
| "learning_rate": 7.288863109048725e-06, |
| "loss": 0.5463447952270508, |
| "step": 5175 |
| }, |
| { |
| "epoch": 2.9316227266318906, |
| "grad_norm": 9.327741622924805, |
| "learning_rate": 7.274361948955917e-06, |
| "loss": 0.5498114395141601, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.9457211335119133, |
| "grad_norm": 10.359440803527832, |
| "learning_rate": 7.25986078886311e-06, |
| "loss": 0.5578446578979492, |
| "step": 5225 |
| }, |
| { |
| "epoch": 2.9598195403919356, |
| "grad_norm": 7.895180702209473, |
| "learning_rate": 7.245359628770302e-06, |
| "loss": 0.5610840606689453, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.9739179472719584, |
| "grad_norm": 11.096906661987305, |
| "learning_rate": 7.230858468677495e-06, |
| "loss": 0.5936727523803711, |
| "step": 5275 |
| }, |
| { |
| "epoch": 2.9880163541519806, |
| "grad_norm": 10.793472290039062, |
| "learning_rate": 7.216357308584687e-06, |
| "loss": 0.5792999649047852, |
| "step": 5300 |
| }, |
| { |
| "epoch": 3.0016918088256026, |
| "grad_norm": 8.622193336486816, |
| "learning_rate": 7.201856148491881e-06, |
| "loss": 0.5246665573120117, |
| "step": 5325 |
| }, |
| { |
| "epoch": 3.0157902157056253, |
| "grad_norm": 6.704568386077881, |
| "learning_rate": 7.1873549883990726e-06, |
| "loss": 0.40513866424560546, |
| "step": 5350 |
| }, |
| { |
| "epoch": 3.0298886225856476, |
| "grad_norm": 7.683806419372559, |
| "learning_rate": 7.172853828306265e-06, |
| "loss": 0.3967144775390625, |
| "step": 5375 |
| }, |
| { |
| "epoch": 3.0439870294656703, |
| "grad_norm": 7.537134647369385, |
| "learning_rate": 7.158352668213457e-06, |
| "loss": 0.39253467559814453, |
| "step": 5400 |
| }, |
| { |
| "epoch": 3.058085436345693, |
| "grad_norm": 6.951216697692871, |
| "learning_rate": 7.143851508120651e-06, |
| "loss": 0.374131965637207, |
| "step": 5425 |
| }, |
| { |
| "epoch": 3.0721838432257154, |
| "grad_norm": 7.260530471801758, |
| "learning_rate": 7.129350348027843e-06, |
| "loss": 0.35717914581298826, |
| "step": 5450 |
| }, |
| { |
| "epoch": 3.086282250105738, |
| "grad_norm": 7.6884989738464355, |
| "learning_rate": 7.114849187935035e-06, |
| "loss": 0.35510223388671874, |
| "step": 5475 |
| }, |
| { |
| "epoch": 3.100380656985761, |
| "grad_norm": 7.6629109382629395, |
| "learning_rate": 7.100348027842228e-06, |
| "loss": 0.35819530487060547, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.114479063865783, |
| "grad_norm": 7.406759262084961, |
| "learning_rate": 7.085846867749421e-06, |
| "loss": 0.3919057846069336, |
| "step": 5525 |
| }, |
| { |
| "epoch": 3.128577470745806, |
| "grad_norm": 8.94927978515625, |
| "learning_rate": 7.071345707656613e-06, |
| "loss": 0.3659718704223633, |
| "step": 5550 |
| }, |
| { |
| "epoch": 3.142675877625828, |
| "grad_norm": 8.768269538879395, |
| "learning_rate": 7.056844547563806e-06, |
| "loss": 0.37319766998291015, |
| "step": 5575 |
| }, |
| { |
| "epoch": 3.156774284505851, |
| "grad_norm": 8.374007225036621, |
| "learning_rate": 7.042343387470998e-06, |
| "loss": 0.3749269104003906, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.1708726913858736, |
| "grad_norm": 6.927631855010986, |
| "learning_rate": 7.027842227378191e-06, |
| "loss": 0.3902094650268555, |
| "step": 5625 |
| }, |
| { |
| "epoch": 3.184971098265896, |
| "grad_norm": 8.199101448059082, |
| "learning_rate": 7.013341067285383e-06, |
| "loss": 0.3912439727783203, |
| "step": 5650 |
| }, |
| { |
| "epoch": 3.1990695051459186, |
| "grad_norm": 6.719486713409424, |
| "learning_rate": 6.998839907192576e-06, |
| "loss": 0.36518966674804687, |
| "step": 5675 |
| }, |
| { |
| "epoch": 3.213167912025941, |
| "grad_norm": 8.620352745056152, |
| "learning_rate": 6.984338747099768e-06, |
| "loss": 0.3811537551879883, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.2272663189059636, |
| "grad_norm": 7.2147040367126465, |
| "learning_rate": 6.969837587006962e-06, |
| "loss": 0.37800453186035154, |
| "step": 5725 |
| }, |
| { |
| "epoch": 3.2413647257859863, |
| "grad_norm": 8.890436172485352, |
| "learning_rate": 6.9553364269141535e-06, |
| "loss": 0.3713486480712891, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.2554631326660086, |
| "grad_norm": 8.093574523925781, |
| "learning_rate": 6.940835266821346e-06, |
| "loss": 0.4041537857055664, |
| "step": 5775 |
| }, |
| { |
| "epoch": 3.2695615395460313, |
| "grad_norm": 9.059134483337402, |
| "learning_rate": 6.926334106728538e-06, |
| "loss": 0.397373046875, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.283659946426054, |
| "grad_norm": 9.615391731262207, |
| "learning_rate": 6.911832946635732e-06, |
| "loss": 0.3877538299560547, |
| "step": 5825 |
| }, |
| { |
| "epoch": 3.2977583533060764, |
| "grad_norm": 7.0843186378479, |
| "learning_rate": 6.8973317865429235e-06, |
| "loss": 0.38612773895263675, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.311856760186099, |
| "grad_norm": 7.724761009216309, |
| "learning_rate": 6.882830626450116e-06, |
| "loss": 0.3913743209838867, |
| "step": 5875 |
| }, |
| { |
| "epoch": 3.3259551670661214, |
| "grad_norm": 7.574341297149658, |
| "learning_rate": 6.86832946635731e-06, |
| "loss": 0.36103832244873046, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.340053573946144, |
| "grad_norm": 9.554155349731445, |
| "learning_rate": 6.853828306264502e-06, |
| "loss": 0.39909862518310546, |
| "step": 5925 |
| }, |
| { |
| "epoch": 3.354151980826167, |
| "grad_norm": 9.424493789672852, |
| "learning_rate": 6.839327146171695e-06, |
| "loss": 0.36129764556884764, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.368250387706189, |
| "grad_norm": 8.635687828063965, |
| "learning_rate": 6.824825986078887e-06, |
| "loss": 0.4005467987060547, |
| "step": 5975 |
| }, |
| { |
| "epoch": 3.382348794586212, |
| "grad_norm": 8.711161613464355, |
| "learning_rate": 6.81032482598608e-06, |
| "loss": 0.37857559204101565, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.382348794586212, |
| "eval_loss": 0.2660383880138397, |
| "eval_runtime": 1540.0218, |
| "eval_samples_per_second": 2.012, |
| "eval_steps_per_second": 0.252, |
| "eval_wer": 0.21214815999040776, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.396447201466234, |
| "grad_norm": 8.4149751663208, |
| "learning_rate": 6.795823665893272e-06, |
| "loss": 0.38818477630615233, |
| "step": 6025 |
| }, |
| { |
| "epoch": 3.410545608346257, |
| "grad_norm": 7.9141974449157715, |
| "learning_rate": 6.781322505800465e-06, |
| "loss": 0.3774140930175781, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.4246440152262796, |
| "grad_norm": 8.901697158813477, |
| "learning_rate": 6.766821345707657e-06, |
| "loss": 0.37076019287109374, |
| "step": 6075 |
| }, |
| { |
| "epoch": 3.438742422106302, |
| "grad_norm": 7.5369696617126465, |
| "learning_rate": 6.75232018561485e-06, |
| "loss": 0.3822758102416992, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.4528408289863246, |
| "grad_norm": 8.009127616882324, |
| "learning_rate": 6.7378190255220425e-06, |
| "loss": 0.38495445251464844, |
| "step": 6125 |
| }, |
| { |
| "epoch": 3.466939235866347, |
| "grad_norm": 8.295459747314453, |
| "learning_rate": 6.723317865429235e-06, |
| "loss": 0.3907606887817383, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.4810376427463696, |
| "grad_norm": 10.099355697631836, |
| "learning_rate": 6.708816705336427e-06, |
| "loss": 0.407639274597168, |
| "step": 6175 |
| }, |
| { |
| "epoch": 3.4951360496263923, |
| "grad_norm": 7.597548961639404, |
| "learning_rate": 6.694315545243621e-06, |
| "loss": 0.37315830230712893, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.5092344565064146, |
| "grad_norm": 8.139415740966797, |
| "learning_rate": 6.6798143851508125e-06, |
| "loss": 0.3879343795776367, |
| "step": 6225 |
| }, |
| { |
| "epoch": 3.5233328633864374, |
| "grad_norm": 8.672515869140625, |
| "learning_rate": 6.665313225058005e-06, |
| "loss": 0.37412925720214846, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.5374312702664596, |
| "grad_norm": 6.852619171142578, |
| "learning_rate": 6.650812064965198e-06, |
| "loss": 0.3825421142578125, |
| "step": 6275 |
| }, |
| { |
| "epoch": 3.5515296771464824, |
| "grad_norm": 7.91625452041626, |
| "learning_rate": 6.636310904872391e-06, |
| "loss": 0.3875956726074219, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.565628084026505, |
| "grad_norm": 8.165389060974121, |
| "learning_rate": 6.6218097447795825e-06, |
| "loss": 0.39700664520263673, |
| "step": 6325 |
| }, |
| { |
| "epoch": 3.579726490906528, |
| "grad_norm": 7.832555294036865, |
| "learning_rate": 6.607308584686776e-06, |
| "loss": 0.37678192138671873, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.59382489778655, |
| "grad_norm": 9.242694854736328, |
| "learning_rate": 6.592807424593968e-06, |
| "loss": 0.3898345184326172, |
| "step": 6375 |
| }, |
| { |
| "epoch": 3.6079233046665724, |
| "grad_norm": 10.566645622253418, |
| "learning_rate": 6.578306264501161e-06, |
| "loss": 0.41113948822021484, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.622021711546595, |
| "grad_norm": 8.03178882598877, |
| "learning_rate": 6.5638051044083525e-06, |
| "loss": 0.3778879165649414, |
| "step": 6425 |
| }, |
| { |
| "epoch": 3.636120118426618, |
| "grad_norm": 9.302940368652344, |
| "learning_rate": 6.549303944315546e-06, |
| "loss": 0.4171958541870117, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.6502185253066406, |
| "grad_norm": 8.460973739624023, |
| "learning_rate": 6.534802784222738e-06, |
| "loss": 0.39614273071289063, |
| "step": 6475 |
| }, |
| { |
| "epoch": 3.664316932186663, |
| "grad_norm": 7.800125598907471, |
| "learning_rate": 6.520301624129931e-06, |
| "loss": 0.3654580307006836, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.6784153390666856, |
| "grad_norm": 9.029678344726562, |
| "learning_rate": 6.505800464037123e-06, |
| "loss": 0.37184043884277346, |
| "step": 6525 |
| }, |
| { |
| "epoch": 3.692513745946708, |
| "grad_norm": 7.120487689971924, |
| "learning_rate": 6.491299303944316e-06, |
| "loss": 0.3724634552001953, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.7066121528267306, |
| "grad_norm": 7.917516708374023, |
| "learning_rate": 6.476798143851508e-06, |
| "loss": 0.3739112091064453, |
| "step": 6575 |
| }, |
| { |
| "epoch": 3.7207105597067534, |
| "grad_norm": 8.271352767944336, |
| "learning_rate": 6.4622969837587015e-06, |
| "loss": 0.401793212890625, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.7348089665867756, |
| "grad_norm": 7.502166748046875, |
| "learning_rate": 6.447795823665893e-06, |
| "loss": 0.42757793426513674, |
| "step": 6625 |
| }, |
| { |
| "epoch": 3.7489073734667984, |
| "grad_norm": 7.828073024749756, |
| "learning_rate": 6.433294663573086e-06, |
| "loss": 0.4309410095214844, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.7630057803468207, |
| "grad_norm": 8.782448768615723, |
| "learning_rate": 6.418793503480279e-06, |
| "loss": 0.40100780487060544, |
| "step": 6675 |
| }, |
| { |
| "epoch": 3.7771041872268434, |
| "grad_norm": 6.84846305847168, |
| "learning_rate": 6.4042923433874715e-06, |
| "loss": 0.3828923797607422, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.791202594106866, |
| "grad_norm": 8.250489234924316, |
| "learning_rate": 6.389791183294664e-06, |
| "loss": 0.3788992691040039, |
| "step": 6725 |
| }, |
| { |
| "epoch": 3.8053010009868884, |
| "grad_norm": 8.07790470123291, |
| "learning_rate": 6.375290023201857e-06, |
| "loss": 0.3897978210449219, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.819399407866911, |
| "grad_norm": 8.17908000946045, |
| "learning_rate": 6.36078886310905e-06, |
| "loss": 0.3732691955566406, |
| "step": 6775 |
| }, |
| { |
| "epoch": 3.8334978147469334, |
| "grad_norm": 10.077781677246094, |
| "learning_rate": 6.3462877030162415e-06, |
| "loss": 0.38536983489990234, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.847596221626956, |
| "grad_norm": 7.02805757522583, |
| "learning_rate": 6.331786542923435e-06, |
| "loss": 0.39071887969970703, |
| "step": 6825 |
| }, |
| { |
| "epoch": 3.861694628506979, |
| "grad_norm": 7.91436243057251, |
| "learning_rate": 6.317285382830627e-06, |
| "loss": 0.3588584899902344, |
| "step": 6850 |
| }, |
| { |
| "epoch": 3.875793035387001, |
| "grad_norm": 8.013671875, |
| "learning_rate": 6.30278422273782e-06, |
| "loss": 0.37952312469482424, |
| "step": 6875 |
| }, |
| { |
| "epoch": 3.889891442267024, |
| "grad_norm": 8.154217720031738, |
| "learning_rate": 6.288283062645012e-06, |
| "loss": 0.37390522003173826, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.903989849147046, |
| "grad_norm": 8.916945457458496, |
| "learning_rate": 6.273781902552205e-06, |
| "loss": 0.38926349639892577, |
| "step": 6925 |
| }, |
| { |
| "epoch": 3.918088256027069, |
| "grad_norm": 7.6121296882629395, |
| "learning_rate": 6.259280742459397e-06, |
| "loss": 0.3879304504394531, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.9321866629070916, |
| "grad_norm": 9.87363052368164, |
| "learning_rate": 6.2447795823665905e-06, |
| "loss": 0.3799150466918945, |
| "step": 6975 |
| }, |
| { |
| "epoch": 3.946285069787114, |
| "grad_norm": 6.678126335144043, |
| "learning_rate": 6.230278422273782e-06, |
| "loss": 0.3740867614746094, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.946285069787114, |
| "eval_loss": 0.270298033952713, |
| "eval_runtime": 1521.4464, |
| "eval_samples_per_second": 2.037, |
| "eval_steps_per_second": 0.255, |
| "eval_wer": 0.21098909882994774, |
| "step": 7000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 17740, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1426794605084672e+20, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |