| { | |
| "best_metric": 0.29856827886467024, | |
| "best_model_checkpoint": "/home/senyk/result_wav2vec/best_model_2_copy/checkpoint-15500", | |
| "epoch": 15.0, | |
| "eval_steps": 500, | |
| "global_step": 16485, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09099181073703366, | |
| "grad_norm": 1.8533661365509033, | |
| "learning_rate": 3.9757355171367915e-05, | |
| "loss": 0.1805, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18198362147406733, | |
| "grad_norm": 1.234511137008667, | |
| "learning_rate": 3.951471034273582e-05, | |
| "loss": 0.1797, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.272975432211101, | |
| "grad_norm": 2.1243155002593994, | |
| "learning_rate": 3.927206551410373e-05, | |
| "loss": 0.185, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36396724294813465, | |
| "grad_norm": 1.542136549949646, | |
| "learning_rate": 3.902942068547165e-05, | |
| "loss": 0.1808, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4549590536851683, | |
| "grad_norm": 1.6958433389663696, | |
| "learning_rate": 3.8786775856839554e-05, | |
| "loss": 0.1936, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4549590536851683, | |
| "eval_cer": 0.15896814599550807, | |
| "eval_loss": 0.2081621140241623, | |
| "eval_runtime": 132.4545, | |
| "eval_samples_per_second": 37.749, | |
| "eval_steps_per_second": 1.185, | |
| "eval_wer": 0.3304680304764031, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.545950864422202, | |
| "grad_norm": 1.8750900030136108, | |
| "learning_rate": 3.8544131028207465e-05, | |
| "loss": 0.1848, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6369426751592356, | |
| "grad_norm": 0.7346371412277222, | |
| "learning_rate": 3.8301486199575376e-05, | |
| "loss": 0.1899, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7279344858962693, | |
| "grad_norm": 0.8798062205314636, | |
| "learning_rate": 3.805884137094329e-05, | |
| "loss": 0.1761, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.818926296633303, | |
| "grad_norm": 3.140528917312622, | |
| "learning_rate": 3.7818622990597514e-05, | |
| "loss": 0.1766, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9099181073703366, | |
| "grad_norm": 1.5800111293792725, | |
| "learning_rate": 3.7575978161965426e-05, | |
| "loss": 0.174, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9099181073703366, | |
| "eval_cer": 0.15860186921906572, | |
| "eval_loss": 0.20823511481285095, | |
| "eval_runtime": 132.6784, | |
| "eval_samples_per_second": 37.685, | |
| "eval_steps_per_second": 1.183, | |
| "eval_wer": 0.3283748709218275, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0009099181073704, | |
| "grad_norm": 0.9137107133865356, | |
| "learning_rate": 3.733333333333334e-05, | |
| "loss": 0.1746, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.091901728844404, | |
| "grad_norm": 1.5409084558486938, | |
| "learning_rate": 3.709068850470125e-05, | |
| "loss": 0.175, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.1828935395814377, | |
| "grad_norm": 1.2571290731430054, | |
| "learning_rate": 3.684804367606915e-05, | |
| "loss": 0.161, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.2738853503184713, | |
| "grad_norm": 0.7620792388916016, | |
| "learning_rate": 3.6605398847437065e-05, | |
| "loss": 0.1704, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.364877161055505, | |
| "grad_norm": 1.167091727256775, | |
| "learning_rate": 3.6362754018804976e-05, | |
| "loss": 0.1855, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.364877161055505, | |
| "eval_cer": 0.15846501855533998, | |
| "eval_loss": 0.19808036088943481, | |
| "eval_runtime": 131.4602, | |
| "eval_samples_per_second": 38.034, | |
| "eval_steps_per_second": 1.194, | |
| "eval_wer": 0.3291563171555357, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.4558689717925386, | |
| "grad_norm": 1.6935396194458008, | |
| "learning_rate": 3.612010919017289e-05, | |
| "loss": 0.1698, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.5468607825295724, | |
| "grad_norm": 1.1173765659332275, | |
| "learning_rate": 3.58774643615408e-05, | |
| "loss": 0.1705, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.6378525932666061, | |
| "grad_norm": 1.6780610084533691, | |
| "learning_rate": 3.563481953290871e-05, | |
| "loss": 0.177, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.7288444040036397, | |
| "grad_norm": 0.9590256214141846, | |
| "learning_rate": 3.539217470427662e-05, | |
| "loss": 0.1778, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.8198362147406733, | |
| "grad_norm": 0.6319223642349243, | |
| "learning_rate": 3.5149529875644526e-05, | |
| "loss": 0.1724, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.8198362147406733, | |
| "eval_cer": 0.15803031644703477, | |
| "eval_loss": 0.19977422058582306, | |
| "eval_runtime": 131.3497, | |
| "eval_samples_per_second": 38.066, | |
| "eval_steps_per_second": 1.195, | |
| "eval_wer": 0.3265049817197399, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.910828025477707, | |
| "grad_norm": 1.1627233028411865, | |
| "learning_rate": 3.490688504701244e-05, | |
| "loss": 0.1745, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.001819836214741, | |
| "grad_norm": 0.90104740858078, | |
| "learning_rate": 3.466424021838035e-05, | |
| "loss": 0.1669, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.092811646951774, | |
| "grad_norm": 0.7599895000457764, | |
| "learning_rate": 3.4421595389748254e-05, | |
| "loss": 0.1523, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.183803457688808, | |
| "grad_norm": 0.6392490267753601, | |
| "learning_rate": 3.417895056111617e-05, | |
| "loss": 0.1596, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.2747952684258417, | |
| "grad_norm": 0.7351034283638, | |
| "learning_rate": 3.3936305732484083e-05, | |
| "loss": 0.1667, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.2747952684258417, | |
| "eval_cer": 0.15830804279400756, | |
| "eval_loss": 0.19935080409049988, | |
| "eval_runtime": 131.4244, | |
| "eval_samples_per_second": 38.045, | |
| "eval_steps_per_second": 1.195, | |
| "eval_wer": 0.3288214116268036, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.3657870791628755, | |
| "grad_norm": 1.4110583066940308, | |
| "learning_rate": 3.369366090385199e-05, | |
| "loss": 0.1592, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.4567788898999092, | |
| "grad_norm": 1.1413319110870361, | |
| "learning_rate": 3.34510160752199e-05, | |
| "loss": 0.1557, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.5477707006369426, | |
| "grad_norm": 1.064664363861084, | |
| "learning_rate": 3.320837124658781e-05, | |
| "loss": 0.1551, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.6387625113739763, | |
| "grad_norm": 0.9104379415512085, | |
| "learning_rate": 3.296572641795572e-05, | |
| "loss": 0.1465, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.72975432211101, | |
| "grad_norm": 0.9136043787002563, | |
| "learning_rate": 3.272308158932363e-05, | |
| "loss": 0.1635, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.72975432211101, | |
| "eval_cer": 0.1568509857272808, | |
| "eval_loss": 0.20498071610927582, | |
| "eval_runtime": 131.4378, | |
| "eval_samples_per_second": 38.041, | |
| "eval_steps_per_second": 1.194, | |
| "eval_wer": 0.3222907538165276, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.8207461328480434, | |
| "grad_norm": 1.207688570022583, | |
| "learning_rate": 3.248043676069154e-05, | |
| "loss": 0.1662, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.911737943585077, | |
| "grad_norm": 2.2350857257843018, | |
| "learning_rate": 3.223779193205945e-05, | |
| "loss": 0.1602, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.002729754322111, | |
| "grad_norm": 1.0255531072616577, | |
| "learning_rate": 3.199514710342736e-05, | |
| "loss": 0.1485, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.0937215650591448, | |
| "grad_norm": 1.2408232688903809, | |
| "learning_rate": 3.175250227479527e-05, | |
| "loss": 0.1528, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.1847133757961785, | |
| "grad_norm": 0.6412222981452942, | |
| "learning_rate": 3.1509857446163184e-05, | |
| "loss": 0.14, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.1847133757961785, | |
| "eval_cer": 0.1566980349854697, | |
| "eval_loss": 0.2068762630224228, | |
| "eval_runtime": 131.207, | |
| "eval_samples_per_second": 38.108, | |
| "eval_steps_per_second": 1.197, | |
| "eval_wer": 0.32053249979068404, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.275705186533212, | |
| "grad_norm": 1.1700862646102905, | |
| "learning_rate": 3.126721261753109e-05, | |
| "loss": 0.1522, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.3666969972702456, | |
| "grad_norm": 2.105367660522461, | |
| "learning_rate": 3.1024567788899e-05, | |
| "loss": 0.1501, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.4576888080072794, | |
| "grad_norm": 0.5450145602226257, | |
| "learning_rate": 3.0784349408553233e-05, | |
| "loss": 0.1446, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.548680618744313, | |
| "grad_norm": 0.7369564771652222, | |
| "learning_rate": 3.0541704579921145e-05, | |
| "loss": 0.1509, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.6396724294813465, | |
| "grad_norm": 1.5404269695281982, | |
| "learning_rate": 3.0299059751289053e-05, | |
| "loss": 0.1573, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.6396724294813465, | |
| "eval_cer": 0.156923436078665, | |
| "eval_loss": 0.2066684067249298, | |
| "eval_runtime": 132.0661, | |
| "eval_samples_per_second": 37.86, | |
| "eval_steps_per_second": 1.189, | |
| "eval_wer": 0.32067204376098907, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.7306642402183803, | |
| "grad_norm": 1.83568274974823, | |
| "learning_rate": 3.005641492265696e-05, | |
| "loss": 0.1489, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.821656050955414, | |
| "grad_norm": 0.9000252485275269, | |
| "learning_rate": 2.9813770094024872e-05, | |
| "loss": 0.1446, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.912647861692448, | |
| "grad_norm": 1.9459755420684814, | |
| "learning_rate": 2.957112526539278e-05, | |
| "loss": 0.1541, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 4.003639672429482, | |
| "grad_norm": 1.2044388055801392, | |
| "learning_rate": 2.9328480436760695e-05, | |
| "loss": 0.1555, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 4.094631483166515, | |
| "grad_norm": 0.984316885471344, | |
| "learning_rate": 2.9085835608128607e-05, | |
| "loss": 0.1487, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.094631483166515, | |
| "eval_cer": 0.15719713740611643, | |
| "eval_loss": 0.21014198660850525, | |
| "eval_runtime": 131.975, | |
| "eval_samples_per_second": 37.886, | |
| "eval_steps_per_second": 1.19, | |
| "eval_wer": 0.3224023889927716, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.185623293903548, | |
| "grad_norm": 0.879389762878418, | |
| "learning_rate": 2.8843190779496515e-05, | |
| "loss": 0.1384, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.276615104640582, | |
| "grad_norm": 1.0293374061584473, | |
| "learning_rate": 2.8600545950864426e-05, | |
| "loss": 0.1419, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.367606915377616, | |
| "grad_norm": 0.7723912596702576, | |
| "learning_rate": 2.8357901122232334e-05, | |
| "loss": 0.142, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 4.45859872611465, | |
| "grad_norm": 1.056503176689148, | |
| "learning_rate": 2.8115256293600245e-05, | |
| "loss": 0.1492, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.549590536851683, | |
| "grad_norm": 1.1004029512405396, | |
| "learning_rate": 2.7872611464968153e-05, | |
| "loss": 0.1501, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.549590536851683, | |
| "eval_cer": 0.1563116331114206, | |
| "eval_loss": 0.2110494077205658, | |
| "eval_runtime": 131.8894, | |
| "eval_samples_per_second": 37.911, | |
| "eval_steps_per_second": 1.19, | |
| "eval_wer": 0.3176020764142781, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.640582347588717, | |
| "grad_norm": 1.6329824924468994, | |
| "learning_rate": 2.7629966636336068e-05, | |
| "loss": 0.1398, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.731574158325751, | |
| "grad_norm": 1.094870924949646, | |
| "learning_rate": 2.7387321807703976e-05, | |
| "loss": 0.1382, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 4.822565969062785, | |
| "grad_norm": 1.094946026802063, | |
| "learning_rate": 2.7144676979071888e-05, | |
| "loss": 0.1471, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 4.9135577797998184, | |
| "grad_norm": 1.1323785781860352, | |
| "learning_rate": 2.6902032150439796e-05, | |
| "loss": 0.1487, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.004549590536851, | |
| "grad_norm": 1.0509577989578247, | |
| "learning_rate": 2.6659387321807707e-05, | |
| "loss": 0.1486, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.004549590536851, | |
| "eval_cer": 0.15565957994896276, | |
| "eval_loss": 0.20399489998817444, | |
| "eval_runtime": 131.3537, | |
| "eval_samples_per_second": 38.065, | |
| "eval_steps_per_second": 1.195, | |
| "eval_wer": 0.3159275487706176, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.095541401273885, | |
| "grad_norm": 1.0751926898956299, | |
| "learning_rate": 2.6416742493175615e-05, | |
| "loss": 0.1311, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 5.186533212010919, | |
| "grad_norm": 1.638108730316162, | |
| "learning_rate": 2.6174097664543526e-05, | |
| "loss": 0.1437, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 5.277525022747953, | |
| "grad_norm": 0.8366700410842896, | |
| "learning_rate": 2.5931452835911434e-05, | |
| "loss": 0.1334, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 5.368516833484986, | |
| "grad_norm": 0.9168310761451721, | |
| "learning_rate": 2.568880800727935e-05, | |
| "loss": 0.1291, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 5.45950864422202, | |
| "grad_norm": 0.8272154331207275, | |
| "learning_rate": 2.5446163178647257e-05, | |
| "loss": 0.1342, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.45950864422202, | |
| "eval_cer": 0.15511217729405988, | |
| "eval_loss": 0.20407769083976746, | |
| "eval_runtime": 130.9722, | |
| "eval_samples_per_second": 38.176, | |
| "eval_steps_per_second": 1.199, | |
| "eval_wer": 0.31436465630320115, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.550500454959054, | |
| "grad_norm": 0.7047191262245178, | |
| "learning_rate": 2.520351835001517e-05, | |
| "loss": 0.131, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 5.641492265696087, | |
| "grad_norm": 1.3190622329711914, | |
| "learning_rate": 2.4960873521383077e-05, | |
| "loss": 0.1335, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 5.732484076433121, | |
| "grad_norm": 1.6135904788970947, | |
| "learning_rate": 2.472065514103731e-05, | |
| "loss": 0.1352, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 5.823475887170154, | |
| "grad_norm": 1.2571635246276855, | |
| "learning_rate": 2.447801031240522e-05, | |
| "loss": 0.1405, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 5.914467697907188, | |
| "grad_norm": 1.103220820426941, | |
| "learning_rate": 2.423536548377313e-05, | |
| "loss": 0.1396, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.914467697907188, | |
| "eval_cer": 0.15522085282113618, | |
| "eval_loss": 0.20565420389175415, | |
| "eval_runtime": 131.5948, | |
| "eval_samples_per_second": 37.995, | |
| "eval_steps_per_second": 1.193, | |
| "eval_wer": 0.31433674750914015, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.005459508644222, | |
| "grad_norm": 1.2131693363189697, | |
| "learning_rate": 2.399272065514104e-05, | |
| "loss": 0.1379, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 6.096451319381256, | |
| "grad_norm": 0.8314226865768433, | |
| "learning_rate": 2.375007582650895e-05, | |
| "loss": 0.1289, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 6.1874431301182895, | |
| "grad_norm": 0.929862380027771, | |
| "learning_rate": 2.350743099787686e-05, | |
| "loss": 0.1291, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 6.278434940855323, | |
| "grad_norm": 1.2670739889144897, | |
| "learning_rate": 2.326478616924477e-05, | |
| "loss": 0.1349, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 6.369426751592357, | |
| "grad_norm": 0.979325532913208, | |
| "learning_rate": 2.3022141340612677e-05, | |
| "loss": 0.136, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.369426751592357, | |
| "eval_cer": 0.15454464954155028, | |
| "eval_loss": 0.20977585017681122, | |
| "eval_runtime": 132.5521, | |
| "eval_samples_per_second": 37.721, | |
| "eval_steps_per_second": 1.184, | |
| "eval_wer": 0.3130529429823337, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.460418562329391, | |
| "grad_norm": 1.1290801763534546, | |
| "learning_rate": 2.277949651198059e-05, | |
| "loss": 0.1419, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 6.551410373066424, | |
| "grad_norm": 0.8063333034515381, | |
| "learning_rate": 2.2536851683348503e-05, | |
| "loss": 0.1251, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 6.6424021838034575, | |
| "grad_norm": 0.7427828907966614, | |
| "learning_rate": 2.229420685471641e-05, | |
| "loss": 0.137, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 6.733393994540491, | |
| "grad_norm": 0.6900395154953003, | |
| "learning_rate": 2.2051562026084322e-05, | |
| "loss": 0.1359, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 6.824385805277525, | |
| "grad_norm": 1.7373096942901611, | |
| "learning_rate": 2.180891719745223e-05, | |
| "loss": 0.1266, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.824385805277525, | |
| "eval_cer": 0.15423472303840674, | |
| "eval_loss": 0.2094658762216568, | |
| "eval_runtime": 132.3336, | |
| "eval_samples_per_second": 37.783, | |
| "eval_steps_per_second": 1.186, | |
| "eval_wer": 0.310596969104965, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.915377616014559, | |
| "grad_norm": 0.7071816921234131, | |
| "learning_rate": 2.156627236882014e-05, | |
| "loss": 0.1267, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 7.006369426751593, | |
| "grad_norm": 1.2711293697357178, | |
| "learning_rate": 2.132362754018805e-05, | |
| "loss": 0.1284, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 7.097361237488626, | |
| "grad_norm": 0.9852485656738281, | |
| "learning_rate": 2.108098271155596e-05, | |
| "loss": 0.1223, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 7.188353048225659, | |
| "grad_norm": 1.4875195026397705, | |
| "learning_rate": 2.0838337882923872e-05, | |
| "loss": 0.1273, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 7.279344858962693, | |
| "grad_norm": 0.8052563071250916, | |
| "learning_rate": 2.0595693054291784e-05, | |
| "loss": 0.1283, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.279344858962693, | |
| "eval_cer": 0.1537718457934521, | |
| "eval_loss": 0.2159704566001892, | |
| "eval_runtime": 132.3477, | |
| "eval_samples_per_second": 37.779, | |
| "eval_steps_per_second": 1.186, | |
| "eval_wer": 0.3085317183444503, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 7.370336669699727, | |
| "grad_norm": 2.028435230255127, | |
| "learning_rate": 2.0353048225659692e-05, | |
| "loss": 0.1346, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 7.461328480436761, | |
| "grad_norm": 0.8297127485275269, | |
| "learning_rate": 2.0110403397027603e-05, | |
| "loss": 0.1235, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 7.552320291173794, | |
| "grad_norm": 1.299826741218567, | |
| "learning_rate": 1.986775856839551e-05, | |
| "loss": 0.1243, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 7.643312101910828, | |
| "grad_norm": 2.9055380821228027, | |
| "learning_rate": 1.9625113739763423e-05, | |
| "loss": 0.1199, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 7.734303912647862, | |
| "grad_norm": 1.5928077697753906, | |
| "learning_rate": 1.9382468911131334e-05, | |
| "loss": 0.1229, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.734303912647862, | |
| "eval_cer": 0.1537919708910588, | |
| "eval_loss": 0.21750004589557648, | |
| "eval_runtime": 131.9305, | |
| "eval_samples_per_second": 37.899, | |
| "eval_steps_per_second": 1.19, | |
| "eval_wer": 0.30755491055231504, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.825295723384896, | |
| "grad_norm": 1.1604926586151123, | |
| "learning_rate": 1.9139824082499242e-05, | |
| "loss": 0.1237, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 7.916287534121929, | |
| "grad_norm": 2.2635440826416016, | |
| "learning_rate": 1.8897179253867153e-05, | |
| "loss": 0.1248, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 8.007279344858963, | |
| "grad_norm": 0.6642023324966431, | |
| "learning_rate": 1.8654534425235065e-05, | |
| "loss": 0.1276, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 8.098271155595997, | |
| "grad_norm": 2.0704445838928223, | |
| "learning_rate": 1.8411889596602973e-05, | |
| "loss": 0.1124, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 8.18926296633303, | |
| "grad_norm": 0.8484503030776978, | |
| "learning_rate": 1.8169244767970884e-05, | |
| "loss": 0.1267, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.18926296633303, | |
| "eval_cer": 0.15306746737721677, | |
| "eval_loss": 0.21141663193702698, | |
| "eval_runtime": 132.5307, | |
| "eval_samples_per_second": 37.727, | |
| "eval_steps_per_second": 1.185, | |
| "eval_wer": 0.30568502135022746, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 8.280254777070065, | |
| "grad_norm": 0.8810114860534668, | |
| "learning_rate": 1.7926599939338792e-05, | |
| "loss": 0.1191, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 8.371246587807097, | |
| "grad_norm": 0.343791663646698, | |
| "learning_rate": 1.7686381558993026e-05, | |
| "loss": 0.1135, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 8.46223839854413, | |
| "grad_norm": 1.2659159898757935, | |
| "learning_rate": 1.7443736730360937e-05, | |
| "loss": 0.1215, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 8.553230209281164, | |
| "grad_norm": 2.19396710395813, | |
| "learning_rate": 1.7201091901728845e-05, | |
| "loss": 0.1228, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 8.644222020018198, | |
| "grad_norm": 0.6617141366004944, | |
| "learning_rate": 1.6958447073096757e-05, | |
| "loss": 0.1127, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.644222020018198, | |
| "eval_cer": 0.1528179161668934, | |
| "eval_loss": 0.20628662407398224, | |
| "eval_runtime": 131.7465, | |
| "eval_samples_per_second": 37.952, | |
| "eval_steps_per_second": 1.192, | |
| "eval_wer": 0.3068571907007898, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.735213830755232, | |
| "grad_norm": 1.5183671712875366, | |
| "learning_rate": 1.6715802244464668e-05, | |
| "loss": 0.1253, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 8.826205641492265, | |
| "grad_norm": 0.6270197629928589, | |
| "learning_rate": 1.6473157415832576e-05, | |
| "loss": 0.1245, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 8.9171974522293, | |
| "grad_norm": 0.7786601185798645, | |
| "learning_rate": 1.6230512587200488e-05, | |
| "loss": 0.1268, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 9.008189262966333, | |
| "grad_norm": 1.2779630422592163, | |
| "learning_rate": 1.5987867758568396e-05, | |
| "loss": 0.1285, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 9.099181073703367, | |
| "grad_norm": 0.8640280365943909, | |
| "learning_rate": 1.5745222929936307e-05, | |
| "loss": 0.1165, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.099181073703367, | |
| "eval_cer": 0.15322041811902787, | |
| "eval_loss": 0.20939494669437408, | |
| "eval_runtime": 130.9926, | |
| "eval_samples_per_second": 38.17, | |
| "eval_steps_per_second": 1.199, | |
| "eval_wer": 0.3048477575283972, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 9.1901728844404, | |
| "grad_norm": 0.9638440012931824, | |
| "learning_rate": 1.550257810130422e-05, | |
| "loss": 0.1076, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 9.281164695177434, | |
| "grad_norm": 0.8095070719718933, | |
| "learning_rate": 1.526235972095845e-05, | |
| "loss": 0.1268, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 9.372156505914468, | |
| "grad_norm": 0.8781161308288574, | |
| "learning_rate": 1.5019714892326358e-05, | |
| "loss": 0.1207, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 9.463148316651502, | |
| "grad_norm": 1.6324824094772339, | |
| "learning_rate": 1.4777070063694268e-05, | |
| "loss": 0.1256, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 9.554140127388536, | |
| "grad_norm": 0.8956096172332764, | |
| "learning_rate": 1.453442523506218e-05, | |
| "loss": 0.1222, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.554140127388536, | |
| "eval_cer": 0.1532043180409425, | |
| "eval_loss": 0.20789633691310883, | |
| "eval_runtime": 131.5404, | |
| "eval_samples_per_second": 38.011, | |
| "eval_steps_per_second": 1.194, | |
| "eval_wer": 0.3066897379364238, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.64513193812557, | |
| "grad_norm": 1.19681978225708, | |
| "learning_rate": 1.4291780406430089e-05, | |
| "loss": 0.1197, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 9.736123748862603, | |
| "grad_norm": 5.549036026000977, | |
| "learning_rate": 1.4049135577797999e-05, | |
| "loss": 0.1128, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 9.827115559599637, | |
| "grad_norm": 0.7990231513977051, | |
| "learning_rate": 1.380649074916591e-05, | |
| "loss": 0.1164, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 9.918107370336669, | |
| "grad_norm": 0.9332329630851746, | |
| "learning_rate": 1.356384592053382e-05, | |
| "loss": 0.1236, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 10.009099181073703, | |
| "grad_norm": 1.4885659217834473, | |
| "learning_rate": 1.332120109190173e-05, | |
| "loss": 0.1127, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 10.009099181073703, | |
| "eval_cer": 0.15307551741625947, | |
| "eval_loss": 0.20891791582107544, | |
| "eval_runtime": 131.2946, | |
| "eval_samples_per_second": 38.082, | |
| "eval_steps_per_second": 1.196, | |
| "eval_wer": 0.30557338617398344, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 10.100090991810736, | |
| "grad_norm": 0.5533010959625244, | |
| "learning_rate": 1.307855626326964e-05, | |
| "loss": 0.1158, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 10.19108280254777, | |
| "grad_norm": 2.3450381755828857, | |
| "learning_rate": 1.283591143463755e-05, | |
| "loss": 0.1136, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 10.282074613284804, | |
| "grad_norm": 0.6852346062660217, | |
| "learning_rate": 1.259326660600546e-05, | |
| "loss": 0.1296, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 10.373066424021838, | |
| "grad_norm": 0.872776985168457, | |
| "learning_rate": 1.235062177737337e-05, | |
| "loss": 0.1069, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 10.464058234758872, | |
| "grad_norm": 0.547275722026825, | |
| "learning_rate": 1.210797694874128e-05, | |
| "loss": 0.1084, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 10.464058234758872, | |
| "eval_cer": 0.15258044001513407, | |
| "eval_loss": 0.2116706520318985, | |
| "eval_runtime": 131.7531, | |
| "eval_samples_per_second": 37.95, | |
| "eval_steps_per_second": 1.192, | |
| "eval_wer": 0.30317322988473666, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 10.555050045495905, | |
| "grad_norm": 1.4432127475738525, | |
| "learning_rate": 1.1865332120109191e-05, | |
| "loss": 0.1237, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 10.646041856232939, | |
| "grad_norm": 0.5424668192863464, | |
| "learning_rate": 1.1622687291477101e-05, | |
| "loss": 0.116, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 10.737033666969973, | |
| "grad_norm": 0.6486382484436035, | |
| "learning_rate": 1.138004246284501e-05, | |
| "loss": 0.1097, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 10.828025477707007, | |
| "grad_norm": 1.37655770778656, | |
| "learning_rate": 1.113739763421292e-05, | |
| "loss": 0.1026, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 10.91901728844404, | |
| "grad_norm": 0.7191163897514343, | |
| "learning_rate": 1.0894752805580833e-05, | |
| "loss": 0.1155, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.91901728844404, | |
| "eval_cer": 0.15271326565933846, | |
| "eval_loss": 0.20751111209392548, | |
| "eval_runtime": 131.6857, | |
| "eval_samples_per_second": 37.969, | |
| "eval_steps_per_second": 1.192, | |
| "eval_wer": 0.3045407607937261, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 11.010009099181074, | |
| "grad_norm": 1.8931193351745605, | |
| "learning_rate": 1.0652107976948743e-05, | |
| "loss": 0.1154, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 11.101000909918108, | |
| "grad_norm": 0.43597936630249023, | |
| "learning_rate": 1.0409463148316651e-05, | |
| "loss": 0.0999, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 11.191992720655142, | |
| "grad_norm": 1.1339422464370728, | |
| "learning_rate": 1.0166818319684561e-05, | |
| "loss": 0.1107, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 11.282984531392175, | |
| "grad_norm": 0.9059270620346069, | |
| "learning_rate": 9.924173491052472e-06, | |
| "loss": 0.1045, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 11.373976342129207, | |
| "grad_norm": 0.8777015209197998, | |
| "learning_rate": 9.681528662420384e-06, | |
| "loss": 0.0955, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 11.373976342129207, | |
| "eval_cer": 0.15231076370720398, | |
| "eval_loss": 0.21829599142074585, | |
| "eval_runtime": 131.8689, | |
| "eval_samples_per_second": 37.916, | |
| "eval_steps_per_second": 1.191, | |
| "eval_wer": 0.3025871452094555, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 11.464968152866241, | |
| "grad_norm": 0.7121880650520325, | |
| "learning_rate": 9.438883833788293e-06, | |
| "loss": 0.1068, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 11.555959963603275, | |
| "grad_norm": 1.0068190097808838, | |
| "learning_rate": 9.196239005156203e-06, | |
| "loss": 0.1047, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 11.646951774340309, | |
| "grad_norm": 1.2295094728469849, | |
| "learning_rate": 8.953594176524115e-06, | |
| "loss": 0.1041, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 11.737943585077343, | |
| "grad_norm": 1.1067237854003906, | |
| "learning_rate": 8.710949347892024e-06, | |
| "loss": 0.1071, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 11.828935395814376, | |
| "grad_norm": 0.9286106824874878, | |
| "learning_rate": 8.468304519259934e-06, | |
| "loss": 0.1146, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 11.828935395814376, | |
| "eval_cer": 0.15205316245783793, | |
| "eval_loss": 0.21160683035850525, | |
| "eval_runtime": 131.9777, | |
| "eval_samples_per_second": 37.885, | |
| "eval_steps_per_second": 1.19, | |
| "eval_wer": 0.30147079344701516, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 11.91992720655141, | |
| "grad_norm": 1.0843188762664795, | |
| "learning_rate": 8.225659690627844e-06, | |
| "loss": 0.1099, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 12.010919017288444, | |
| "grad_norm": 0.8679298162460327, | |
| "learning_rate": 7.983014861995755e-06, | |
| "loss": 0.1076, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 12.101910828025478, | |
| "grad_norm": 1.5552619695663452, | |
| "learning_rate": 7.742796481649985e-06, | |
| "loss": 0.1167, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 12.192902638762511, | |
| "grad_norm": 1.5181180238723755, | |
| "learning_rate": 7.500151653017895e-06, | |
| "loss": 0.1096, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 12.283894449499545, | |
| "grad_norm": 0.6448826789855957, | |
| "learning_rate": 7.257506824385806e-06, | |
| "loss": 0.1094, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 12.283894449499545, | |
| "eval_cer": 0.15156613509575523, | |
| "eval_loss": 0.2089649885892868, | |
| "eval_runtime": 131.4101, | |
| "eval_samples_per_second": 38.049, | |
| "eval_steps_per_second": 1.195, | |
| "eval_wer": 0.2992939075102565, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 12.374886260236579, | |
| "grad_norm": 0.68089759349823, | |
| "learning_rate": 7.014861995753716e-06, | |
| "loss": 0.1042, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 12.465878070973613, | |
| "grad_norm": 0.5364871025085449, | |
| "learning_rate": 6.7722171671216266e-06, | |
| "loss": 0.1042, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 12.556869881710647, | |
| "grad_norm": 0.9213688969612122, | |
| "learning_rate": 6.529572338489536e-06, | |
| "loss": 0.1075, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 12.64786169244768, | |
| "grad_norm": 0.8983300924301147, | |
| "learning_rate": 6.286927509857447e-06, | |
| "loss": 0.1085, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 12.738853503184714, | |
| "grad_norm": 1.987417459487915, | |
| "learning_rate": 6.0442826812253566e-06, | |
| "loss": 0.1072, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 12.738853503184714, | |
| "eval_cer": 0.151727135876609, | |
| "eval_loss": 0.2124236822128296, | |
| "eval_runtime": 131.355, | |
| "eval_samples_per_second": 38.065, | |
| "eval_steps_per_second": 1.195, | |
| "eval_wer": 0.3002148977142698, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 12.829845313921748, | |
| "grad_norm": 1.3773497343063354, | |
| "learning_rate": 5.801637852593267e-06, | |
| "loss": 0.1077, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 12.920837124658782, | |
| "grad_norm": 1.0006029605865479, | |
| "learning_rate": 5.558993023961178e-06, | |
| "loss": 0.0996, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 13.011828935395814, | |
| "grad_norm": 1.134149193763733, | |
| "learning_rate": 5.316348195329087e-06, | |
| "loss": 0.1089, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 13.102820746132847, | |
| "grad_norm": 1.6549540758132935, | |
| "learning_rate": 5.073703366696998e-06, | |
| "loss": 0.1143, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 13.193812556869881, | |
| "grad_norm": 0.8590063452720642, | |
| "learning_rate": 4.831058538064908e-06, | |
| "loss": 0.1125, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 13.193812556869881, | |
| "eval_cer": 0.15173116089613034, | |
| "eval_loss": 0.2130936086177826, | |
| "eval_runtime": 131.6874, | |
| "eval_samples_per_second": 37.969, | |
| "eval_steps_per_second": 1.192, | |
| "eval_wer": 0.3000195361558427, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 13.284804367606915, | |
| "grad_norm": 0.8164013028144836, | |
| "learning_rate": 4.588413709432818e-06, | |
| "loss": 0.0982, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 13.375796178343949, | |
| "grad_norm": 0.8457829356193542, | |
| "learning_rate": 4.345768880800728e-06, | |
| "loss": 0.0988, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 13.466787989080983, | |
| "grad_norm": 0.952691912651062, | |
| "learning_rate": 4.1031240521686385e-06, | |
| "loss": 0.1007, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 13.557779799818016, | |
| "grad_norm": 0.4639749825000763, | |
| "learning_rate": 3.860479223536548e-06, | |
| "loss": 0.1088, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 13.64877161055505, | |
| "grad_norm": 1.1107044219970703, | |
| "learning_rate": 3.6178343949044588e-06, | |
| "loss": 0.1058, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 13.64877161055505, | |
| "eval_cer": 0.1514735596467643, | |
| "eval_loss": 0.217019721865654, | |
| "eval_runtime": 132.0061, | |
| "eval_samples_per_second": 37.877, | |
| "eval_steps_per_second": 1.189, | |
| "eval_wer": 0.29923808992213446, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 13.739763421292084, | |
| "grad_norm": 1.299850583076477, | |
| "learning_rate": 3.375189566272369e-06, | |
| "loss": 0.1036, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 13.830755232029118, | |
| "grad_norm": 1.5436201095581055, | |
| "learning_rate": 3.1325447376402795e-06, | |
| "loss": 0.0973, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 13.921747042766151, | |
| "grad_norm": 1.0776885747909546, | |
| "learning_rate": 2.8898999090081896e-06, | |
| "loss": 0.0963, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 14.012738853503185, | |
| "grad_norm": 0.9970951080322266, | |
| "learning_rate": 2.6472550803760997e-06, | |
| "loss": 0.1104, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 14.103730664240219, | |
| "grad_norm": 0.5252935886383057, | |
| "learning_rate": 2.40461025174401e-06, | |
| "loss": 0.0951, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 14.103730664240219, | |
| "eval_cer": 0.15132463392447454, | |
| "eval_loss": 0.21604645252227783, | |
| "eval_runtime": 132.1387, | |
| "eval_samples_per_second": 37.839, | |
| "eval_steps_per_second": 1.188, | |
| "eval_wer": 0.29856827886467024, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 14.194722474977253, | |
| "grad_norm": 1.1974434852600098, | |
| "learning_rate": 2.16196542311192e-06, | |
| "loss": 0.1019, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 14.285714285714286, | |
| "grad_norm": 0.5327410697937012, | |
| "learning_rate": 1.9193205944798306e-06, | |
| "loss": 0.0968, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 14.376706096451318, | |
| "grad_norm": 0.8405203819274902, | |
| "learning_rate": 1.6766757658477407e-06, | |
| "loss": 0.0941, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 14.467697907188352, | |
| "grad_norm": 0.38401368260383606, | |
| "learning_rate": 1.4340309372156508e-06, | |
| "loss": 0.1036, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 14.558689717925386, | |
| "grad_norm": 1.1276684999465942, | |
| "learning_rate": 1.191386108583561e-06, | |
| "loss": 0.1035, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 14.558689717925386, | |
| "eval_cer": 0.15134475902208125, | |
| "eval_loss": 0.21339672803878784, | |
| "eval_runtime": 132.1196, | |
| "eval_samples_per_second": 37.844, | |
| "eval_steps_per_second": 1.188, | |
| "eval_wer": 0.29856827886467024, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 14.64968152866242, | |
| "grad_norm": 0.5266655683517456, | |
| "learning_rate": 9.487412799514711e-07, | |
| "loss": 0.098, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 14.740673339399454, | |
| "grad_norm": 0.8043445348739624, | |
| "learning_rate": 7.060964513193813e-07, | |
| "loss": 0.1005, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 14.831665150136487, | |
| "grad_norm": 1.0907572507858276, | |
| "learning_rate": 4.6345162268729147e-07, | |
| "loss": 0.1023, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 14.922656960873521, | |
| "grad_norm": 2.0936965942382812, | |
| "learning_rate": 2.2080679405520171e-07, | |
| "loss": 0.0963, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "step": 16485, | |
| "total_flos": 1.21495045308783e+20, | |
| "train_loss": 0.13136799893019088, | |
| "train_runtime": 24963.2457, | |
| "train_samples_per_second": 21.117, | |
| "train_steps_per_second": 0.66 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 16485, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.21495045308783e+20, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |