| { | |
| "best_global_step": 5000, | |
| "best_metric": 0.28820186853408813, | |
| "best_model_checkpoint": "./Wav2vec2-wolof/checkpoint-5000", | |
| "epoch": 3.485254691689008, | |
| "eval_steps": 500, | |
| "global_step": 6500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.013404825737265416, | |
| "grad_norm": 26.66357421875, | |
| "learning_rate": 6.899999999999999e-06, | |
| "loss": 24.1179, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02680965147453083, | |
| "grad_norm": 30.594707489013672, | |
| "learning_rate": 1.4099999999999999e-05, | |
| "loss": 23.0079, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.040214477211796246, | |
| "grad_norm": 24.911148071289062, | |
| "learning_rate": 2.1599999999999996e-05, | |
| "loss": 14.5492, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.05361930294906166, | |
| "grad_norm": 6.641660690307617, | |
| "learning_rate": 2.91e-05, | |
| "loss": 5.6879, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06702412868632708, | |
| "grad_norm": 2.7079834938049316, | |
| "learning_rate": 3.6599999999999995e-05, | |
| "loss": 4.0839, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08042895442359249, | |
| "grad_norm": 1.957312822341919, | |
| "learning_rate": 4.4099999999999995e-05, | |
| "loss": 3.6578, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0938337801608579, | |
| "grad_norm": 0.7005499005317688, | |
| "learning_rate": 5.1599999999999994e-05, | |
| "loss": 3.2777, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.10723860589812333, | |
| "grad_norm": 0.8753517270088196, | |
| "learning_rate": 5.91e-05, | |
| "loss": 3.1782, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12064343163538874, | |
| "grad_norm": 1.0705907344818115, | |
| "learning_rate": 6.659999999999999e-05, | |
| "loss": 3.066, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.13404825737265416, | |
| "grad_norm": 1.0790441036224365, | |
| "learning_rate": 7.41e-05, | |
| "loss": 3.0007, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14745308310991956, | |
| "grad_norm": 2.6309916973114014, | |
| "learning_rate": 8.16e-05, | |
| "loss": 2.9045, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.16085790884718498, | |
| "grad_norm": 1.937912106513977, | |
| "learning_rate": 8.909999999999998e-05, | |
| "loss": 2.4134, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1742627345844504, | |
| "grad_norm": 0.9296526908874512, | |
| "learning_rate": 9.659999999999999e-05, | |
| "loss": 1.6152, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.1876675603217158, | |
| "grad_norm": 2.7531306743621826, | |
| "learning_rate": 0.00010409999999999998, | |
| "loss": 1.3614, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20107238605898123, | |
| "grad_norm": 0.8136234879493713, | |
| "learning_rate": 0.00011159999999999999, | |
| "loss": 1.1165, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.21447721179624665, | |
| "grad_norm": 1.2151700258255005, | |
| "learning_rate": 0.0001191, | |
| "loss": 1.0421, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22788203753351208, | |
| "grad_norm": 0.6655362248420715, | |
| "learning_rate": 0.0001266, | |
| "loss": 0.7823, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.24128686327077747, | |
| "grad_norm": 1.2359944581985474, | |
| "learning_rate": 0.00013409999999999998, | |
| "loss": 0.8167, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2546916890080429, | |
| "grad_norm": 0.479769766330719, | |
| "learning_rate": 0.00014159999999999997, | |
| "loss": 0.619, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2680965147453083, | |
| "grad_norm": 1.5369658470153809, | |
| "learning_rate": 0.0001491, | |
| "loss": 0.6813, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2680965147453083, | |
| "eval_cer": 0.1590862737099735, | |
| "eval_loss": 0.46627911925315857, | |
| "eval_runtime": 187.8681, | |
| "eval_samples_per_second": 8.357, | |
| "eval_steps_per_second": 1.049, | |
| "eval_wer": 0.5099869024230518, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.28150134048257375, | |
| "grad_norm": 0.5673221945762634, | |
| "learning_rate": 0.00015659999999999998, | |
| "loss": 0.5007, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2949061662198391, | |
| "grad_norm": 1.1437309980392456, | |
| "learning_rate": 0.0001641, | |
| "loss": 0.5798, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.30831099195710454, | |
| "grad_norm": 0.846733808517456, | |
| "learning_rate": 0.00017159999999999997, | |
| "loss": 0.4721, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.32171581769436997, | |
| "grad_norm": 1.40889310836792, | |
| "learning_rate": 0.0001791, | |
| "loss": 0.5589, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3351206434316354, | |
| "grad_norm": 0.6701321601867676, | |
| "learning_rate": 0.00018659999999999998, | |
| "loss": 0.4675, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3485254691689008, | |
| "grad_norm": 1.3316681385040283, | |
| "learning_rate": 0.0001941, | |
| "loss": 0.5296, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.36193029490616624, | |
| "grad_norm": 0.660426676273346, | |
| "learning_rate": 0.0002016, | |
| "loss": 0.4336, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3753351206434316, | |
| "grad_norm": 2.398401975631714, | |
| "learning_rate": 0.00020909999999999996, | |
| "loss": 0.5475, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.38873994638069703, | |
| "grad_norm": 0.4912319481372833, | |
| "learning_rate": 0.00021659999999999998, | |
| "loss": 0.4247, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.40214477211796246, | |
| "grad_norm": 1.7861497402191162, | |
| "learning_rate": 0.00022409999999999997, | |
| "loss": 0.5189, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4155495978552279, | |
| "grad_norm": 0.6414456367492676, | |
| "learning_rate": 0.0002316, | |
| "loss": 0.4367, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4289544235924933, | |
| "grad_norm": 2.092426061630249, | |
| "learning_rate": 0.00023909999999999998, | |
| "loss": 0.4774, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.44235924932975873, | |
| "grad_norm": 1.7996277809143066, | |
| "learning_rate": 0.0002466, | |
| "loss": 0.4069, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.45576407506702415, | |
| "grad_norm": 1.8674991130828857, | |
| "learning_rate": 0.0002541, | |
| "loss": 0.5321, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4691689008042895, | |
| "grad_norm": 0.6605350375175476, | |
| "learning_rate": 0.00026159999999999996, | |
| "loss": 0.5289, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.48257372654155495, | |
| "grad_norm": 1.1707311868667603, | |
| "learning_rate": 0.0002691, | |
| "loss": 0.5099, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4959785522788204, | |
| "grad_norm": 0.7791293263435364, | |
| "learning_rate": 0.0002766, | |
| "loss": 0.427, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5093833780160858, | |
| "grad_norm": 2.19549822807312, | |
| "learning_rate": 0.00028409999999999997, | |
| "loss": 0.5421, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5227882037533512, | |
| "grad_norm": 0.9283122420310974, | |
| "learning_rate": 0.0002916, | |
| "loss": 0.4518, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5361930294906166, | |
| "grad_norm": 1.525386929512024, | |
| "learning_rate": 0.00029909999999999995, | |
| "loss": 0.5015, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5361930294906166, | |
| "eval_cer": 0.14878645727105855, | |
| "eval_loss": 0.3607601523399353, | |
| "eval_runtime": 191.1162, | |
| "eval_samples_per_second": 8.215, | |
| "eval_steps_per_second": 1.031, | |
| "eval_wer": 0.4816088190351452, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5495978552278821, | |
| "grad_norm": 1.58902907371521, | |
| "learning_rate": 0.00029994048692515775, | |
| "loss": 0.4443, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5630026809651475, | |
| "grad_norm": 1.3990167379379272, | |
| "learning_rate": 0.0002998728584310189, | |
| "loss": 0.5085, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5764075067024129, | |
| "grad_norm": 1.941627025604248, | |
| "learning_rate": 0.00029980522993688, | |
| "loss": 0.4453, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.5898123324396782, | |
| "grad_norm": 1.8028769493103027, | |
| "learning_rate": 0.0002997376014427412, | |
| "loss": 0.5275, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6032171581769437, | |
| "grad_norm": 0.8877336978912354, | |
| "learning_rate": 0.00029966997294860235, | |
| "loss": 0.4264, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6166219839142091, | |
| "grad_norm": 1.3580883741378784, | |
| "learning_rate": 0.00029960234445446343, | |
| "loss": 0.5471, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6300268096514745, | |
| "grad_norm": 0.9151347875595093, | |
| "learning_rate": 0.00029953471596032456, | |
| "loss": 0.4462, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6434316353887399, | |
| "grad_norm": 1.6365715265274048, | |
| "learning_rate": 0.00029946708746618575, | |
| "loss": 0.4745, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6568364611260054, | |
| "grad_norm": 0.9017271995544434, | |
| "learning_rate": 0.0002993994589720469, | |
| "loss": 0.4098, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6702412868632708, | |
| "grad_norm": 1.4687843322753906, | |
| "learning_rate": 0.00029933183047790797, | |
| "loss": 0.4814, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6836461126005362, | |
| "grad_norm": 0.9523298144340515, | |
| "learning_rate": 0.0002992642019837691, | |
| "loss": 0.4071, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.6970509383378016, | |
| "grad_norm": 1.300267219543457, | |
| "learning_rate": 0.0002991965734896303, | |
| "loss": 0.489, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.710455764075067, | |
| "grad_norm": 0.8380106091499329, | |
| "learning_rate": 0.00029912894499549143, | |
| "loss": 0.4088, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.7238605898123325, | |
| "grad_norm": 1.501035213470459, | |
| "learning_rate": 0.00029906131650135257, | |
| "loss": 0.4985, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7372654155495979, | |
| "grad_norm": 0.4843454360961914, | |
| "learning_rate": 0.00029899368800721365, | |
| "loss": 0.4098, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.7506702412868632, | |
| "grad_norm": 1.5197869539260864, | |
| "learning_rate": 0.0002989260595130748, | |
| "loss": 0.481, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7640750670241286, | |
| "grad_norm": 0.7740542888641357, | |
| "learning_rate": 0.000298858431018936, | |
| "loss": 0.4852, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.7774798927613941, | |
| "grad_norm": 1.8375496864318848, | |
| "learning_rate": 0.0002987908025247971, | |
| "loss": 0.4678, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7908847184986595, | |
| "grad_norm": 0.48813843727111816, | |
| "learning_rate": 0.00029872317403065824, | |
| "loss": 0.4322, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.8042895442359249, | |
| "grad_norm": 1.3342444896697998, | |
| "learning_rate": 0.0002986555455365193, | |
| "loss": 0.4424, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8042895442359249, | |
| "eval_cer": 0.14556847281709612, | |
| "eval_loss": 0.3339381515979767, | |
| "eval_runtime": 191.3899, | |
| "eval_samples_per_second": 8.203, | |
| "eval_steps_per_second": 1.029, | |
| "eval_wer": 0.4802444881030343, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8176943699731903, | |
| "grad_norm": 0.9165742993354797, | |
| "learning_rate": 0.0002985879170423805, | |
| "loss": 0.4495, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.8310991957104558, | |
| "grad_norm": 1.5298058986663818, | |
| "learning_rate": 0.00029852028854824165, | |
| "loss": 0.4839, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8445040214477212, | |
| "grad_norm": 0.5936838984489441, | |
| "learning_rate": 0.0002984526600541028, | |
| "loss": 0.446, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.8579088471849866, | |
| "grad_norm": 2.4456019401550293, | |
| "learning_rate": 0.00029838503155996387, | |
| "loss": 0.4568, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.871313672922252, | |
| "grad_norm": 0.5276655554771423, | |
| "learning_rate": 0.00029831740306582506, | |
| "loss": 0.4752, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.8847184986595175, | |
| "grad_norm": 8.446830749511719, | |
| "learning_rate": 0.0002982497745716862, | |
| "loss": 0.4943, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8981233243967829, | |
| "grad_norm": 0.7062143683433533, | |
| "learning_rate": 0.0002981848512173129, | |
| "loss": 0.8819, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.9115281501340483, | |
| "grad_norm": 2.223888397216797, | |
| "learning_rate": 0.000298117222723174, | |
| "loss": 0.4901, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9249329758713136, | |
| "grad_norm": 0.7132174372673035, | |
| "learning_rate": 0.0002980495942290351, | |
| "loss": 0.4252, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.938337801608579, | |
| "grad_norm": 1.3910300731658936, | |
| "learning_rate": 0.0002979819657348963, | |
| "loss": 0.6194, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9517426273458445, | |
| "grad_norm": 1.0311365127563477, | |
| "learning_rate": 0.00029791433724075744, | |
| "loss": 0.4023, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9651474530831099, | |
| "grad_norm": 1.4001951217651367, | |
| "learning_rate": 0.00029784670874661857, | |
| "loss": 0.4346, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9785522788203753, | |
| "grad_norm": 0.7526248097419739, | |
| "learning_rate": 0.00029777908025247965, | |
| "loss": 0.401, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.9919571045576407, | |
| "grad_norm": NaN, | |
| "learning_rate": 0.00029771145175834084, | |
| "loss": 0.4831, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.0053619302949062, | |
| "grad_norm": 1.6143497228622437, | |
| "learning_rate": 0.00029764652840396754, | |
| "loss": 0.4327, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.0187667560321716, | |
| "grad_norm": 0.9131807088851929, | |
| "learning_rate": 0.0002975788999098286, | |
| "loss": 0.3143, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.032171581769437, | |
| "grad_norm": 0.5591608285903931, | |
| "learning_rate": 0.00029751127141568976, | |
| "loss": 0.4522, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.0455764075067024, | |
| "grad_norm": 0.6661513447761536, | |
| "learning_rate": 0.0002974436429215509, | |
| "loss": 0.3374, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.0589812332439679, | |
| "grad_norm": 0.5489794015884399, | |
| "learning_rate": 0.0002973760144274121, | |
| "loss": 0.4572, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.0723860589812333, | |
| "grad_norm": 0.7652894258499146, | |
| "learning_rate": 0.0002973083859332732, | |
| "loss": 0.3254, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0723860589812333, | |
| "eval_cer": 0.14708681759466993, | |
| "eval_loss": 0.3146507740020752, | |
| "eval_runtime": 190.0206, | |
| "eval_samples_per_second": 8.262, | |
| "eval_steps_per_second": 1.037, | |
| "eval_wer": 0.47920759659462997, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0857908847184987, | |
| "grad_norm": 0.4912964105606079, | |
| "learning_rate": 0.0002972407574391343, | |
| "loss": 0.4793, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.0991957104557641, | |
| "grad_norm": 0.5184613466262817, | |
| "learning_rate": 0.00029717312894499544, | |
| "loss": 0.3278, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.1126005361930296, | |
| "grad_norm": 0.3988397717475891, | |
| "learning_rate": 0.0002971055004508566, | |
| "loss": 0.4684, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.126005361930295, | |
| "grad_norm": 1.1345094442367554, | |
| "learning_rate": 0.00029703787195671776, | |
| "loss": 0.3179, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.1394101876675604, | |
| "grad_norm": 0.4202677309513092, | |
| "learning_rate": 0.0002969702434625789, | |
| "loss": 0.4557, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.1528150134048256, | |
| "grad_norm": 0.757359504699707, | |
| "learning_rate": 0.00029690261496844, | |
| "loss": 0.3207, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.1662198391420913, | |
| "grad_norm": 1.6614487171173096, | |
| "learning_rate": 0.0002968376916140667, | |
| "loss": 0.494, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.1796246648793565, | |
| "grad_norm": 0.7348321080207825, | |
| "learning_rate": 0.00029677006311992787, | |
| "loss": 0.3561, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.193029490616622, | |
| "grad_norm": 0.577582061290741, | |
| "learning_rate": 0.00029670243462578895, | |
| "loss": 0.454, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.2064343163538873, | |
| "grad_norm": 0.6139086484909058, | |
| "learning_rate": 0.0002966348061316501, | |
| "loss": 0.319, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.2198391420911527, | |
| "grad_norm": 0.7023029923439026, | |
| "learning_rate": 0.0002965671776375112, | |
| "loss": 0.4586, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.2332439678284182, | |
| "grad_norm": 0.9260369539260864, | |
| "learning_rate": 0.0002964995491433724, | |
| "loss": 0.3373, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.2466487935656836, | |
| "grad_norm": 0.42650407552719116, | |
| "learning_rate": 0.00029643192064923355, | |
| "loss": 0.4738, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.260053619302949, | |
| "grad_norm": 0.5607989430427551, | |
| "learning_rate": 0.00029636429215509463, | |
| "loss": 0.328, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.2734584450402144, | |
| "grad_norm": 0.6092125773429871, | |
| "learning_rate": 0.00029629666366095576, | |
| "loss": 0.4625, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.2868632707774799, | |
| "grad_norm": 0.9334998726844788, | |
| "learning_rate": 0.00029622903516681695, | |
| "loss": 0.3387, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.3002680965147453, | |
| "grad_norm": 0.49639058113098145, | |
| "learning_rate": 0.0002961614066726781, | |
| "loss": 0.4863, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.3136729222520107, | |
| "grad_norm": 0.6151789426803589, | |
| "learning_rate": 0.0002960937781785392, | |
| "loss": 0.323, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.3270777479892761, | |
| "grad_norm": 0.5085999965667725, | |
| "learning_rate": 0.0002960261496844003, | |
| "loss": 0.4538, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.3404825737265416, | |
| "grad_norm": 1.1368881464004517, | |
| "learning_rate": 0.00029595852119026144, | |
| "loss": 0.5671, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3404825737265416, | |
| "eval_cer": 0.1469735082829107, | |
| "eval_loss": 0.3150199055671692, | |
| "eval_runtime": 189.299, | |
| "eval_samples_per_second": 8.294, | |
| "eval_steps_per_second": 1.041, | |
| "eval_wer": 0.4757149094084261, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.353887399463807, | |
| "grad_norm": 0.5132752060890198, | |
| "learning_rate": 0.00029589089269612263, | |
| "loss": 0.452, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.3672922252010724, | |
| "grad_norm": 0.8539701700210571, | |
| "learning_rate": 0.00029582326420198377, | |
| "loss": 0.3149, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.3806970509383378, | |
| "grad_norm": 1.3219923973083496, | |
| "learning_rate": 0.00029575563570784485, | |
| "loss": 0.4735, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.3941018766756033, | |
| "grad_norm": 1.1187196969985962, | |
| "learning_rate": 0.000295688007213706, | |
| "loss": 0.3357, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.4075067024128687, | |
| "grad_norm": 0.5335268378257751, | |
| "learning_rate": 0.0002956203787195672, | |
| "loss": 0.4755, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.420911528150134, | |
| "grad_norm": 1.1260613203048706, | |
| "learning_rate": 0.0002955527502254283, | |
| "loss": 0.3312, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.4343163538873995, | |
| "grad_norm": 0.4925306737422943, | |
| "learning_rate": 0.00029548512173128944, | |
| "loss": 0.4918, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.447721179624665, | |
| "grad_norm": 0.8351105451583862, | |
| "learning_rate": 0.0002954174932371505, | |
| "loss": 0.3408, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.4611260053619302, | |
| "grad_norm": 0.45573753118515015, | |
| "learning_rate": 0.0002953498647430117, | |
| "loss": 0.4778, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.4745308310991958, | |
| "grad_norm": 0.9699208736419678, | |
| "learning_rate": 0.00029528223624887285, | |
| "loss": 0.3117, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.487935656836461, | |
| "grad_norm": 2.6216440200805664, | |
| "learning_rate": 0.000295214607754734, | |
| "loss": 0.5113, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.5013404825737267, | |
| "grad_norm": 0.6581635475158691, | |
| "learning_rate": 0.0002951469792605951, | |
| "loss": 0.3661, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.5147453083109919, | |
| "grad_norm": 0.4799867272377014, | |
| "learning_rate": 0.00029507935076645626, | |
| "loss": 0.492, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.5281501340482575, | |
| "grad_norm": 0.5858296751976013, | |
| "learning_rate": 0.0002950117222723174, | |
| "loss": 0.3361, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.5415549597855227, | |
| "grad_norm": 0.5094213485717773, | |
| "learning_rate": 0.00029494409377817853, | |
| "loss": 0.4414, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.5549597855227884, | |
| "grad_norm": 0.852539598941803, | |
| "learning_rate": 0.00029487646528403966, | |
| "loss": 0.3532, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.5683646112600536, | |
| "grad_norm": 2.0929360389709473, | |
| "learning_rate": 0.00029480883678990074, | |
| "loss": 0.4614, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.5817694369973192, | |
| "grad_norm": 1.2485980987548828, | |
| "learning_rate": 0.00029474120829576193, | |
| "loss": 0.3141, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5951742627345844, | |
| "grad_norm": 0.5923244953155518, | |
| "learning_rate": 0.00029467357980162307, | |
| "loss": 0.4607, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.6085790884718498, | |
| "grad_norm": 0.7467523813247681, | |
| "learning_rate": 0.0002946059513074842, | |
| "loss": 0.3204, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.6085790884718498, | |
| "eval_cer": 0.14647494731117003, | |
| "eval_loss": 0.30716007947921753, | |
| "eval_runtime": 192.6644, | |
| "eval_samples_per_second": 8.149, | |
| "eval_steps_per_second": 1.023, | |
| "eval_wer": 0.46922069417157825, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.6219839142091153, | |
| "grad_norm": 0.6438741683959961, | |
| "learning_rate": 0.00029453832281334534, | |
| "loss": 0.4795, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.6353887399463807, | |
| "grad_norm": 0.6687526702880859, | |
| "learning_rate": 0.0002944706943192065, | |
| "loss": 0.3193, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.648793565683646, | |
| "grad_norm": 0.4715401232242584, | |
| "learning_rate": 0.0002944030658250676, | |
| "loss": 0.4776, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.6621983914209115, | |
| "grad_norm": 0.8782249093055725, | |
| "learning_rate": 0.00029433543733092875, | |
| "loss": 0.33, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.675603217158177, | |
| "grad_norm": 0.551409900188446, | |
| "learning_rate": 0.0002942678088367899, | |
| "loss": 0.4803, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.6890080428954424, | |
| "grad_norm": 0.5042707324028015, | |
| "learning_rate": 0.000294200180342651, | |
| "loss": 0.2894, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.7024128686327078, | |
| "grad_norm": 0.47696781158447266, | |
| "learning_rate": 0.00029413255184851215, | |
| "loss": 0.4747, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.7158176943699732, | |
| "grad_norm": 0.8431724905967712, | |
| "learning_rate": 0.0002940649233543733, | |
| "loss": 0.3053, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.7292225201072386, | |
| "grad_norm": 0.71580570936203, | |
| "learning_rate": 0.0002939972948602344, | |
| "loss": 0.4638, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.742627345844504, | |
| "grad_norm": 0.7655317187309265, | |
| "learning_rate": 0.00029392966636609556, | |
| "loss": 0.3156, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.7560321715817695, | |
| "grad_norm": 0.5282989144325256, | |
| "learning_rate": 0.0002938620378719567, | |
| "loss": 0.5191, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.7694369973190347, | |
| "grad_norm": 0.6365646123886108, | |
| "learning_rate": 0.00029379440937781783, | |
| "loss": 0.3131, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.7828418230563003, | |
| "grad_norm": 0.427397221326828, | |
| "learning_rate": 0.00029372678088367897, | |
| "loss": 0.4547, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.7962466487935655, | |
| "grad_norm": 1.0049763917922974, | |
| "learning_rate": 0.0002936591523895401, | |
| "loss": 0.3297, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.8096514745308312, | |
| "grad_norm": 0.541320264339447, | |
| "learning_rate": 0.00029359152389540124, | |
| "loss": 0.4603, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.8230563002680964, | |
| "grad_norm": 0.7566213607788086, | |
| "learning_rate": 0.0002935238954012624, | |
| "loss": 0.3123, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.836461126005362, | |
| "grad_norm": 0.46456801891326904, | |
| "learning_rate": 0.0002934562669071235, | |
| "loss": 0.4906, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.8498659517426272, | |
| "grad_norm": 0.9275864362716675, | |
| "learning_rate": 0.00029338863841298464, | |
| "loss": 0.3128, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.863270777479893, | |
| "grad_norm": 0.7383331060409546, | |
| "learning_rate": 0.0002933210099188458, | |
| "loss": 0.4679, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.876675603217158, | |
| "grad_norm": 0.607969343662262, | |
| "learning_rate": 0.0002932533814247069, | |
| "loss": 0.32, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.876675603217158, | |
| "eval_cer": 0.1412287261767172, | |
| "eval_loss": 0.29745474457740784, | |
| "eval_runtime": 194.0942, | |
| "eval_samples_per_second": 8.089, | |
| "eval_steps_per_second": 1.015, | |
| "eval_wer": 0.4685112420868806, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.8900804289544237, | |
| "grad_norm": 0.6645176410675049, | |
| "learning_rate": 0.00029318575293056805, | |
| "loss": 0.4393, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.903485254691689, | |
| "grad_norm": 0.4631984531879425, | |
| "learning_rate": 0.0002931181244364292, | |
| "loss": 0.3076, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.9168900804289544, | |
| "grad_norm": 0.5980284810066223, | |
| "learning_rate": 0.0002930504959422903, | |
| "loss": 0.4694, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.9302949061662198, | |
| "grad_norm": 0.504612922668457, | |
| "learning_rate": 0.00029298286744815146, | |
| "loss": 0.322, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.9436997319034852, | |
| "grad_norm": 0.43368223309516907, | |
| "learning_rate": 0.0002929152389540126, | |
| "loss": 0.4843, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.9571045576407506, | |
| "grad_norm": 0.9730172753334045, | |
| "learning_rate": 0.00029284761045987373, | |
| "loss": 0.3064, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.970509383378016, | |
| "grad_norm": 0.42133307456970215, | |
| "learning_rate": 0.00029277998196573486, | |
| "loss": 0.4305, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.9839142091152815, | |
| "grad_norm": 0.8820632100105286, | |
| "learning_rate": 0.000292712353471596, | |
| "loss": 0.3189, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.997319034852547, | |
| "grad_norm": 0.5759428143501282, | |
| "learning_rate": 0.00029264472497745713, | |
| "loss": 0.4366, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.0107238605898123, | |
| "grad_norm": 1.1622861623764038, | |
| "learning_rate": 0.00029257709648331827, | |
| "loss": 0.385, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.0241286863270775, | |
| "grad_norm": 0.8884519934654236, | |
| "learning_rate": 0.0002925094679891794, | |
| "loss": 0.3082, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.037533512064343, | |
| "grad_norm": 0.42693793773651123, | |
| "learning_rate": 0.00029244183949504054, | |
| "loss": 0.3979, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.0509383378016084, | |
| "grad_norm": 0.8100738525390625, | |
| "learning_rate": 0.0002923742110009017, | |
| "loss": 0.3165, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.064343163538874, | |
| "grad_norm": 0.5522972941398621, | |
| "learning_rate": 0.0002923065825067628, | |
| "loss": 0.3963, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.0777479892761392, | |
| "grad_norm": 0.6663551330566406, | |
| "learning_rate": 0.00029223895401262395, | |
| "loss": 0.2666, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.091152815013405, | |
| "grad_norm": 0.5006572604179382, | |
| "learning_rate": 0.00029217132551848514, | |
| "loss": 0.3829, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.10455764075067, | |
| "grad_norm": 1.3403582572937012, | |
| "learning_rate": 0.0002921036970243462, | |
| "loss": 0.3436, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.1179624664879357, | |
| "grad_norm": 0.798907995223999, | |
| "learning_rate": 0.00029203606853020735, | |
| "loss": 0.3897, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.131367292225201, | |
| "grad_norm": 0.6176706552505493, | |
| "learning_rate": 0.0002919684400360685, | |
| "loss": 0.2982, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.1447721179624666, | |
| "grad_norm": 1.0198493003845215, | |
| "learning_rate": 0.0002919035166816952, | |
| "loss": 0.3854, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.1447721179624666, | |
| "eval_cer": 0.14156865411199493, | |
| "eval_loss": 0.2999935746192932, | |
| "eval_runtime": 193.6993, | |
| "eval_samples_per_second": 8.105, | |
| "eval_steps_per_second": 1.017, | |
| "eval_wer": 0.4663828858327876, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.158176943699732, | |
| "grad_norm": 0.8894768357276917, | |
| "learning_rate": 0.0002918358881875563, | |
| "loss": 0.3069, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.1715817694369974, | |
| "grad_norm": 0.5018978118896484, | |
| "learning_rate": 0.00029176825969341746, | |
| "loss": 0.3727, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.1849865951742626, | |
| "grad_norm": 1.342336654663086, | |
| "learning_rate": 0.0002917006311992786, | |
| "loss": 0.2903, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.1983914209115283, | |
| "grad_norm": 1.140587329864502, | |
| "learning_rate": 0.00029163300270513973, | |
| "loss": 0.5605, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.2117962466487935, | |
| "grad_norm": 0.8588898181915283, | |
| "learning_rate": 0.00029156537421100087, | |
| "loss": 0.31, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.225201072386059, | |
| "grad_norm": 0.5349767804145813, | |
| "learning_rate": 0.000291497745716862, | |
| "loss": 0.413, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.2386058981233243, | |
| "grad_norm": 1.0299067497253418, | |
| "learning_rate": 0.00029143011722272314, | |
| "loss": 0.3054, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.25201072386059, | |
| "grad_norm": 1.3161804676055908, | |
| "learning_rate": 0.0002913624887285843, | |
| "loss": 0.4185, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.265415549597855, | |
| "grad_norm": 1.2179690599441528, | |
| "learning_rate": 0.00029129486023444546, | |
| "loss": 0.3208, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.278820375335121, | |
| "grad_norm": 0.7258560061454773, | |
| "learning_rate": 0.00029122723174030655, | |
| "loss": 0.4248, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.292225201072386, | |
| "grad_norm": 0.7003055810928345, | |
| "learning_rate": 0.0002911596032461677, | |
| "loss": 0.3122, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.3056300268096512, | |
| "grad_norm": 0.7047171592712402, | |
| "learning_rate": 0.0002910919747520288, | |
| "loss": 0.4407, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.319034852546917, | |
| "grad_norm": 0.8658078908920288, | |
| "learning_rate": 0.00029102434625788995, | |
| "loss": 0.3302, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.3324396782841825, | |
| "grad_norm": 0.7273276448249817, | |
| "learning_rate": 0.0002909567177637511, | |
| "loss": 0.4077, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.3458445040214477, | |
| "grad_norm": 0.8991754651069641, | |
| "learning_rate": 0.0002908890892696122, | |
| "loss": 0.3087, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 2.359249329758713, | |
| "grad_norm": 2.1196842193603516, | |
| "learning_rate": 0.00029082146077547336, | |
| "loss": 0.3878, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.3726541554959786, | |
| "grad_norm": 1.3642330169677734, | |
| "learning_rate": 0.0002907538322813345, | |
| "loss": 0.3097, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 2.386058981233244, | |
| "grad_norm": 0.4507950246334076, | |
| "learning_rate": 0.0002906862037871957, | |
| "loss": 0.3878, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.3994638069705094, | |
| "grad_norm": 0.6684398055076599, | |
| "learning_rate": 0.00029061857529305677, | |
| "loss": 0.2896, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 2.4128686327077746, | |
| "grad_norm": 2.0050833225250244, | |
| "learning_rate": 0.0002905509467989179, | |
| "loss": 0.3809, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.4128686327077746, | |
| "eval_cer": 0.13980102884855078, | |
| "eval_loss": 0.2892570197582245, | |
| "eval_runtime": 193.0289, | |
| "eval_samples_per_second": 8.133, | |
| "eval_steps_per_second": 1.021, | |
| "eval_wer": 0.46065269591792185, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.4262734584450403, | |
| "grad_norm": 0.7828475832939148, | |
| "learning_rate": 0.00029048331830477904, | |
| "loss": 0.3337, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 2.4396782841823055, | |
| "grad_norm": 0.5278825759887695, | |
| "learning_rate": 0.0002904156898106402, | |
| "loss": 0.3781, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.453083109919571, | |
| "grad_norm": 1.2074604034423828, | |
| "learning_rate": 0.00029034806131650136, | |
| "loss": 0.332, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 2.4664879356568363, | |
| "grad_norm": 0.5711201429367065, | |
| "learning_rate": 0.00029028043282236244, | |
| "loss": 0.43, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.479892761394102, | |
| "grad_norm": 1.5318876504898071, | |
| "learning_rate": 0.0002902128043282236, | |
| "loss": 0.3717, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 2.493297587131367, | |
| "grad_norm": 0.6812917590141296, | |
| "learning_rate": 0.0002901451758340847, | |
| "loss": 0.3999, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.506702412868633, | |
| "grad_norm": 1.1768240928649902, | |
| "learning_rate": 0.0002900775473399459, | |
| "loss": 0.3182, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 2.520107238605898, | |
| "grad_norm": 2.612589120864868, | |
| "learning_rate": 0.000290009918845807, | |
| "loss": 0.3892, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.5335120643431637, | |
| "grad_norm": 0.8447991609573364, | |
| "learning_rate": 0.0002899422903516681, | |
| "loss": 0.3011, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 2.546916890080429, | |
| "grad_norm": 0.5008765459060669, | |
| "learning_rate": 0.00028987466185752926, | |
| "loss": 0.4069, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.5603217158176945, | |
| "grad_norm": 0.8445732593536377, | |
| "learning_rate": 0.00028980703336339045, | |
| "loss": 0.3358, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 2.5737265415549597, | |
| "grad_norm": 0.5978402495384216, | |
| "learning_rate": 0.0002897394048692516, | |
| "loss": 0.4049, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.5871313672922254, | |
| "grad_norm": 0.932671844959259, | |
| "learning_rate": 0.00028967177637511266, | |
| "loss": 0.2997, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 2.6005361930294906, | |
| "grad_norm": 0.38006141781806946, | |
| "learning_rate": 0.0002896041478809738, | |
| "loss": 0.3849, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.6139410187667558, | |
| "grad_norm": 0.8640321493148804, | |
| "learning_rate": 0.000289536519386835, | |
| "loss": 0.2714, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 2.6273458445040214, | |
| "grad_norm": 0.5667803883552551, | |
| "learning_rate": 0.0002894688908926961, | |
| "loss": 0.3881, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.640750670241287, | |
| "grad_norm": 0.8906360864639282, | |
| "learning_rate": 0.00028940126239855726, | |
| "loss": 0.3157, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 2.6541554959785523, | |
| "grad_norm": 0.6210130453109741, | |
| "learning_rate": 0.00028933363390441834, | |
| "loss": 0.4235, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.6675603217158175, | |
| "grad_norm": 0.8941358327865601, | |
| "learning_rate": 0.00028926600541027953, | |
| "loss": 0.3045, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 2.680965147453083, | |
| "grad_norm": 0.7417230606079102, | |
| "learning_rate": 0.00028919837691614066, | |
| "loss": 0.3977, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.680965147453083, | |
| "eval_cer": 0.1418859201849208, | |
| "eval_loss": 0.28820186853408813, | |
| "eval_runtime": 192.4195, | |
| "eval_samples_per_second": 8.159, | |
| "eval_steps_per_second": 1.024, | |
| "eval_wer": 0.46436367605326345, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.6943699731903488, | |
| "grad_norm": 0.9162290692329407, | |
| "learning_rate": 0.0002891307484220018, | |
| "loss": 0.3241, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 2.707774798927614, | |
| "grad_norm": 0.48595941066741943, | |
| "learning_rate": 0.0002890631199278629, | |
| "loss": 0.4285, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.721179624664879, | |
| "grad_norm": 0.8688729405403137, | |
| "learning_rate": 0.000288995491433724, | |
| "loss": 0.3234, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.734584450402145, | |
| "grad_norm": 1.4491957426071167, | |
| "learning_rate": 0.0002889278629395852, | |
| "loss": 0.4005, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.7479892761394105, | |
| "grad_norm": 0.6688870191574097, | |
| "learning_rate": 0.00028886023444544634, | |
| "loss": 0.3033, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.7613941018766757, | |
| "grad_norm": 0.6579515933990479, | |
| "learning_rate": 0.0002887926059513075, | |
| "loss": 0.4209, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.774798927613941, | |
| "grad_norm": 0.7731506824493408, | |
| "learning_rate": 0.00028872497745716856, | |
| "loss": 0.3119, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 2.7882037533512065, | |
| "grad_norm": 0.4523961842060089, | |
| "learning_rate": 0.00028865734896302975, | |
| "loss": 0.4168, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.8016085790884717, | |
| "grad_norm": 0.9980618953704834, | |
| "learning_rate": 0.0002885897204688909, | |
| "loss": 0.2943, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 2.8150134048257374, | |
| "grad_norm": 1.1214523315429688, | |
| "learning_rate": 0.000288522091974752, | |
| "loss": 0.4058, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.8284182305630026, | |
| "grad_norm": 3.955530881881714, | |
| "learning_rate": 0.00028845446348061316, | |
| "loss": 0.2864, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 2.841823056300268, | |
| "grad_norm": 0.5203446745872498, | |
| "learning_rate": 0.0002883868349864743, | |
| "loss": 0.4013, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.8552278820375334, | |
| "grad_norm": 1.0400564670562744, | |
| "learning_rate": 0.0002883192064923354, | |
| "loss": 0.2924, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 2.868632707774799, | |
| "grad_norm": 0.5297791361808777, | |
| "learning_rate": 0.00028825157799819656, | |
| "loss": 0.4127, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.8820375335120643, | |
| "grad_norm": 2.9058032035827637, | |
| "learning_rate": 0.0002881839495040577, | |
| "loss": 0.3282, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 2.89544235924933, | |
| "grad_norm": 0.5886743664741516, | |
| "learning_rate": 0.0002881163210099188, | |
| "loss": 0.4285, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.908847184986595, | |
| "grad_norm": 1.0201635360717773, | |
| "learning_rate": 0.00028804869251577997, | |
| "loss": 0.3718, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 2.9222520107238603, | |
| "grad_norm": 0.44661447405815125, | |
| "learning_rate": 0.0002879810640216411, | |
| "loss": 0.4447, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.935656836461126, | |
| "grad_norm": 0.9066615104675293, | |
| "learning_rate": 0.00028791343552750224, | |
| "loss": 0.2977, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 2.9490616621983916, | |
| "grad_norm": 0.4934927523136139, | |
| "learning_rate": 0.0002878458070333634, | |
| "loss": 0.3966, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.9490616621983916, | |
| "eval_cer": 0.14172728714845786, | |
| "eval_loss": 0.30226650834083557, | |
| "eval_runtime": 191.4811, | |
| "eval_samples_per_second": 8.199, | |
| "eval_steps_per_second": 1.029, | |
| "eval_wer": 0.46567343374808995, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.962466487935657, | |
| "grad_norm": 2.8394415378570557, | |
| "learning_rate": 0.0002877781785392245, | |
| "loss": 0.2746, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 2.975871313672922, | |
| "grad_norm": 0.5205143094062805, | |
| "learning_rate": 0.00028771055004508565, | |
| "loss": 0.3791, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.9892761394101877, | |
| "grad_norm": 1.6631228923797607, | |
| "learning_rate": 0.0002876429215509468, | |
| "loss": 0.3137, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 3.002680965147453, | |
| "grad_norm": 0.5890147089958191, | |
| "learning_rate": 0.0002875752930568079, | |
| "loss": 0.3853, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.0160857908847185, | |
| "grad_norm": 0.6414802670478821, | |
| "learning_rate": 0.00028750766456266905, | |
| "loss": 0.2758, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 3.0294906166219837, | |
| "grad_norm": 0.44740453362464905, | |
| "learning_rate": 0.0002874400360685302, | |
| "loss": 0.3238, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.0428954423592494, | |
| "grad_norm": 0.456421822309494, | |
| "learning_rate": 0.0002873724075743913, | |
| "loss": 0.309, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 3.0563002680965146, | |
| "grad_norm": 0.36823776364326477, | |
| "learning_rate": 0.00028730477908025246, | |
| "loss": 0.3289, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.06970509383378, | |
| "grad_norm": 0.5027665495872498, | |
| "learning_rate": 0.0002872371505861136, | |
| "loss": 0.2792, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 3.0831099195710454, | |
| "grad_norm": 0.5492646098136902, | |
| "learning_rate": 0.00028716952209197473, | |
| "loss": 0.3331, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.096514745308311, | |
| "grad_norm": 0.42318084836006165, | |
| "learning_rate": 0.00028710189359783586, | |
| "loss": 0.2944, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 3.1099195710455763, | |
| "grad_norm": 0.42389240860939026, | |
| "learning_rate": 0.000287034265103697, | |
| "loss": 0.3291, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.123324396782842, | |
| "grad_norm": 0.8874741196632385, | |
| "learning_rate": 0.00028696663660955814, | |
| "loss": 0.2894, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 3.136729222520107, | |
| "grad_norm": 0.5217534303665161, | |
| "learning_rate": 0.00028689900811541927, | |
| "loss": 0.3404, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.1501340482573728, | |
| "grad_norm": 0.5872902274131775, | |
| "learning_rate": 0.0002868313796212804, | |
| "loss": 0.2703, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 3.163538873994638, | |
| "grad_norm": 0.6532200574874878, | |
| "learning_rate": 0.00028676375112714154, | |
| "loss": 0.366, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.1769436997319036, | |
| "grad_norm": 0.6171639561653137, | |
| "learning_rate": 0.0002866961226330027, | |
| "loss": 0.3054, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 3.190348525469169, | |
| "grad_norm": 0.5158005356788635, | |
| "learning_rate": 0.0002866284941388638, | |
| "loss": 0.355, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.2037533512064345, | |
| "grad_norm": 0.47394490242004395, | |
| "learning_rate": 0.00028656086564472495, | |
| "loss": 0.3056, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 3.2171581769436997, | |
| "grad_norm": 0.49569785594940186, | |
| "learning_rate": 0.0002864932371505861, | |
| "loss": 0.3585, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.2171581769436997, | |
| "eval_cer": 0.14582908423414237, | |
| "eval_loss": 0.3506743311882019, | |
| "eval_runtime": 192.8218, | |
| "eval_samples_per_second": 8.142, | |
| "eval_steps_per_second": 1.022, | |
| "eval_wer": 0.4764243614931238, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.2305630026809653, | |
| "grad_norm": 0.5296260714530945, | |
| "learning_rate": 0.0002864256086564472, | |
| "loss": 0.3262, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 3.2439678284182305, | |
| "grad_norm": 0.38092923164367676, | |
| "learning_rate": 0.0002863606853020739, | |
| "loss": 0.3516, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.257372654155496, | |
| "grad_norm": 0.7664552927017212, | |
| "learning_rate": 0.00028629305680793506, | |
| "loss": 0.3316, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 3.2707774798927614, | |
| "grad_norm": 0.5759456753730774, | |
| "learning_rate": 0.0002862254283137962, | |
| "loss": 0.3403, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.284182305630027, | |
| "grad_norm": 1.9567292928695679, | |
| "learning_rate": 0.00028615779981965733, | |
| "loss": 0.3166, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 3.297587131367292, | |
| "grad_norm": 0.49598929286003113, | |
| "learning_rate": 0.00028609017132551846, | |
| "loss": 0.3824, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.310991957104558, | |
| "grad_norm": 0.5681086778640747, | |
| "learning_rate": 0.0002860225428313796, | |
| "loss": 0.3187, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 3.324396782841823, | |
| "grad_norm": 0.43048450350761414, | |
| "learning_rate": 0.00028595491433724073, | |
| "loss": 0.3564, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.3378016085790883, | |
| "grad_norm": 0.45195090770721436, | |
| "learning_rate": 0.00028588728584310187, | |
| "loss": 0.3022, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 3.351206434316354, | |
| "grad_norm": 0.6295568346977234, | |
| "learning_rate": 0.000285819657348963, | |
| "loss": 0.3714, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.3646112600536195, | |
| "grad_norm": 0.5551475882530212, | |
| "learning_rate": 0.00028575202885482414, | |
| "loss": 0.444, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 3.3780160857908847, | |
| "grad_norm": 0.5752814412117004, | |
| "learning_rate": 0.0002856844003606853, | |
| "loss": 0.3906, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.39142091152815, | |
| "grad_norm": 0.44677379727363586, | |
| "learning_rate": 0.0002856167718665464, | |
| "loss": 0.2972, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 3.4048257372654156, | |
| "grad_norm": 0.592958390712738, | |
| "learning_rate": 0.00028554914337240755, | |
| "loss": 0.3616, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.418230563002681, | |
| "grad_norm": 0.29190266132354736, | |
| "learning_rate": 0.0002854815148782687, | |
| "loss": 0.2773, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 3.4316353887399464, | |
| "grad_norm": 0.318469762802124, | |
| "learning_rate": 0.0002854138863841298, | |
| "loss": 0.3451, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.4450402144772116, | |
| "grad_norm": 0.4345405399799347, | |
| "learning_rate": 0.00028534625788999095, | |
| "loss": 0.286, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 3.4584450402144773, | |
| "grad_norm": 0.6703570485115051, | |
| "learning_rate": 0.0002852786293958521, | |
| "loss": 0.3612, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.4718498659517425, | |
| "grad_norm": 0.7129529118537903, | |
| "learning_rate": 0.0002852110009017132, | |
| "loss": 0.3035, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 3.485254691689008, | |
| "grad_norm": 1.0257657766342163, | |
| "learning_rate": 0.00028514337240757436, | |
| "loss": 0.338, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.485254691689008, | |
| "eval_cer": 0.14121739524554128, | |
| "eval_loss": 0.29056990146636963, | |
| "eval_runtime": 191.9797, | |
| "eval_samples_per_second": 8.178, | |
| "eval_steps_per_second": 1.026, | |
| "eval_wer": 0.46463654223968565, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.485254691689008, | |
| "step": 6500, | |
| "total_flos": 3.3381487836753715e+19, | |
| "train_loss": 0.7533885692449717, | |
| "train_runtime": 52302.0768, | |
| "train_samples_per_second": 68.456, | |
| "train_steps_per_second": 2.139 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 111900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 60, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 3 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.3381487836753715e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |