{
  "best_global_step": 3000,
  "best_metric": 0.22896352410316467,
  "best_model_checkpoint": "./Wav2vec2-Hausa/checkpoint-3000",
  "epoch": 14.947804473902236,
  "eval_steps": 500,
  "global_step": 4500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08285004142502071,
      "grad_norm": 9.556963920593262,
      "learning_rate": 7.499999999999999e-06,
      "loss": 18.2394,
      "step": 25
    },
    {
      "epoch": 0.16570008285004142,
      "grad_norm": 18.581457138061523,
      "learning_rate": 1.47e-05,
      "loss": 16.9665,
      "step": 50
    },
    {
      "epoch": 0.24855012427506215,
      "grad_norm": 15.846641540527344,
      "learning_rate": 2.2199999999999998e-05,
      "loss": 10.9287,
      "step": 75
    },
    {
      "epoch": 0.33140016570008285,
      "grad_norm": 16.06224822998047,
      "learning_rate": 2.97e-05,
      "loss": 6.6297,
      "step": 100
    },
    {
      "epoch": 0.4142502071251036,
      "grad_norm": 9.293783187866211,
      "learning_rate": 3.7199999999999996e-05,
      "loss": 5.2272,
      "step": 125
    },
    {
      "epoch": 0.4971002485501243,
      "grad_norm": 8.752418518066406,
      "learning_rate": 4.4699999999999996e-05,
      "loss": 4.4165,
      "step": 150
    },
    {
      "epoch": 0.579950289975145,
      "grad_norm": 4.067853927612305,
      "learning_rate": 5.2199999999999995e-05,
      "loss": 3.8388,
      "step": 175
    },
    {
      "epoch": 0.6628003314001657,
      "grad_norm": NaN,
      "learning_rate": 5.94e-05,
      "loss": 3.4621,
      "step": 200
    },
    {
      "epoch": 0.7456503728251864,
      "grad_norm": 0.7896580100059509,
      "learning_rate": 6.69e-05,
      "loss": 3.0669,
      "step": 225
    },
    {
      "epoch": 0.8285004142502072,
      "grad_norm": 0.7531073689460754,
      "learning_rate": 7.439999999999999e-05,
      "loss": 2.8899,
      "step": 250
    },
    {
      "epoch": 0.9113504556752279,
      "grad_norm": 0.3927903473377228,
      "learning_rate": 8.19e-05,
      "loss": 2.8231,
      "step": 275
    },
    {
      "epoch": 0.9942004971002486,
      "grad_norm": 0.9705857038497925,
      "learning_rate": 8.939999999999999e-05,
      "loss": 2.7892,
      "step": 300
    },
    {
      "epoch": 1.0795360397680198,
      "grad_norm": 0.5089633464813232,
      "learning_rate": 9.69e-05,
      "loss": 2.8119,
      "step": 325
    },
    {
      "epoch": 1.1623860811930407,
      "grad_norm": 0.313754141330719,
      "learning_rate": 0.00010439999999999999,
      "loss": 2.7591,
      "step": 350
    },
    {
      "epoch": 1.2452361226180613,
      "grad_norm": 0.7091944217681885,
      "learning_rate": 0.0001119,
      "loss": 2.7743,
      "step": 375
    },
    {
      "epoch": 1.328086164043082,
      "grad_norm": 0.2798990309238434,
      "learning_rate": 0.0001194,
      "loss": 2.6965,
      "step": 400
    },
    {
      "epoch": 1.4109362054681027,
      "grad_norm": 0.22546878457069397,
      "learning_rate": 0.0001269,
      "loss": 2.6989,
      "step": 425
    },
    {
      "epoch": 1.4937862468931233,
      "grad_norm": 0.3145970404148102,
      "learning_rate": 0.0001344,
      "loss": 2.6602,
      "step": 450
    },
    {
      "epoch": 1.5766362883181442,
      "grad_norm": 0.46619847416877747,
      "learning_rate": 0.00014189999999999998,
      "loss": 2.6611,
      "step": 475
    },
    {
      "epoch": 1.659486329743165,
      "grad_norm": 1.2694575786590576,
      "learning_rate": 0.0001494,
      "loss": 2.2402,
      "step": 500
    },
    {
      "epoch": 1.659486329743165,
      "eval_cer": 0.6183230906678939,
      "eval_loss": 1.7612072229385376,
      "eval_runtime": 180.0066,
      "eval_samples_per_second": 6.722,
      "eval_steps_per_second": 0.844,
      "eval_wer": 0.9446567586694975,
      "step": 500
    },
    {
      "epoch": 1.7423363711681856,
      "grad_norm": 0.9558189511299133,
      "learning_rate": 0.0001569,
      "loss": 1.5439,
      "step": 525
    },
    {
      "epoch": 1.8251864125932062,
      "grad_norm": 1.1704384088516235,
      "learning_rate": 0.0001644,
      "loss": 1.0171,
      "step": 550
    },
    {
      "epoch": 1.908036454018227,
      "grad_norm": 0.6667094826698303,
      "learning_rate": 0.00017189999999999998,
      "loss": 0.9064,
      "step": 575
    },
    {
      "epoch": 1.9908864954432477,
      "grad_norm": 1.4252177476882935,
      "learning_rate": 0.00017939999999999997,
      "loss": 0.7337,
      "step": 600
    },
    {
      "epoch": 2.0762220381110192,
      "grad_norm": 0.727504312992096,
      "learning_rate": 0.00018659999999999998,
      "loss": 0.7596,
      "step": 625
    },
    {
      "epoch": 2.1590720795360396,
      "grad_norm": 1.1159340143203735,
      "learning_rate": 0.0001941,
      "loss": 0.5878,
      "step": 650
    },
    {
      "epoch": 2.2419221209610605,
      "grad_norm": 0.969331681728363,
      "learning_rate": 0.0002016,
      "loss": 0.7385,
      "step": 675
    },
    {
      "epoch": 2.3247721623860813,
      "grad_norm": 1.1958396434783936,
      "learning_rate": 0.00020909999999999996,
      "loss": 0.5456,
      "step": 700
    },
    {
      "epoch": 2.407622203811102,
      "grad_norm": 0.7370328307151794,
      "learning_rate": 0.00021659999999999998,
      "loss": 0.6401,
      "step": 725
    },
    {
      "epoch": 2.4904722452361225,
      "grad_norm": 0.9496448040008545,
      "learning_rate": 0.00022409999999999997,
      "loss": 0.5123,
      "step": 750
    },
    {
      "epoch": 2.5733222866611434,
      "grad_norm": 0.8332341909408569,
      "learning_rate": 0.0002316,
      "loss": 0.6013,
      "step": 775
    },
    {
      "epoch": 2.656172328086164,
      "grad_norm": 1.3926903009414673,
      "learning_rate": 0.00023909999999999998,
      "loss": 0.4888,
      "step": 800
    },
    {
      "epoch": 2.7390223695111846,
      "grad_norm": 0.8815742135047913,
      "learning_rate": 0.0002466,
      "loss": 0.5939,
      "step": 825
    },
    {
      "epoch": 2.8218724109362054,
      "grad_norm": 1.0045580863952637,
      "learning_rate": 0.0002541,
      "loss": 0.4609,
      "step": 850
    },
    {
      "epoch": 2.9047224523612263,
      "grad_norm": 0.6735367178916931,
      "learning_rate": 0.00026129999999999995,
      "loss": 0.5541,
      "step": 875
    },
    {
      "epoch": 2.9875724937862467,
      "grad_norm": 0.8095821738243103,
      "learning_rate": 0.0002688,
      "loss": 0.4487,
      "step": 900
    },
    {
      "epoch": 3.0729080364540184,
      "grad_norm": 1.2476941347122192,
      "learning_rate": 0.0002763,
      "loss": 0.5603,
      "step": 925
    },
    {
      "epoch": 3.155758077879039,
      "grad_norm": 1.500581979751587,
      "learning_rate": 0.00028379999999999996,
      "loss": 0.3883,
      "step": 950
    },
    {
      "epoch": 3.2386081193040597,
      "grad_norm": 0.7201348543167114,
      "learning_rate": 0.0002913,
      "loss": 0.5197,
      "step": 975
    },
    {
      "epoch": 3.3214581607290805,
      "grad_norm": 1.012593388557434,
      "learning_rate": 0.0002988,
      "loss": 0.3626,
      "step": 1000
    },
    {
      "epoch": 3.3214581607290805,
      "eval_cer": 0.12113869561686114,
      "eval_loss": 0.36199814081192017,
      "eval_runtime": 178.0685,
      "eval_samples_per_second": 6.795,
      "eval_steps_per_second": 0.854,
      "eval_wer": 0.44961075725406935,
      "step": 1000
    },
    {
      "epoch": 3.404308202154101,
      "grad_norm": 0.9544251561164856,
      "learning_rate": 0.00029964830011723327,
      "loss": 0.515,
      "step": 1025
    },
    {
      "epoch": 3.4871582435791217,
      "grad_norm": 0.855119526386261,
      "learning_rate": 0.0002992086752637749,
      "loss": 0.3888,
      "step": 1050
    },
    {
      "epoch": 3.5700082850041426,
      "grad_norm": 0.9061466455459595,
      "learning_rate": 0.0002987690504103165,
      "loss": 0.4937,
      "step": 1075
    },
    {
      "epoch": 3.652858326429163,
      "grad_norm": 0.8522987365722656,
      "learning_rate": 0.00029832942555685815,
      "loss": 0.3774,
      "step": 1100
    },
    {
      "epoch": 3.735708367854184,
      "grad_norm": 0.7319638729095459,
      "learning_rate": 0.00029788980070339976,
      "loss": 0.4526,
      "step": 1125
    },
    {
      "epoch": 3.8185584092792046,
      "grad_norm": 0.9154307842254639,
      "learning_rate": 0.00029745017584994137,
      "loss": 0.3795,
      "step": 1150
    },
    {
      "epoch": 3.9014084507042255,
      "grad_norm": 0.8338823318481445,
      "learning_rate": 0.000297010550996483,
      "loss": 0.4426,
      "step": 1175
    },
    {
      "epoch": 3.9842584921292463,
      "grad_norm": 0.9627026915550232,
      "learning_rate": 0.0002965709261430246,
      "loss": 0.3554,
      "step": 1200
    },
    {
      "epoch": 4.069594034797017,
      "grad_norm": 0.5187656879425049,
      "learning_rate": 0.0002961313012895662,
      "loss": 0.4345,
      "step": 1225
    },
    {
      "epoch": 4.1524440762220385,
      "grad_norm": 0.9797393679618835,
      "learning_rate": 0.0002956916764361078,
      "loss": 0.3001,
      "step": 1250
    },
    {
      "epoch": 4.235294117647059,
      "grad_norm": 0.9796245098114014,
      "learning_rate": 0.00029525205158264947,
      "loss": 0.4158,
      "step": 1275
    },
    {
      "epoch": 4.318144159072079,
      "grad_norm": 1.0057493448257446,
      "learning_rate": 0.0002948124267291911,
      "loss": 0.2856,
      "step": 1300
    },
    {
      "epoch": 4.4009942004971006,
      "grad_norm": 0.6741095781326294,
      "learning_rate": 0.0002943728018757327,
      "loss": 0.4099,
      "step": 1325
    },
    {
      "epoch": 4.483844241922121,
      "grad_norm": 0.7850795984268188,
      "learning_rate": 0.0002939331770222743,
      "loss": 0.3011,
      "step": 1350
    },
    {
      "epoch": 4.566694283347141,
      "grad_norm": 0.5698910355567932,
      "learning_rate": 0.0002934935521688159,
      "loss": 0.4201,
      "step": 1375
    },
    {
      "epoch": 4.649544324772163,
      "grad_norm": 0.5989360809326172,
      "learning_rate": 0.0002930539273153575,
      "loss": 0.2986,
      "step": 1400
    },
    {
      "epoch": 4.732394366197183,
      "grad_norm": 0.6864707469940186,
      "learning_rate": 0.0002926318874560375,
      "loss": 0.4006,
      "step": 1425
    },
    {
      "epoch": 4.815244407622204,
      "grad_norm": 0.8572924137115479,
      "learning_rate": 0.0002921922626025791,
      "loss": 0.2953,
      "step": 1450
    },
    {
      "epoch": 4.898094449047225,
      "grad_norm": 0.4575251638889313,
      "learning_rate": 0.0002917526377491207,
      "loss": 0.4145,
      "step": 1475
    },
    {
      "epoch": 4.980944490472245,
      "grad_norm": 0.7143483757972717,
      "learning_rate": 0.00029131301289566237,
      "loss": 0.2916,
      "step": 1500
    },
    {
      "epoch": 4.980944490472245,
      "eval_cer": 0.0923193545767117,
      "eval_loss": 0.2641300559043884,
      "eval_runtime": 179.3147,
      "eval_samples_per_second": 6.748,
      "eval_steps_per_second": 0.848,
      "eval_wer": 0.36036801132342533,
      "step": 1500
    },
    {
      "epoch": 5.066280033140017,
      "grad_norm": 1.980385184288025,
      "learning_rate": 0.000290873388042204,
      "loss": 0.3535,
      "step": 1525
    },
    {
      "epoch": 5.149130074565037,
      "grad_norm": 0.8608137965202332,
      "learning_rate": 0.0002904337631887456,
      "loss": 0.2469,
      "step": 1550
    },
    {
      "epoch": 5.231980115990058,
      "grad_norm": 0.5075603723526001,
      "learning_rate": 0.0002899941383352872,
      "loss": 0.3486,
      "step": 1575
    },
    {
      "epoch": 5.314830157415079,
      "grad_norm": 0.7245315909385681,
      "learning_rate": 0.0002895545134818288,
      "loss": 0.2515,
      "step": 1600
    },
    {
      "epoch": 5.397680198840099,
      "grad_norm": 0.4333842694759369,
      "learning_rate": 0.0002891148886283704,
      "loss": 0.3337,
      "step": 1625
    },
    {
      "epoch": 5.48053024026512,
      "grad_norm": 0.7363935112953186,
      "learning_rate": 0.000288675263774912,
      "loss": 0.2576,
      "step": 1650
    },
    {
      "epoch": 5.563380281690141,
      "grad_norm": 0.5363740921020508,
      "learning_rate": 0.000288253223915592,
      "loss": 0.3484,
      "step": 1675
    },
    {
      "epoch": 5.646230323115161,
      "grad_norm": 0.5194985866546631,
      "learning_rate": 0.0002878135990621336,
      "loss": 0.2545,
      "step": 1700
    },
    {
      "epoch": 5.729080364540183,
      "grad_norm": 0.5578182935714722,
      "learning_rate": 0.00028737397420867527,
      "loss": 0.3502,
      "step": 1725
    },
    {
      "epoch": 5.811930405965203,
      "grad_norm": 0.6931495070457458,
      "learning_rate": 0.0002869343493552169,
      "loss": 0.2312,
      "step": 1750
    },
    {
      "epoch": 5.894780447390223,
      "grad_norm": 0.59634929895401,
      "learning_rate": 0.0002864947245017585,
      "loss": 0.3525,
      "step": 1775
    },
    {
      "epoch": 5.977630488815245,
      "grad_norm": 0.544572651386261,
      "learning_rate": 0.0002860550996483001,
      "loss": 0.241,
      "step": 1800
    },
    {
      "epoch": 6.062966031483016,
      "grad_norm": 0.5554734468460083,
      "learning_rate": 0.0002856154747948417,
      "loss": 0.3355,
      "step": 1825
    },
    {
      "epoch": 6.145816072908037,
      "grad_norm": 0.9242589473724365,
      "learning_rate": 0.0002851758499413833,
      "loss": 0.2105,
      "step": 1850
    },
    {
      "epoch": 6.228666114333057,
      "grad_norm": 0.45407700538635254,
      "learning_rate": 0.0002847362250879249,
      "loss": 0.3085,
      "step": 1875
    },
    {
      "epoch": 6.311516155758078,
      "grad_norm": 1.0744433403015137,
      "learning_rate": 0.0002842966002344666,
      "loss": 0.2101,
      "step": 1900
    },
    {
      "epoch": 6.394366197183099,
      "grad_norm": 0.4946906864643097,
      "learning_rate": 0.0002838569753810082,
      "loss": 0.3161,
      "step": 1925
    },
    {
      "epoch": 6.477216238608119,
      "grad_norm": 0.6537393927574158,
      "learning_rate": 0.0002834173505275498,
      "loss": 0.2341,
      "step": 1950
    },
    {
      "epoch": 6.56006628003314,
      "grad_norm": 0.3927314281463623,
      "learning_rate": 0.0002829777256740914,
      "loss": 0.3191,
      "step": 1975
    },
    {
      "epoch": 6.642916321458161,
      "grad_norm": 1.1492557525634766,
      "learning_rate": 0.000282538100820633,
      "loss": 0.2105,
      "step": 2000
    },
    {
      "epoch": 6.642916321458161,
      "eval_cer": 0.0847930259631264,
      "eval_loss": 0.2543812096118927,
      "eval_runtime": 179.4122,
      "eval_samples_per_second": 6.744,
      "eval_steps_per_second": 0.847,
      "eval_wer": 0.3295116772823779,
      "step": 2000
    },
    {
      "epoch": 6.725766362883181,
      "grad_norm": 0.4488711953163147,
      "learning_rate": 0.000282116060961313,
      "loss": 0.307,
      "step": 2025
    },
    {
      "epoch": 6.808616404308202,
      "grad_norm": 0.7720121145248413,
      "learning_rate": 0.0002816764361078546,
      "loss": 0.2181,
      "step": 2050
    },
    {
      "epoch": 6.891466445733223,
      "grad_norm": 0.47668084502220154,
      "learning_rate": 0.0002812368112543962,
      "loss": 0.3084,
      "step": 2075
    },
    {
      "epoch": 6.9743164871582435,
      "grad_norm": 0.8669754266738892,
      "learning_rate": 0.00028079718640093787,
      "loss": 0.2189,
      "step": 2100
    },
    {
      "epoch": 7.059652029826015,
      "grad_norm": 0.40827029943466187,
      "learning_rate": 0.0002803575615474795,
      "loss": 0.2932,
      "step": 2125
    },
    {
      "epoch": 7.142502071251036,
      "grad_norm": 0.5760928988456726,
      "learning_rate": 0.0002799179366940211,
      "loss": 0.2059,
      "step": 2150
    },
    {
      "epoch": 7.225352112676056,
      "grad_norm": 0.46470338106155396,
      "learning_rate": 0.0002794783118405627,
      "loss": 0.2942,
      "step": 2175
    },
    {
      "epoch": 7.308202154101077,
      "grad_norm": 0.6504044532775879,
      "learning_rate": 0.0002790386869871043,
      "loss": 0.2016,
      "step": 2200
    },
    {
      "epoch": 7.391052195526098,
      "grad_norm": 0.41604796051979065,
      "learning_rate": 0.0002785990621336459,
      "loss": 0.2735,
      "step": 2225
    },
    {
      "epoch": 7.473902236951118,
      "grad_norm": 0.9678609371185303,
      "learning_rate": 0.00027815943728018753,
      "loss": 0.1919,
      "step": 2250
    },
    {
      "epoch": 7.556752278376139,
      "grad_norm": 0.39137783646583557,
      "learning_rate": 0.0002777373974208675,
      "loss": 0.285,
      "step": 2275
    },
    {
      "epoch": 7.63960231980116,
      "grad_norm": 0.500848114490509,
      "learning_rate": 0.0002772977725674091,
      "loss": 0.1994,
      "step": 2300
    },
    {
      "epoch": 7.72245236122618,
      "grad_norm": 0.4283003807067871,
      "learning_rate": 0.00027685814771395077,
      "loss": 0.2917,
      "step": 2325
    },
    {
      "epoch": 7.8053024026512015,
      "grad_norm": 0.607266366481781,
      "learning_rate": 0.0002764185228604924,
      "loss": 0.1982,
      "step": 2350
    },
    {
      "epoch": 7.888152444076222,
      "grad_norm": 0.45504409074783325,
      "learning_rate": 0.000275978898007034,
      "loss": 0.2724,
      "step": 2375
    },
    {
      "epoch": 7.971002485501243,
      "grad_norm": 0.8149850368499756,
      "learning_rate": 0.0002755392731535756,
      "loss": 0.1888,
      "step": 2400
    },
    {
      "epoch": 8.056338028169014,
      "grad_norm": 1.0355818271636963,
      "learning_rate": 0.0002750996483001172,
      "loss": 0.2714,
      "step": 2425
    },
    {
      "epoch": 8.139188069594034,
      "grad_norm": 0.4683228135108948,
      "learning_rate": 0.0002746600234466588,
      "loss": 0.1818,
      "step": 2450
    },
    {
      "epoch": 8.222038111019055,
      "grad_norm": 0.4191352128982544,
      "learning_rate": 0.0002742203985932004,
      "loss": 0.2556,
      "step": 2475
    },
    {
      "epoch": 8.304888152444077,
      "grad_norm": 0.7036840915679932,
      "learning_rate": 0.0002737807737397421,
      "loss": 0.1804,
      "step": 2500
    },
    {
      "epoch": 8.304888152444077,
      "eval_cer": 0.07958144949508623,
      "eval_loss": 0.23702508211135864,
      "eval_runtime": 180.5015,
      "eval_samples_per_second": 6.704,
      "eval_steps_per_second": 0.842,
      "eval_wer": 0.3151450813871196,
      "step": 2500
    },
    {
      "epoch": 8.387738193869097,
      "grad_norm": 0.34809935092926025,
      "learning_rate": 0.0002733411488862837,
      "loss": 0.262,
      "step": 2525
    },
    {
      "epoch": 8.470588235294118,
      "grad_norm": 1.302828311920166,
      "learning_rate": 0.0002729015240328253,
      "loss": 0.1991,
      "step": 2550
    },
    {
      "epoch": 8.553438276719138,
      "grad_norm": 0.5225396156311035,
      "learning_rate": 0.0002724618991793669,
      "loss": 0.2477,
      "step": 2575
    },
    {
      "epoch": 8.636288318144159,
      "grad_norm": 0.6288148760795593,
      "learning_rate": 0.0002720222743259085,
      "loss": 0.1829,
      "step": 2600
    },
    {
      "epoch": 8.719138359569179,
      "grad_norm": 0.370772123336792,
      "learning_rate": 0.00027158264947245013,
      "loss": 0.2546,
      "step": 2625
    },
    {
      "epoch": 8.801988400994201,
      "grad_norm": 0.5763248801231384,
      "learning_rate": 0.00027114302461899174,
      "loss": 0.1728,
      "step": 2650
    },
    {
      "epoch": 8.884838442419221,
      "grad_norm": 0.4807268977165222,
      "learning_rate": 0.0002707033997655334,
      "loss": 0.2604,
      "step": 2675
    },
    {
      "epoch": 8.967688483844242,
      "grad_norm": 1.5762399435043335,
      "learning_rate": 0.000270263774912075,
      "loss": 0.1808,
      "step": 2700
    },
    {
      "epoch": 9.053024026512013,
      "grad_norm": 0.36433079838752747,
      "learning_rate": 0.000269841735052755,
      "loss": 0.2444,
      "step": 2725
    },
    {
      "epoch": 9.135874067937033,
      "grad_norm": 1.2679426670074463,
      "learning_rate": 0.0002694021101992966,
      "loss": 0.1699,
      "step": 2750
    },
    {
      "epoch": 9.218724109362055,
      "grad_norm": 0.38808515667915344,
      "learning_rate": 0.0002689624853458382,
      "loss": 0.2331,
      "step": 2775
    },
    {
      "epoch": 9.301574150787076,
      "grad_norm": 1.0244839191436768,
      "learning_rate": 0.0002685228604923798,
      "loss": 0.1615,
      "step": 2800
    },
    {
      "epoch": 9.384424192212096,
      "grad_norm": 0.4677698314189911,
      "learning_rate": 0.0002681008206330598,
      "loss": 0.2436,
      "step": 2825
    },
    {
      "epoch": 9.467274233637117,
      "grad_norm": 0.9670608639717102,
      "learning_rate": 0.0002676611957796014,
      "loss": 0.1672,
      "step": 2850
    },
    {
      "epoch": 9.550124275062137,
      "grad_norm": 0.6787462830543518,
      "learning_rate": 0.000267221570926143,
      "loss": 0.2447,
      "step": 2875
    },
    {
      "epoch": 9.632974316487159,
      "grad_norm": 0.5140098333358765,
      "learning_rate": 0.0002667819460726846,
      "loss": 0.1588,
      "step": 2900
    },
    {
      "epoch": 9.71582435791218,
      "grad_norm": 0.678119957447052,
      "learning_rate": 0.00026634232121922627,
      "loss": 0.2347,
      "step": 2925
    },
    {
      "epoch": 9.7986743993372,
      "grad_norm": 0.668165385723114,
      "learning_rate": 0.0002659026963657679,
      "loss": 0.1838,
      "step": 2950
    },
    {
      "epoch": 9.88152444076222,
      "grad_norm": 0.4628326892852783,
      "learning_rate": 0.0002654630715123095,
      "loss": 0.2462,
      "step": 2975
    },
    {
      "epoch": 9.96437448218724,
      "grad_norm": 0.3794308602809906,
      "learning_rate": 0.0002650234466588511,
      "loss": 0.182,
      "step": 3000
    },
    {
      "epoch": 9.96437448218724,
      "eval_cer": 0.07455938489861115,
      "eval_loss": 0.22896352410316467,
      "eval_runtime": 180.0793,
      "eval_samples_per_second": 6.719,
      "eval_steps_per_second": 0.844,
      "eval_wer": 0.2959660297239915,
      "step": 3000
    },
    {
      "epoch": 10.049710024855013,
      "grad_norm": 0.7407336831092834,
      "learning_rate": 0.0002645838218053927,
      "loss": 0.2413,
      "step": 3025
    },
    {
      "epoch": 10.132560066280034,
      "grad_norm": 0.71273273229599,
      "learning_rate": 0.0002641441969519343,
      "loss": 0.1593,
      "step": 3050
    },
    {
      "epoch": 10.215410107705054,
      "grad_norm": 0.4058437943458557,
      "learning_rate": 0.00026370457209847593,
      "loss": 0.2376,
      "step": 3075
    },
    {
      "epoch": 10.298260149130074,
      "grad_norm": 1.03704833984375,
      "learning_rate": 0.0002632649472450176,
      "loss": 0.1545,
      "step": 3100
    },
    {
      "epoch": 10.381110190555095,
      "grad_norm": 0.3222190737724304,
      "learning_rate": 0.0002628253223915592,
      "loss": 0.2186,
      "step": 3125
    },
    {
      "epoch": 10.463960231980115,
      "grad_norm": 0.4006061851978302,
      "learning_rate": 0.0002623856975381008,
      "loss": 0.1506,
      "step": 3150
    },
    {
      "epoch": 10.546810273405137,
      "grad_norm": 1.0020666122436523,
      "learning_rate": 0.0002619460726846424,
      "loss": 0.2305,
      "step": 3175
    },
    {
      "epoch": 10.629660314830158,
      "grad_norm": 0.6109996438026428,
      "learning_rate": 0.00026150644783118403,
      "loss": 0.163,
      "step": 3200
    },
    {
      "epoch": 10.712510356255178,
      "grad_norm": 0.5075812935829163,
      "learning_rate": 0.00026106682297772564,
      "loss": 0.2304,
      "step": 3225
    },
    {
      "epoch": 10.795360397680199,
      "grad_norm": 0.7409548759460449,
      "learning_rate": 0.00026062719812426725,
      "loss": 0.182,
      "step": 3250
    },
    {
      "epoch": 10.878210439105219,
      "grad_norm": 0.6909148097038269,
      "learning_rate": 0.0002601875732708089,
      "loss": 0.2306,
      "step": 3275
    },
    {
      "epoch": 10.96106048053024,
      "grad_norm": 0.5709498524665833,
      "learning_rate": 0.0002597479484173505,
      "loss": 0.1667,
      "step": 3300
    },
    {
      "epoch": 11.046396023198012,
      "grad_norm": 0.5173778533935547,
      "learning_rate": 0.0002593259085580305,
      "loss": 0.2397,
      "step": 3325
    },
    {
      "epoch": 11.129246064623032,
      "grad_norm": 0.628476083278656,
      "learning_rate": 0.0002588862837045721,
      "loss": 0.1709,
      "step": 3350
    },
    {
      "epoch": 11.212096106048053,
      "grad_norm": 0.782648503780365,
      "learning_rate": 0.0002584466588511137,
      "loss": 0.2319,
      "step": 3375
    },
    {
      "epoch": 11.294946147473073,
      "grad_norm": 0.6060341000556946,
      "learning_rate": 0.0002580070339976553,
      "loss": 0.1792,
      "step": 3400
    },
    {
      "epoch": 11.377796188898094,
      "grad_norm": 0.9537221789360046,
      "learning_rate": 0.0002575674091441969,
      "loss": 0.2451,
      "step": 3425
    },
    {
      "epoch": 11.460646230323116,
      "grad_norm": 0.6490224599838257,
      "learning_rate": 0.00025712778429073853,
      "loss": 0.1818,
      "step": 3450
    },
    {
      "epoch": 11.543496271748136,
      "grad_norm": 0.9516769647598267,
      "learning_rate": 0.0002567057444314185,
      "loss": 0.2567,
      "step": 3475
    },
    {
      "epoch": 11.626346313173157,
      "grad_norm": 1.2261236906051636,
      "learning_rate": 0.0002562661195779601,
      "loss": 0.2024,
      "step": 3500
    },
    {
      "epoch": 11.626346313173157,
      "eval_cer": 0.07910766981617348,
      "eval_loss": 0.25268253684043884,
      "eval_runtime": 179.0467,
      "eval_samples_per_second": 6.758,
      "eval_steps_per_second": 0.849,
      "eval_wer": 0.3087048832271762,
      "step": 3500
    },
    {
      "epoch": 11.709196354598177,
      "grad_norm": 1.0352481603622437,
      "learning_rate": 0.0002558264947245018,
      "loss": 0.2509,
      "step": 3525
    },
    {
      "epoch": 11.792046396023197,
      "grad_norm": 1.3052682876586914,
      "learning_rate": 0.0002553868698710434,
      "loss": 0.1896,
      "step": 3550
    },
    {
      "epoch": 11.87489643744822,
      "grad_norm": 0.7363812923431396,
      "learning_rate": 0.000254947245017585,
      "loss": 0.2877,
      "step": 3575
    },
    {
      "epoch": 11.95774647887324,
      "grad_norm": 1.4226346015930176,
      "learning_rate": 0.0002545076201641266,
      "loss": 0.2163,
      "step": 3600
    },
    {
      "epoch": 12.04308202154101,
      "grad_norm": 1.1405287981033325,
      "learning_rate": 0.0002540679953106682,
      "loss": 0.2954,
      "step": 3625
    },
    {
      "epoch": 12.125932062966031,
      "grad_norm": 3.9781391620635986,
      "learning_rate": 0.0002536283704572098,
      "loss": 0.2087,
      "step": 3650
    },
    {
      "epoch": 12.208782104391052,
      "grad_norm": 0.7986987829208374,
      "learning_rate": 0.00025318874560375143,
      "loss": 0.2763,
      "step": 3675
    },
    {
      "epoch": 12.291632145816074,
      "grad_norm": 2.4786369800567627,
      "learning_rate": 0.0002527491207502931,
      "loss": 0.2251,
      "step": 3700
    },
    {
      "epoch": 12.374482187241094,
      "grad_norm": 0.7642366290092468,
      "learning_rate": 0.0002523094958968347,
      "loss": 0.2828,
      "step": 3725
    },
    {
      "epoch": 12.457332228666115,
      "grad_norm": 2.443129777908325,
      "learning_rate": 0.0002518698710433763,
      "loss": 0.2458,
      "step": 3750
    },
    {
      "epoch": 12.540182270091135,
      "grad_norm": 1.845415711402893,
      "learning_rate": 0.0002514302461899179,
      "loss": 0.2796,
      "step": 3775
    },
    {
      "epoch": 12.623032311516155,
      "grad_norm": 1.1014904975891113,
      "learning_rate": 0.00025099062133645953,
      "loss": 0.2366,
      "step": 3800
    },
    {
      "epoch": 12.705882352941176,
      "grad_norm": 1.1710816621780396,
      "learning_rate": 0.0002505685814771395,
      "loss": 0.2931,
      "step": 3825
    },
    {
      "epoch": 12.788732394366198,
      "grad_norm": 0.7777267098426819,
      "learning_rate": 0.0002501289566236811,
      "loss": 0.2452,
      "step": 3850
    },
    {
      "epoch": 12.871582435791218,
      "grad_norm": 0.5831831693649292,
      "learning_rate": 0.0002496893317702227,
      "loss": 0.2844,
      "step": 3875
    },
    {
      "epoch": 12.954432477216239,
      "grad_norm": 0.8225266337394714,
      "learning_rate": 0.00024924970691676433,
      "loss": 0.2112,
      "step": 3900
    },
    {
      "epoch": 13.03976801988401,
      "grad_norm": 0.8047693967819214,
      "learning_rate": 0.000248810082063306,
      "loss": 0.2547,
      "step": 3925
    },
    {
      "epoch": 13.12261806130903,
      "grad_norm": 0.9043530225753784,
      "learning_rate": 0.0002483704572098476,
      "loss": 0.2062,
      "step": 3950
    },
    {
      "epoch": 13.205468102734052,
      "grad_norm": 0.4219953715801239,
      "learning_rate": 0.0002479308323563892,
      "loss": 0.2632,
      "step": 3975
    },
    {
      "epoch": 13.288318144159073,
      "grad_norm": 1.659414529800415,
      "learning_rate": 0.0002474912075029308,
      "loss": 0.2358,
      "step": 4000
    },
    {
      "epoch": 13.288318144159073,
      "eval_cer": 0.0747353602079216,
      "eval_loss": 0.24355952441692352,
      "eval_runtime": 180.0102,
      "eval_samples_per_second": 6.722,
      "eval_steps_per_second": 0.844,
      "eval_wer": 0.2952583156404813,
      "step": 4000
    },
    {
      "epoch": 13.371168185584093,
      "grad_norm": 0.5549167990684509,
      "learning_rate": 0.00024705158264947243,
      "loss": 0.2611,
      "step": 4025
    },
    {
      "epoch": 13.454018227009113,
      "grad_norm": 0.9828294515609741,
      "learning_rate": 0.00024661195779601404,
      "loss": 0.2381,
      "step": 4050
    },
    {
      "epoch": 13.536868268434134,
      "grad_norm": 0.5421575903892517,
      "learning_rate": 0.00024617233294255565,
      "loss": 0.2858,
      "step": 4075
    },
    {
      "epoch": 13.619718309859154,
      "grad_norm": 0.5966264009475708,
      "learning_rate": 0.0002457327080890973,
      "loss": 0.261,
      "step": 4100
    },
    {
      "epoch": 13.702568351284176,
      "grad_norm": 0.5706139206886292,
      "learning_rate": 0.0002453106682297773,
      "loss": 0.2843,
      "step": 4125
    },
    {
      "epoch": 13.785418392709197,
      "grad_norm": 0.43442562222480774,
      "learning_rate": 0.0002448710433763189,
      "loss": 0.303,
      "step": 4150
    },
    {
      "epoch": 13.868268434134217,
      "grad_norm": 0.6309686899185181,
      "learning_rate": 0.0002444314185228605,
      "loss": 0.3506,
      "step": 4175
    },
    {
      "epoch": 13.951118475559237,
      "grad_norm": 0.6217506527900696,
      "learning_rate": 0.00024399179366940208,
      "loss": 0.3241,
      "step": 4200
    },
    {
      "epoch": 14.036454018227008,
      "grad_norm": 0.3866030275821686,
      "learning_rate": 0.00024355216881594372,
      "loss": 0.3698,
      "step": 4225
    },
    {
      "epoch": 14.11930405965203,
      "grad_norm": 0.30347779393196106,
      "learning_rate": 0.00024311254396248532,
      "loss": 0.3304,
      "step": 4250
    },
    {
      "epoch": 14.202154101077051,
      "grad_norm": 0.5292103290557861,
      "learning_rate": 0.00024267291910902693,
      "loss": 0.3129,
      "step": 4275
    },
    {
      "epoch": 14.285004142502071,
      "grad_norm": 0.7136854529380798,
      "learning_rate": 0.00024223329425556854,
      "loss": 0.2908,
      "step": 4300
    },
    {
      "epoch": 14.367854183927092,
      "grad_norm": 0.3742729127407074,
      "learning_rate": 0.00024179366940211018,
      "loss": 0.3209,
      "step": 4325
    },
    {
      "epoch": 14.450704225352112,
      "grad_norm": 0.33382654190063477,
      "learning_rate": 0.00024135404454865182,
      "loss": 0.2917,
      "step": 4350
    },
    {
      "epoch": 14.533554266777134,
      "grad_norm": 0.3485744893550873,
      "learning_rate": 0.00024091441969519343,
      "loss": 0.3577,
      "step": 4375
    },
    {
      "epoch": 14.616404308202155,
      "grad_norm": 0.6169712543487549,
      "learning_rate": 0.00024047479484173503,
      "loss": 0.3632,
      "step": 4400
    },
    {
      "epoch": 14.699254349627175,
      "grad_norm": 0.3555282652378082,
      "learning_rate": 0.00024003516998827664,
      "loss": 0.3632,
      "step": 4425
    },
    {
      "epoch": 14.782104391052195,
      "grad_norm": 0.3529140055179596,
      "learning_rate": 0.00023959554513481825,
      "loss": 0.3397,
      "step": 4450
    },
    {
      "epoch": 14.864954432477216,
      "grad_norm": 0.6318807005882263,
      "learning_rate": 0.00023915592028135986,
      "loss": 0.3358,
      "step": 4475
    },
    {
      "epoch": 14.947804473902236,
      "grad_norm": 0.6898398399353027,
      "learning_rate": 0.00023871629542790153,
      "loss": 0.3622,
      "step": 4500
    },
    {
      "epoch": 14.947804473902236,
      "eval_cer": 0.07164902401386145,
      "eval_loss": 0.2824631929397583,
      "eval_runtime": 184.989,
      "eval_samples_per_second": 6.541,
      "eval_steps_per_second": 0.822,
      "eval_wer": 0.28365180467091294,
      "step": 4500
    },
    {
      "epoch": 14.947804473902236,
      "step": 4500,
      "total_flos": 3.1185273254659265e+19,
      "train_loss": 0.8522552142143249,
      "train_runtime": 37475.2897,
      "train_samples_per_second": 15.455,
      "train_steps_per_second": 0.482
    }
  ],
  "logging_steps": 25,
  "max_steps": 18060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1185273254659265e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}