Note: strict JSON parsers (for example, JavaScript's `JSON.parse`) reject this file with `Unexpected token 'I'` because the `log_history` entry for step 9450 records `"grad_norm": Infinity`. `Infinity` is a non-standard token that Python's `json` module emits for `float('inf')`; the JSON specification does not allow it, but the rest of the file is well-formed.
{
  "best_global_step": 32500,
  "best_metric": 0.09504964202642441,
  "best_model_checkpoint": "./whisper-quran-finetuned_small\\checkpoint-32500",
  "epoch": 3.0,
  "eval_steps": 2500,
  "global_step": 50595,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0029648077322185656, "grad_norm": 3.1639976501464844, "learning_rate": 9.800000000000001e-07, "loss": 0.1027, "step": 50 },
    { "epoch": 0.005929615464437131, "grad_norm": 3.5852792263031006, "learning_rate": 1.98e-06, "loss": 0.0837, "step": 100 },
    { "epoch": 0.008894423196655697, "grad_norm": 4.179174423217773, "learning_rate": 2.9800000000000003e-06, "loss": 0.071, "step": 150 },
    { "epoch": 0.011859230928874262, "grad_norm": 3.4920785427093506, "learning_rate": 3.980000000000001e-06, "loss": 0.0514, "step": 200 },
    { "epoch": 0.014824038661092828, "grad_norm": 3.6322460174560547, "learning_rate": 4.980000000000001e-06, "loss": 0.0498, "step": 250 },
    { "epoch": 0.017788846393311394, "grad_norm": 2.3850507736206055, "learning_rate": 5.98e-06, "loss": 0.0449, "step": 300 },
    { "epoch": 0.020753654125529958, "grad_norm": 3.212959051132202, "learning_rate": 6.98e-06, "loss": 0.0367, "step": 350 },
    { "epoch": 0.023718461857748525, "grad_norm": 3.586538553237915, "learning_rate": 7.980000000000002e-06, "loss": 0.0408, "step": 400 },
    { "epoch": 0.02668326958996709, "grad_norm": 4.874953746795654, "learning_rate": 8.98e-06, "loss": 0.0305, "step": 450 },
    { "epoch": 0.029648077322185655, "grad_norm": 4.741893291473389, "learning_rate": 9.980000000000001e-06, "loss": 0.0334, "step": 500 },
    { "epoch": 0.03261288505440422, "grad_norm": 3.2416727542877197, "learning_rate": 9.999976392890719e-06, "loss": 0.0335, "step": 550 },
    { "epoch": 0.03557769278662279, "grad_norm": 3.642324686050415, "learning_rate": 9.99990363485342e-06, "loss": 0.0263, "step": 600 },
    { "epoch": 0.03854250051884135, "grad_norm": 4.084117889404297, "learning_rate": 9.999781716771276e-06, "loss": 0.0277, "step": 650 },
    { "epoch": 0.041507308251059916, "grad_norm": 5.314033031463623, "learning_rate": 9.999610639843005e-06, "loss": 0.0698, "step": 700 },
    { "epoch": 0.044472115983278486, "grad_norm": 6.3697381019592285, "learning_rate": 9.999390405750668e-06, "loss": 0.0742, "step": 750 },
    { "epoch": 0.04743692371549705, "grad_norm": 3.1028659343719482, "learning_rate": 9.999121016659655e-06, "loss": 0.0678, "step": 800 },
    { "epoch": 0.05040173144771561, "grad_norm": 3.242400884628296, "learning_rate": 9.99880247521865e-06, "loss": 0.0723, "step": 850 },
    { "epoch": 0.05336653917993418, "grad_norm": 5.612239360809326, "learning_rate": 9.998434784559616e-06, "loss": 0.0715, "step": 900 },
    { "epoch": 0.05633134691215275, "grad_norm": 2.9017271995544434, "learning_rate": 9.99801794829776e-06, "loss": 0.0589, "step": 950 },
    { "epoch": 0.05929615464437131, "grad_norm": 5.012439727783203, "learning_rate": 9.997551970531501e-06, "loss": 0.0705, "step": 1000 },
    { "epoch": 0.06226096237658988, "grad_norm": 2.756843328475952, "learning_rate": 9.997036855842423e-06, "loss": 0.06, "step": 1050 },
    { "epoch": 0.06522577010880844, "grad_norm": 5.2626566886901855, "learning_rate": 9.996472609295236e-06, "loss": 0.0528, "step": 1100 },
    { "epoch": 0.06819057784102701, "grad_norm": 3.3507096767425537, "learning_rate": 9.995859236437724e-06, "loss": 0.0564, "step": 1150 },
    { "epoch": 0.07115538557324558, "grad_norm": 3.6945300102233887, "learning_rate": 9.995196743300693e-06, "loss": 0.0514, "step": 1200 },
    { "epoch": 0.07412019330546414, "grad_norm": 1.9075812101364136, "learning_rate": 9.994485136397903e-06, "loss": 0.0526, "step": 1250 },
    { "epoch": 0.0770850010376827, "grad_norm": 4.296079635620117, "learning_rate": 9.993724422726017e-06, "loss": 0.0519, "step": 1300 },
    { "epoch": 0.08004980876990127, "grad_norm": 4.138225078582764, "learning_rate": 9.992914609764521e-06, "loss": 0.0529, "step": 1350 },
    { "epoch": 0.08301461650211983, "grad_norm": 2.9909701347351074, "learning_rate": 9.99205570547566e-06, "loss": 0.0406, "step": 1400 },
    { "epoch": 0.08597942423433841, "grad_norm": 5.061216354370117, "learning_rate": 9.991147718304348e-06, "loss": 0.0468, "step": 1450 },
    { "epoch": 0.08894423196655697, "grad_norm": 3.402036190032959, "learning_rate": 9.990190657178099e-06, "loss": 0.0504, "step": 1500 },
    { "epoch": 0.09190903969877554, "grad_norm": 4.70094108581543, "learning_rate": 9.989184531506922e-06, "loss": 0.0451, "step": 1550 },
    { "epoch": 0.0948738474309941, "grad_norm": 3.8658971786499023, "learning_rate": 9.98812935118325e-06, "loss": 0.0415, "step": 1600 },
    { "epoch": 0.09783865516321266, "grad_norm": 4.501025199890137, "learning_rate": 9.98702512658182e-06, "loss": 0.0434, "step": 1650 },
    { "epoch": 0.10080346289543123, "grad_norm": 1.1796941757202148, "learning_rate": 9.985871868559593e-06, "loss": 0.0476, "step": 1700 },
    { "epoch": 0.1037682706276498, "grad_norm": 3.604438066482544, "learning_rate": 9.984669588455626e-06, "loss": 0.042, "step": 1750 },
    { "epoch": 0.10673307835986837, "grad_norm": 3.441258668899536, "learning_rate": 9.98341829809098e-06, "loss": 0.0389, "step": 1800 },
    { "epoch": 0.10969788609208693, "grad_norm": 2.7927887439727783, "learning_rate": 9.982118009768587e-06, "loss": 0.0417, "step": 1850 },
    { "epoch": 0.1126626938243055, "grad_norm": 3.311816453933716, "learning_rate": 9.98076873627314e-06, "loss": 0.041, "step": 1900 },
    { "epoch": 0.11562750155652406, "grad_norm": 3.63645076751709, "learning_rate": 9.979370490870968e-06, "loss": 0.0459, "step": 1950 },
    { "epoch": 0.11859230928874262, "grad_norm": 4.9164204597473145, "learning_rate": 9.97792328730989e-06, "loss": 0.0397, "step": 2000 },
    { "epoch": 0.1215571170209612, "grad_norm": 4.712317943572998, "learning_rate": 9.976427139819106e-06, "loss": 0.0356, "step": 2050 },
    { "epoch": 0.12452192475317976, "grad_norm": 1.0168401002883911, "learning_rate": 9.974882063109027e-06, "loss": 0.0361, "step": 2100 },
    { "epoch": 0.12748673248539832, "grad_norm": 2.246588945388794, "learning_rate": 9.973288072371163e-06, "loss": 0.0398, "step": 2150 },
    { "epoch": 0.13045154021761687, "grad_norm": 3.196394920349121, "learning_rate": 9.97164518327794e-06, "loss": 0.0419, "step": 2200 },
    { "epoch": 0.13341634794983545, "grad_norm": 2.1984238624572754, "learning_rate": 9.969953411982574e-06, "loss": 0.0395, "step": 2250 },
    { "epoch": 0.13638115568205403, "grad_norm": 1.9637821912765503, "learning_rate": 9.968212775118895e-06, "loss": 0.0396, "step": 2300 },
    { "epoch": 0.13934596341427258, "grad_norm": 2.859842300415039, "learning_rate": 9.966423289801187e-06, "loss": 0.0412, "step": 2350 },
    { "epoch": 0.14231077114649116, "grad_norm": 3.271143674850464, "learning_rate": 9.96458497362403e-06, "loss": 0.0393, "step": 2400 },
    { "epoch": 0.1452755788787097, "grad_norm": 2.784824848175049, "learning_rate": 9.96269784466211e-06, "loss": 0.036, "step": 2450 },
    { "epoch": 0.14824038661092828, "grad_norm": 2.1318204402923584, "learning_rate": 9.960761921470053e-06, "loss": 0.0361, "step": 2500 },
    { "epoch": 0.14824038661092828, "eval_loss": 0.1340717226266861, "eval_runtime": 1798.3265, "eval_samples_per_second": 3.256, "eval_steps_per_second": 0.814, "eval_wer": 13.782618229661574, "step": 2500 },
    { "epoch": 0.15120519434314686, "grad_norm": 3.058884620666504, "learning_rate": 9.958777223082243e-06, "loss": 0.0377, "step": 2550 },
    { "epoch": 0.1541700020753654, "grad_norm": 1.4118783473968506, "learning_rate": 9.956743769012627e-06, "loss": 0.0348, "step": 2600 },
    { "epoch": 0.15713480980758399, "grad_norm": 1.7757619619369507, "learning_rate": 9.954661579254528e-06, "loss": 0.0348, "step": 2650 },
    { "epoch": 0.16009961753980254, "grad_norm": 3.934190511703491, "learning_rate": 9.95253067428045e-06, "loss": 0.0335, "step": 2700 },
    { "epoch": 0.1630644252720211, "grad_norm": 2.1128933429718018, "learning_rate": 9.950351075041872e-06, "loss": 0.0338, "step": 2750 },
    { "epoch": 0.16602923300423966, "grad_norm": 1.1197174787521362, "learning_rate": 9.94812280296905e-06, "loss": 0.0288, "step": 2800 },
    { "epoch": 0.16899404073645824, "grad_norm": 3.3981051445007324, "learning_rate": 9.945845879970794e-06, "loss": 0.0348, "step": 2850 },
    { "epoch": 0.17195884846867682, "grad_norm": 1.5275200605392456, "learning_rate": 9.943520328434266e-06, "loss": 0.0307, "step": 2900 },
    { "epoch": 0.17492365620089537, "grad_norm": 1.5511682033538818, "learning_rate": 9.941146171224745e-06, "loss": 0.0297, "step": 2950 },
    { "epoch": 0.17788846393311394, "grad_norm": 1.4682283401489258, "learning_rate": 9.93872343168542e-06, "loss": 0.0385, "step": 3000 },
    { "epoch": 0.1808532716653325, "grad_norm": 4.4206085205078125, "learning_rate": 9.936252133637146e-06, "loss": 0.0283, "step": 3050 },
    { "epoch": 0.18381807939755107, "grad_norm": 3.574676275253296, "learning_rate": 9.933732301378216e-06, "loss": 0.0361, "step": 3100 },
    { "epoch": 0.18678288712976965, "grad_norm": 3.5772438049316406, "learning_rate": 9.93116395968412e-06, "loss": 0.0334, "step": 3150 },
    { "epoch": 0.1897476948619882, "grad_norm": 1.9996916055679321, "learning_rate": 9.928547133807304e-06, "loss": 0.0313, "step": 3200 },
    { "epoch": 0.19271250259420677, "grad_norm": 2.3533923625946045, "learning_rate": 9.925881849476921e-06, "loss": 0.0329, "step": 3250 },
    { "epoch": 0.19567731032642532, "grad_norm": 2.9052157402038574, "learning_rate": 9.923168132898573e-06, "loss": 0.0315, "step": 3300 },
    { "epoch": 0.1986421180586439, "grad_norm": 6.513851165771484, "learning_rate": 9.92040601075406e-06, "loss": 0.0278, "step": 3350 },
    { "epoch": 0.20160692579086245, "grad_norm": 8.648693084716797, "learning_rate": 9.917595510201115e-06, "loss": 0.0311, "step": 3400 },
    { "epoch": 0.20457173352308103, "grad_norm": 4.101097106933594, "learning_rate": 9.914736658873138e-06, "loss": 0.0295, "step": 3450 },
    { "epoch": 0.2075365412552996, "grad_norm": 1.969928503036499, "learning_rate": 9.911829484878925e-06, "loss": 0.0252, "step": 3500 },
    { "epoch": 0.21050134898751816, "grad_norm": 1.3020251989364624, "learning_rate": 9.908874016802384e-06, "loss": 0.0282, "step": 3550 },
    { "epoch": 0.21346615671973673, "grad_norm": 4.441299915313721, "learning_rate": 9.905870283702265e-06, "loss": 0.0341, "step": 3600 },
    { "epoch": 0.21643096445195528, "grad_norm": 2.398268222808838, "learning_rate": 9.90281831511187e-06, "loss": 0.0268, "step": 3650 },
    { "epoch": 0.21939577218417386, "grad_norm": 1.5908421277999878, "learning_rate": 9.899718141038755e-06, "loss": 0.0236, "step": 3700 },
    { "epoch": 0.2223605799163924, "grad_norm": 1.215734601020813, "learning_rate": 9.896569791964452e-06, "loss": 0.0288, "step": 3750 },
    { "epoch": 0.225325387648611, "grad_norm": 2.0204992294311523, "learning_rate": 9.893373298844149e-06, "loss": 0.0181, "step": 3800 },
    { "epoch": 0.22829019538082956, "grad_norm": 1.6711108684539795, "learning_rate": 9.890128693106407e-06, "loss": 0.0247, "step": 3850 },
    { "epoch": 0.2312550031130481, "grad_norm": 0.4981835186481476, "learning_rate": 9.886836006652827e-06, "loss": 0.0265, "step": 3900 },
    { "epoch": 0.2342198108452667, "grad_norm": 3.5468335151672363, "learning_rate": 9.883495271857761e-06, "loss": 0.0259, "step": 3950 },
    { "epoch": 0.23718461857748524, "grad_norm": 1.3472596406936646, "learning_rate": 9.880106521567974e-06, "loss": 0.0181, "step": 4000 },
    { "epoch": 0.24014942630970382, "grad_norm": 0.5029717683792114, "learning_rate": 9.87666978910233e-06, "loss": 0.0247, "step": 4050 },
    { "epoch": 0.2431142340419224, "grad_norm": 1.5131045579910278, "learning_rate": 9.873185108251466e-06, "loss": 0.0201, "step": 4100 },
    { "epoch": 0.24607904177414094, "grad_norm": 3.9095380306243896, "learning_rate": 9.86965251327746e-06, "loss": 0.0222, "step": 4150 },
    { "epoch": 0.24904384950635952, "grad_norm": 1.2582632303237915, "learning_rate": 9.866072038913478e-06, "loss": 0.0202, "step": 4200 },
    { "epoch": 0.25200865723857807, "grad_norm": 2.0971181392669678, "learning_rate": 9.862443720363461e-06, "loss": 0.0198, "step": 4250 },
    { "epoch": 0.25497346497079665, "grad_norm": 0.3940514326095581, "learning_rate": 9.858767593301757e-06, "loss": 0.0222, "step": 4300 },
    { "epoch": 0.2579382727030152, "grad_norm": 1.9935857057571411, "learning_rate": 9.855043693872774e-06, "loss": 0.0249, "step": 4350 },
    { "epoch": 0.26090308043523375, "grad_norm": 3.2804274559020996, "learning_rate": 9.851272058690633e-06, "loss": 0.0253, "step": 4400 },
    { "epoch": 0.2638678881674523, "grad_norm": 1.2694517374038696, "learning_rate": 9.847452724838797e-06, "loss": 0.0235, "step": 4450 },
    { "epoch": 0.2668326958996709, "grad_norm": 2.894775152206421, "learning_rate": 9.843585729869716e-06, "loss": 0.0242, "step": 4500 },
    { "epoch": 0.2697975036318895, "grad_norm": 3.3155581951141357, "learning_rate": 9.839671111804452e-06, "loss": 0.0206, "step": 4550 },
    { "epoch": 0.27276231136410806, "grad_norm": 2.7083659172058105, "learning_rate": 9.835708909132307e-06, "loss": 0.0247, "step": 4600 },
    { "epoch": 0.2757271190963266, "grad_norm": 3.1053194999694824, "learning_rate": 9.831699160810443e-06, "loss": 0.0241, "step": 4650 },
    { "epoch": 0.27869192682854516, "grad_norm": 2.450427532196045, "learning_rate": 9.8276419062635e-06, "loss": 0.0274, "step": 4700 },
    { "epoch": 0.28165673456076373, "grad_norm": 1.419350028038025, "learning_rate": 9.823537185383214e-06, "loss": 0.0212, "step": 4750 },
    { "epoch": 0.2846215422929823, "grad_norm": 3.7026216983795166, "learning_rate": 9.819385038528014e-06, "loss": 0.0208, "step": 4800 },
    { "epoch": 0.2875863500252009, "grad_norm": 0.2673606872558594, "learning_rate": 9.815185506522629e-06, "loss": 0.0244, "step": 4850 },
    { "epoch": 0.2905511577574194, "grad_norm": 1.0690964460372925, "learning_rate": 9.810938630657693e-06, "loss": 0.0224, "step": 4900 },
    { "epoch": 0.293515965489638, "grad_norm": 2.188352346420288, "learning_rate": 9.806644452689334e-06, "loss": 0.0197, "step": 4950 },
    { "epoch": 0.29648077322185656, "grad_norm": 4.49456262588501, "learning_rate": 9.80230301483876e-06, "loss": 0.0255, "step": 5000 },
    { "epoch": 0.29648077322185656, "eval_loss": 0.12216129153966904, "eval_runtime": 1783.7018, "eval_samples_per_second": 3.282, "eval_steps_per_second": 0.821, "eval_wer": 11.202397485563921, "step": 5000 },
    { "epoch": 0.29944558095407514, "grad_norm": 2.8785881996154785, "learning_rate": 9.79791435979185e-06, "loss": 0.0255, "step": 5050 },
    { "epoch": 0.3024103886862937, "grad_norm": 1.6858680248260498, "learning_rate": 9.793478530698732e-06, "loss": 0.0179, "step": 5100 },
    { "epoch": 0.30537519641851224, "grad_norm": 2.8224334716796875, "learning_rate": 9.788995571173356e-06, "loss": 0.0222, "step": 5150 },
    { "epoch": 0.3083400041507308, "grad_norm": 4.539381980895996, "learning_rate": 9.784465525293075e-06, "loss": 0.0194, "step": 5200 },
    { "epoch": 0.3113048118829494, "grad_norm": 1.027093529701233, "learning_rate": 9.779888437598191e-06, "loss": 0.0193, "step": 5250 },
    { "epoch": 0.31426961961516797, "grad_norm": 2.6384902000427246, "learning_rate": 9.775264353091545e-06, "loss": 0.0206, "step": 5300 },
    { "epoch": 0.31723442734738655, "grad_norm": 1.0687259435653687, "learning_rate": 9.770593317238052e-06, "loss": 0.0204, "step": 5350 },
    { "epoch": 0.32019923507960507, "grad_norm": 3.0316076278686523, "learning_rate": 9.765875375964263e-06, "loss": 0.0228, "step": 5400 },
    { "epoch": 0.32316404281182365, "grad_norm": 2.7116587162017822, "learning_rate": 9.761110575657914e-06, "loss": 0.0196, "step": 5450 },
    { "epoch": 0.3261288505440422, "grad_norm": 0.9300628900527954, "learning_rate": 9.756298963167466e-06, "loss": 0.0181, "step": 5500 },
    { "epoch": 0.3290936582762608, "grad_norm": 1.9629039764404297, "learning_rate": 9.751440585801648e-06, "loss": 0.0165, "step": 5550 },
    { "epoch": 0.3320584660084793, "grad_norm": 0.2907628118991852, "learning_rate": 9.746535491328991e-06, "loss": 0.0216, "step": 5600 },
    { "epoch": 0.3350232737406979, "grad_norm": 2.418546199798584, "learning_rate": 9.741583727977357e-06, "loss": 0.0228, "step": 5650 },
    { "epoch": 0.3379880814729165, "grad_norm": 1.3516942262649536, "learning_rate": 9.736585344433466e-06, "loss": 0.0171, "step": 5700 },
    { "epoch": 0.34095288920513506, "grad_norm": 1.436192512512207, "learning_rate": 9.731540389842417e-06, "loss": 0.0209, "step": 5750 },
    { "epoch": 0.34391769693735363, "grad_norm": 2.503681182861328, "learning_rate": 9.726448913807208e-06, "loss": 0.02, "step": 5800 },
    { "epoch": 0.34688250466957216, "grad_norm": 2.688851833343506, "learning_rate": 9.721310966388235e-06, "loss": 0.021, "step": 5850 },
    { "epoch": 0.34984731240179073, "grad_norm": 1.2639074325561523, "learning_rate": 9.716126598102825e-06, "loss": 0.021, "step": 5900 },
    { "epoch": 0.3528121201340093, "grad_norm": 0.9510352611541748, "learning_rate": 9.710895859924708e-06, "loss": 0.0221, "step": 5950 },
    { "epoch": 0.3557769278662279, "grad_norm": 1.8296053409576416, "learning_rate": 9.705618803283545e-06, "loss": 0.0171, "step": 6000 },
    { "epoch": 0.35874173559844647, "grad_norm": 0.9670634865760803, "learning_rate": 9.700295480064402e-06, "loss": 0.0151, "step": 6050 },
    { "epoch": 0.361706543330665, "grad_norm": 1.096726894378662, "learning_rate": 9.69492594260725e-06, "loss": 0.019, "step": 6100 },
    { "epoch": 0.36467135106288356, "grad_norm": 3.529752731323242, "learning_rate": 9.689510243706448e-06, "loss": 0.02, "step": 6150 },
    { "epoch": 0.36763615879510214, "grad_norm": 1.7916548252105713, "learning_rate": 9.684048436610221e-06, "loss": 0.0179, "step": 6200 },
    { "epoch": 0.3706009665273207, "grad_norm": 1.1350276470184326, "learning_rate": 9.67854057502014e-06, "loss": 0.0184, "step": 6250 },
    { "epoch": 0.3735657742595393, "grad_norm": 2.405104875564575, "learning_rate": 9.672986713090596e-06, "loss": 0.0169, "step": 6300 },
    { "epoch": 0.3765305819917578, "grad_norm": 1.0677447319030762, "learning_rate": 9.667386905428258e-06, "loss": 0.0174, "step": 6350 },
    { "epoch": 0.3794953897239764, "grad_norm": 1.0359286069869995, "learning_rate": 9.661741207091548e-06, "loss": 0.0171, "step": 6400 },
    { "epoch": 0.382460197456195, "grad_norm": 2.040727376937866, "learning_rate": 9.656049673590093e-06, "loss": 0.0148, "step": 6450 },
    { "epoch": 0.38542500518841355, "grad_norm": 2.2050840854644775, "learning_rate": 9.65031236088418e-06, "loss": 0.0163, "step": 6500 },
    { "epoch": 0.38838981292063207, "grad_norm": 0.5841448903083801, "learning_rate": 9.644529325384207e-06, "loss": 0.0239, "step": 6550 },
    { "epoch": 0.39135462065285065, "grad_norm": 1.0610971450805664, "learning_rate": 9.638700623950124e-06, "loss": 0.0175, "step": 6600 },
    { "epoch": 0.3943194283850692, "grad_norm": 1.2772971391677856, "learning_rate": 9.632826313890883e-06, "loss": 0.0144, "step": 6650 },
    { "epoch": 0.3972842361172878, "grad_norm": 1.1676218509674072, "learning_rate": 9.626906452963864e-06, "loss": 0.0181, "step": 6700 },
    { "epoch": 0.4002490438495064, "grad_norm": 3.738065719604492, "learning_rate": 9.620941099374315e-06, "loss": 0.0189, "step": 6750 },
    { "epoch": 0.4032138515817249, "grad_norm": 3.0982308387756348, "learning_rate": 9.61493031177478e-06, "loss": 0.0181, "step": 6800 },
    { "epoch": 0.4061786593139435, "grad_norm": 2.3827264308929443, "learning_rate": 9.608874149264511e-06, "loss": 0.0181, "step": 6850 },
    { "epoch": 0.40914346704616206, "grad_norm": 0.6366240382194519, "learning_rate": 9.602772671388901e-06, "loss": 0.0155, "step": 6900 },
    { "epoch": 0.41210827477838063, "grad_norm": 0.6223490834236145, "learning_rate": 9.596625938138892e-06, "loss": 0.02, "step": 6950 },
    { "epoch": 0.4150730825105992, "grad_norm": 1.028148889541626, "learning_rate": 9.590434009950386e-06, "loss": 0.0184, "step": 7000 },
    { "epoch": 0.41803789024281773, "grad_norm": 2.3406949043273926, "learning_rate": 9.58419694770365e-06, "loss": 0.0205, "step": 7050 },
    { "epoch": 0.4210026979750363, "grad_norm": 3.0843560695648193, "learning_rate": 9.577914812722716e-06, "loss": 0.0176, "step": 7100 },
    { "epoch": 0.4239675057072549, "grad_norm": 2.7670023441314697, "learning_rate": 9.571587666774783e-06, "loss": 0.0165, "step": 7150 },
    { "epoch": 0.42693231343947347, "grad_norm": 1.995368480682373, "learning_rate": 9.565215572069604e-06, "loss": 0.0168, "step": 7200 },
    { "epoch": 0.42989712117169204, "grad_norm": 2.7526652812957764, "learning_rate": 9.55879859125888e-06, "loss": 0.0209, "step": 7250 },
    { "epoch": 0.43286192890391056, "grad_norm": 0.437929630279541, "learning_rate": 9.552336787435641e-06, "loss": 0.0211, "step": 7300 },
    { "epoch": 0.43582673663612914, "grad_norm": 3.0327157974243164, "learning_rate": 9.545830224133625e-06, "loss": 0.016, "step": 7350 },
    { "epoch": 0.4387915443683477, "grad_norm": 1.455119013786316, "learning_rate": 9.539278965326653e-06, "loss": 0.0145, "step": 7400 },
    { "epoch": 0.4417563521005663, "grad_norm": 1.194345474243164, "learning_rate": 9.53281543019193e-06, "loss": 0.0158, "step": 7450 },
    { "epoch": 0.4447211598327848, "grad_norm": 0.1790812462568283, "learning_rate": 9.526175864740102e-06, "loss": 0.0165, "step": 7500 },
    { "epoch": 0.4447211598327848, "eval_loss": 0.11122792959213257, "eval_runtime": 1792.6078, "eval_samples_per_second": 3.266, "eval_steps_per_second": 0.817, "eval_wer": 11.146845990790148, "step": 7500 },
    { "epoch": 0.4476859675650034, "grad_norm": 3.042560577392578, "learning_rate": 9.519491797028879e-06, "loss": 0.0143, "step": 7550 },
    { "epoch": 0.450650775297222, "grad_norm": 3.459578275680542, "learning_rate": 9.512763292777343e-06, "loss": 0.0188, "step": 7600 },
    { "epoch": 0.45361558302944055, "grad_norm": 0.7293067574501038, "learning_rate": 9.505990418141481e-06, "loss": 0.0172, "step": 7650 },
    { "epoch": 0.4565803907616591, "grad_norm": 2.7804245948791504, "learning_rate": 9.499173239713543e-06, "loss": 0.017, "step": 7700 },
    { "epoch": 0.45954519849387765, "grad_norm": 2.085571527481079, "learning_rate": 9.492311824521379e-06, "loss": 0.0152, "step": 7750 },
    { "epoch": 0.4625100062260962, "grad_norm": 2.437695264816284, "learning_rate": 9.485406240027784e-06, "loss": 0.0159, "step": 7800 },
    { "epoch": 0.4654748139583148, "grad_norm": 1.5121135711669922, "learning_rate": 9.478456554129837e-06, "loss": 0.0161, "step": 7850 },
    { "epoch": 0.4684396216905334, "grad_norm": 1.3547513484954834, "learning_rate": 9.471462835158226e-06, "loss": 0.0195, "step": 7900 },
    { "epoch": 0.47140442942275196, "grad_norm": 2.362305164337158, "learning_rate": 9.46442515187659e-06, "loss": 0.0129, "step": 7950 },
    { "epoch": 0.4743692371549705, "grad_norm": 0.3598177433013916, "learning_rate": 9.457343573480824e-06, "loss": 0.0162, "step": 8000 },
    { "epoch": 0.47733404488718906, "grad_norm": 2.5027735233306885, "learning_rate": 9.450218169598411e-06, "loss": 0.0153, "step": 8050 },
    { "epoch": 0.48029885261940763, "grad_norm": 1.3616218566894531, "learning_rate": 9.443049010287737e-06, "loss": 0.0137, "step": 8100 },
    { "epoch": 0.4832636603516262, "grad_norm": 2.2974565029144287, "learning_rate": 9.4358361660374e-06, "loss": 0.0163, "step": 8150 },
    { "epoch": 0.4862284680838448, "grad_norm": 0.3149053156375885, "learning_rate": 9.428579707765515e-06, "loss": 0.0105, "step": 8200 },
    { "epoch": 0.4891932758160633, "grad_norm": 0.2883080244064331, "learning_rate": 9.421279706819021e-06, "loss": 0.0174, "step": 8250 },
    { "epoch": 0.4921580835482819, "grad_norm": 1.0277905464172363, "learning_rate": 9.413936234972977e-06, "loss": 0.018, "step": 8300 },
    { "epoch": 0.49512289128050047, "grad_norm": 3.766566514968872, "learning_rate": 9.406549364429854e-06, "loss": 0.0135, "step": 8350 },
    { "epoch": 0.49808769901271904, "grad_norm": 0.7654131650924683, "learning_rate": 9.39911916781883e-06, "loss": 0.0159, "step": 8400 },
    { "epoch": 0.5010525067449376, "grad_norm": 2.5445258617401123, "learning_rate": 9.39164571819507e-06, "loss": 0.0165, "step": 8450 },
    { "epoch": 0.5040173144771561, "grad_norm": 0.7928656935691833, "learning_rate": 9.384129089039017e-06, "loss": 0.0152, "step": 8500 },
    { "epoch": 0.5069821222093748, "grad_norm": 2.195737600326538, "learning_rate": 9.376569354255663e-06, "loss": 0.0138, "step": 8550 },
    { "epoch": 0.5099469299415933, "grad_norm": 1.8981691598892212, "learning_rate": 9.368966588173815e-06, "loss": 0.0139, "step": 8600 },
    { "epoch": 0.5129117376738118, "grad_norm": 0.7925296425819397, "learning_rate": 9.36132086554538e-06, "loss": 0.016, "step": 8650 },
    { "epoch": 0.5158765454060305, "grad_norm": 1.9348927736282349, "learning_rate": 9.353632261544623e-06, "loss": 0.0127, "step": 8700 },
    { "epoch": 0.518841353138249, "grad_norm": 0.1095159500837326, "learning_rate": 9.345900851767422e-06, "loss": 0.0118, "step": 8750 },
    { "epoch": 0.5218061608704675, "grad_norm": 1.476868748664856, "learning_rate": 9.338126712230533e-06, "loss": 0.0168, "step": 8800 },
    { "epoch": 0.5247709686026861, "grad_norm": 1.4792319536209106, "learning_rate": 9.330309919370837e-06, "loss": 0.014, "step": 8850 },
    { "epoch": 0.5277357763349046, "grad_norm": 0.9512746930122375, "learning_rate": 9.322450550044594e-06, "loss": 0.0126, "step": 8900 },
    { "epoch": 0.5307005840671233, "grad_norm": 1.9876360893249512, "learning_rate": 9.314548681526685e-06, "loss": 0.015, "step": 8950 },
    { "epoch": 0.5336653917993418, "grad_norm": 3.569706916809082, "learning_rate": 9.306604391509847e-06, "loss": 0.0164, "step": 9000 },
    { "epoch": 0.5366301995315603, "grad_norm": 1.3703384399414062, "learning_rate": 9.298617758103918e-06, "loss": 0.0146, "step": 9050 },
    { "epoch": 0.539595007263779, "grad_norm": 1.3747698068618774, "learning_rate": 9.290588598350619e-06, "loss": 0.012, "step": 9100 },
    { "epoch": 0.5425598149959975, "grad_norm": 2.5859375, "learning_rate": 9.282517775645e-06, "loss": 0.0191, "step": 9150 },
    { "epoch": 0.5455246227282161, "grad_norm": 0.323015958070755, "learning_rate": 9.274404584890233e-06, "loss": 0.0153, "step": 9200 },
    { "epoch": 0.5484894304604346, "grad_norm": 1.5014373064041138, "learning_rate": 9.266249367341262e-06, "loss": 0.0115, "step": 9250 },
    { "epoch": 0.5514542381926532, "grad_norm": 2.329052448272705, "learning_rate": 9.258052203181809e-06, "loss": 0.0137, "step": 9300 },
    { "epoch": 0.5544190459248718, "grad_norm": 2.8387389183044434, "learning_rate": 9.249813173008014e-06, "loss": 0.0179, "step": 9350 },
    { "epoch": 0.5573838536570903, "grad_norm": 1.3269604444503784, "learning_rate": 9.241532357827658e-06, "loss": 0.0119, "step": 9400 },
    { "epoch": 0.5603486613893089, "grad_norm": Infinity, "learning_rate": 9.23320983905936e-06, "loss": 0.0147, "step": 9450 },
    { "epoch": 0.5633134691215275, "grad_norm": 1.0867879390716553, "learning_rate": 9.225013388704327e-06, "loss": 0.0129, "step": 9500 },
    { "epoch": 0.566278276853746, "grad_norm": 0.15310339629650116, "learning_rate": 9.2166085386372e-06, "loss": 0.0142, "step": 9550 },
    { "epoch": 0.5692430845859646, "grad_norm": 1.267125129699707, "learning_rate": 9.208162230038069e-06, "loss": 0.0132, "step": 9600 },
    { "epoch": 0.5722078923181831, "grad_norm": 7.623719692230225, "learning_rate": 9.199674545952714e-06, "loss": 0.0113, "step": 9650 },
    { "epoch": 0.5751727000504018, "grad_norm": 2.1407358646392822, "learning_rate": 9.19114556983373e-06, "loss": 0.0137, "step": 9700 },
    { "epoch": 0.5781375077826203, "grad_norm": 1.6153864860534668, "learning_rate": 9.182575385539705e-06, "loss": 0.012, "step": 9750 },
    { "epoch": 0.5811023155148388, "grad_norm": 0.7798183560371399, "learning_rate": 9.17396407733439e-06, "loss": 0.0117, "step": 9800 },
    { "epoch": 0.5840671232470575, "grad_norm": 1.1946767568588257, "learning_rate": 9.165311729885875e-06, "loss": 0.0129, "step": 9850 },
    { "epoch": 0.587031930979276, "grad_norm": 1.4665184020996094, "learning_rate": 9.15661842826576e-06, "loss": 0.0128, "step": 9900 },
    { "epoch": 0.5899967387114946, "grad_norm": 1.9920064210891724, "learning_rate": 9.147884257948311e-06, "loss": 0.0124, "step": 9950 },
    { "epoch": 0.5929615464437131, "grad_norm": 1.1829341650009155, "learning_rate": 9.139109304809624e-06, "loss": 0.0099, "step": 10000 },
    { "epoch": 0.5929615464437131, "eval_loss": 0.10716072469949722, "eval_runtime": 1812.2336, "eval_samples_per_second": 3.231, "eval_steps_per_second": 0.808, "eval_wer": 12.313427381039398, "step": 10000 },
    { "epoch": 0.5959263541759316, "grad_norm": 0.11610784381628036, "learning_rate": 9.130293655126778e-06, "loss": 0.0108, "step": 10050 },
    { "epoch": 0.5988911619081503, "grad_norm": 2.0204625129699707, "learning_rate": 9.121437395576994e-06, "loss": 0.0141, "step": 10100 },
    { "epoch": 0.6018559696403688, "grad_norm": 1.855035662651062, "learning_rate": 9.112540613236769e-06, "loss": 0.0107, "step": 10150 },
    { "epoch": 0.6048207773725874, "grad_norm": 1.2485501766204834, "learning_rate": 9.103603395581036e-06, "loss": 0.0131, "step": 10200 },
    { "epoch": 0.607785585104806, "grad_norm": 2.173248052597046, "learning_rate": 9.094625830482292e-06, "loss": 0.0132, "step": 10250 },
    { "epoch": 0.6107503928370245, "grad_norm": 2.0845844745635986, "learning_rate": 9.08560800620974e-06, "loss": 0.0162, "step": 10300 },
    { "epoch": 0.6137152005692431, "grad_norm": 0.12550178170204163, "learning_rate": 9.076550011428415e-06, "loss": 0.0103, "step": 10350 },
    { "epoch": 0.6166800083014616, "grad_norm": 1.7097872495651245, "learning_rate": 9.067451935198322e-06, "loss": 0.0118, "step": 10400 },
    { "epoch": 0.6196448160336803, "grad_norm": 1.811226725578308, "learning_rate": 9.05831386697355e-06, "loss": 0.0103, "step": 10450 },
    { "epoch": 0.6226096237658988, "grad_norm": 2.809597969055176, "learning_rate": 9.0491358966014e-06, "loss": 0.0115, "step": 10500 },
    { "epoch": 0.6255744314981173, "grad_norm": 0.09170404076576233, "learning_rate": 9.0399181143215e-06, "loss": 0.0109, "step": 10550 },
    { "epoch": 0.6285392392303359, "grad_norm": 1.455367922782898, "learning_rate": 9.030660610764913e-06, "loss": 0.013, "step": 10600 },
    { "epoch": 0.6315040469625545, "grad_norm": 1.2813187837600708, "learning_rate": 9.021363476953252e-06, "loss": 0.0142, "step": 10650 },
    { "epoch": 0.6344688546947731, "grad_norm": 3.7294063568115234, "learning_rate": 9.012026804297783e-06, "loss": 0.0151, "step": 10700 },
    { "epoch": 0.6374336624269916, "grad_norm": 1.4082695245742798, "learning_rate": 9.002650684598519e-06, "loss": 0.0114, "step": 10750 },
    { "epoch": 0.6403984701592101, "grad_norm": 0.6885934472084045, "learning_rate": 8.993235210043339e-06, "loss": 0.0132, "step": 10800 },
    { "epoch": 0.6433632778914288, "grad_norm": 1.1015677452087402, "learning_rate": 8.983780473207049e-06, "loss": 0.0095, "step": 10850 },
    { "epoch": 0.6463280856236473, "grad_norm": 2.8200488090515137, "learning_rate": 8.974286567050503e-06, "loss": 0.0133, "step": 10900 },
    { "epoch": 0.6492928933558658, "grad_norm": 1.1393108367919922, "learning_rate": 8.964753584919668e-06, "loss": 0.0097, "step": 10950 },
    { "epoch": 0.6522577010880845, "grad_norm": 1.8517667055130005, "learning_rate": 8.955181620544718e-06, "loss": 0.0102, "step": 11000 },
    { "epoch": 0.655222508820303, "grad_norm": 1.8156200647354126, "learning_rate": 8.945570768039104e-06, "loss": 0.0114, "step": 11050 },
    { "epoch": 0.6581873165525216, "grad_norm": 0.035381220281124115, "learning_rate": 8.935921121898639e-06, "loss": 0.0108, "step": 11100 },
    { "epoch": 0.6611521242847401, "grad_norm": 12.99875545501709, "learning_rate": 8.926232777000556e-06, "loss": 0.0157, "step": 11150 },
    { "epoch": 0.6641169320169586, "grad_norm": 0.34489646553993225, "learning_rate": 8.916505828602585e-06, "loss": 0.014, "step": 11200 },
    { "epoch": 0.6670817397491773, "grad_norm": 1.3919397592544556, "learning_rate": 8.906740372342016e-06, "loss": 0.0124, "step": 11250 },
    { "epoch": 0.6700465474813958, "grad_norm": 2.139253854751587, "learning_rate": 8.896936504234749e-06, "loss": 0.0124, "step": 11300 },
    { "epoch": 0.6730113552136144, "grad_norm": 2.3928396701812744, "learning_rate": 8.887094320674363e-06, "loss": 0.0111, "step": 11350 },
    { "epoch": 0.675976162945833, "grad_norm": 0.7535815834999084, "learning_rate": 8.87721391843116e-06, "loss": 0.0115, "step": 11400 },
    { "epoch": 0.6789409706780515, "grad_norm": 1.517271637916565, "learning_rate": 8.867295394651216e-06, "loss": 0.0119, "step": 11450 },
    { "epoch": 0.6819057784102701, "grad_norm": 1.9762825965881348, "learning_rate": 8.85753834981413e-06, "loss": 0.0132, "step": 11500 },
    { "epoch": 0.6848705861424886, "grad_norm": 0.9906989932060242, "learning_rate": 8.847544633457886e-06, "loss": 0.0118, "step": 11550 },
    { "epoch": 0.6878353938747073, "grad_norm": 0.677872896194458, "learning_rate": 8.837513087279198e-06, "loss": 0.0128, "step": 11600 },
    { "epoch": 0.6908002016069258, "grad_norm": 0.6202975511550903, "learning_rate": 8.827443809910221e-06, "loss": 0.012, "step": 11650 },
    { "epoch": 0.6937650093391443, "grad_norm": 0.8511649370193481, "learning_rate": 8.817336900354088e-06, "loss": 0.0116, "step": 11700 },
    { "epoch": 0.696729817071363, "grad_norm": 1.111178994178772, "learning_rate": 8.807192457983939e-06, "loss": 0.0119, "step": 11750 },
    { "epoch": 0.6996946248035815, "grad_norm": 2.57309889793396, "learning_rate": 8.797010582541944e-06, "loss": 0.0108, "step": 11800 },
    { "epoch": 0.7026594325358001, "grad_norm": 3.5267422199249268, "learning_rate": 8.786791374138324e-06, "loss": 0.0148, "step": 11850 },
    { "epoch": 0.7056242402680186, "grad_norm": 0.18291831016540527, "learning_rate": 8.776534933250364e-06, "loss": 0.0119, "step": 11900 },
    { "epoch": 0.7085890480002371, "grad_norm": 1.7078309059143066, "learning_rate": 8.766241360721427e-06, "loss": 0.0095, "step": 11950 },
    { "epoch": 0.7115538557324558, "grad_norm": 1.5815846920013428, "learning_rate": 8.755910757759958e-06, "loss": 0.0109, "step": 12000 },
    { "epoch": 0.7145186634646743, "grad_norm": 0.38515791296958923, "learning_rate": 8.745543225938502e-06, "loss": 0.0095, "step": 12050 },
    { "epoch": 0.7174834711968929, "grad_norm": 0.8359034061431885, "learning_rate": 8.735138867192683e-06, "loss": 0.011, "step": 12100 },
    { "epoch": 0.7204482789291115, "grad_norm": 1.3351069688796997, "learning_rate": 8.724697783820223e-06, "loss": 0.0115, "step": 12150 },
    { "epoch": 0.72341308666133, "grad_norm": 1.6904191970825195, "learning_rate": 8.714220078479929e-06, "loss": 0.012, "step": 12200 },
    { "epoch": 0.7263778943935486, "grad_norm": 1.6055872440338135, "learning_rate": 8.703705854190675e-06, "loss": 0.0125, "step": 12250 },
    { "epoch": 0.7293427021257671, "grad_norm": 2.7653372287750244, "learning_rate": 8.693155214330405e-06, "loss": 0.0117, "step": 12300 },
    { "epoch": 0.7323075098579858, "grad_norm": 3.0810275077819824, "learning_rate": 8.682568262635101e-06, "loss": 0.0119, "step": 12350 },
    { "epoch": 0.7352723175902043, "grad_norm": 1.5143022537231445, "learning_rate": 8.671945103197772e-06, "loss": 0.0103, "step": 12400 },
    { "epoch": 0.7382371253224228, "grad_norm": 1.342732548713684, "learning_rate": 8.661285840467434e-06, "loss": 0.0107, "step": 12450 },
    { "epoch": 0.7412019330546414, "grad_norm": 1.0729478597640991, "learning_rate": 8.650590579248071e-06, "loss": 0.0083, "step": 12500 },
    { "epoch": 0.7412019330546414, "eval_loss": 0.10575078427791595, "eval_runtime": 3755.3699, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.39, "eval_wer": 13.099919596520722, "step": 12500 },
    { "epoch": 0.74416674078686, "grad_norm": 0.11206492781639099, "learning_rate": 8.639859424697615e-06, "loss": 0.0116, "step": 12550 },
    { "epoch": 0.7471315485190786, "grad_norm": 3.2454352378845215, "learning_rate": 8.629092482326908e-06, "loss": 0.0106, "step": 12600 },
    { "epoch": 0.7500963562512971, "grad_norm": 2.604566812515259, "learning_rate": 8.618289857998661e-06, "loss": 0.0104, "step": 12650 },
    { "epoch": 0.7530611639835156, "grad_norm": 0.28832483291625977, "learning_rate": 8.607451657926424e-06, "loss": 0.011, "step": 12700 },
    { "epoch": 0.7560259717157343, "grad_norm": 0.390303373336792, "learning_rate": 8.59657798867353e-06, "loss": 0.01, "step": 12750 },
    { "epoch": 0.7589907794479528, "grad_norm": 1.1761589050292969, "learning_rate": 8.58566895715205e-06, "loss": 0.0113, "step": 12800 },
    { "epoch": 0.7619555871801713, "grad_norm": 1.9691640138626099, "learning_rate": 8.574724670621753e-06, "loss": 0.0084, "step": 12850 },
    { "epoch": 0.76492039491239, "grad_norm": 0.49470266699790955, "learning_rate": 8.563745236689031e-06, "loss": 0.0085, "step": 12900 },
    { "epoch": 0.7678852026446085, "grad_norm": 0.1691243201494217, "learning_rate": 8.552730763305862e-06, "loss": 0.0096, "step": 12950 },
    { "epoch": 0.7708500103768271, "grad_norm": 2.035419225692749, "learning_rate": 8.54168135876873e-06, "loss": 0.0099, "step": 13000 },
    { "epoch": 0.7738148181090456, "grad_norm": 3.7284252643585205, "learning_rate": 8.530597131717577e-06, "loss": 0.0105, "step": 13050 },
    { "epoch": 0.7767796258412641, "grad_norm": 1.6268372535705566, "learning_rate": 8.51947819113472e-06, "loss": 0.0106, "step": 13100 },
    { "epoch": 0.7797444335734828, "grad_norm": 3.1598916053771973, "learning_rate": 8.508324646343791e-06, "loss": 0.0111, "step": 13150 },
    { "epoch": 0.7827092413057013, "grad_norm": 0.1609897017478943, "learning_rate": 8.497136607008656e-06, "loss": 0.0122, "step": 13200 },
    { "epoch": 0.7856740490379199, "grad_norm": 0.5666967034339905, "learning_rate": 8.485914183132338e-06, "loss": 0.0085, "step": 13250 },
    { "epoch": 0.7886388567701385, "grad_norm": 1.9069159030914307, "learning_rate": 8.474657485055936e-06, "loss": 0.0106, "step": 13300 },
    { "epoch": 0.791603664502357, "grad_norm": 1.5358110666275024, "learning_rate": 8.463366623457538e-06, "loss": 0.0081, "step": 13350 },
    { "epoch": 0.7945684722345756, "grad_norm": 1.486150860786438, "learning_rate": 8.45204170935114e-06, "loss": 0.0083, "step": 13400 },
    { "epoch": 0.7975332799667941, "grad_norm": 2.6618776321411133, "learning_rate": 8.440682854085543e-06, "loss": 0.0112, "step": 13450 },
    { "epoch": 0.8004980876990128, "grad_norm": 1.1893341541290283, "learning_rate": 8.429290169343264e-06, "loss": 0.012, "step": 13500 },
    { "epoch": 0.8034628954312313, "grad_norm": 2.5905745029449463, "learning_rate": 8.417863767139444e-06, "loss": 0.0093, "step": 13550 },
    { "epoch": 0.8064277031634498, "grad_norm": 1.5279064178466797, "learning_rate": 8.406403759820734e-06, "loss": 0.0109, "step": 13600 },
    { "epoch": 0.8093925108956684, "grad_norm": 1.5376503467559814, "learning_rate": 8.394910260064203e-06, "loss": 0.0098, "step": 13650 },
    { "epoch": 0.812357318627887, "grad_norm": 0.1439572125673294, "learning_rate": 8.38338338087622e-06, "loss": 0.0077, "step": 13700 },
    { "epoch": 0.8153221263601056, "grad_norm": 1.7664012908935547, "learning_rate": 8.371823235591352e-06, "loss": 0.0101, "step": 13750 },
    { "epoch": 0.8182869340923241, "grad_norm": 0.2659100592136383, "learning_rate": 8.360229937871237e-06, "loss": 0.0093, "step": 13800 },
    { "epoch": 0.8212517418245426, "grad_norm": 0.21515263617038727, "learning_rate": 8.348603601703483e-06, "loss": 0.0069, "step": 13850 },
    { "epoch": 0.8242165495567613, "grad_norm": 1.8138411045074463, "learning_rate": 8.33717784852218e-06, "loss": 0.011, "step": 13900 },
    { "epoch": 0.8271813572889798, "grad_norm": 1.0726341009140015, "learning_rate": 8.325486433784653e-06, "loss": 0.0105, "step": 13950 },
    { "epoch": 0.8301461650211984, "grad_norm": 3.987571954727173, "learning_rate": 8.313762322204512e-06, "loss": 0.0111, "step": 14000 },
    { "epoch": 0.833110972753417, "grad_norm": 0.5988409519195557, "learning_rate": 8.302005629055549e-06, "loss": 0.0123, "step": 14050 },
    { "epoch": 0.8360757804856355, "grad_norm": 1.5194250345230103, "learning_rate": 8.290216469931907e-06, "loss": 0.0105, "step": 14100 },
    { "epoch": 0.8390405882178541, "grad_norm": 1.19874107837677, "learning_rate": 8.278394960746936e-06, "loss": 0.0091, "step": 14150 },
    { "epoch": 0.8420053959500726, "grad_norm": 1.3419668674468994, "learning_rate": 8.26654121773206e-06, "loss": 0.011, "step": 14200 },
    { "epoch": 0.8449702036822913, "grad_norm": 2.4264957904815674, "learning_rate": 8.254655357435635e-06, "loss": 0.0099, "step": 14250 },
    { "epoch": 0.8479350114145098, "grad_norm": 0.842897355556488, "learning_rate": 8.242737496721797e-06, "loss": 0.0083, "step": 14300 },
    { "epoch": 0.8508998191467283, "grad_norm": 0.6375104784965515, "learning_rate": 8.230787752769317e-06, "loss": 0.0084, "step": 14350 },
    { "epoch": 0.8538646268789469, "grad_norm": 0.03742964193224907, "learning_rate": 8.218806243070451e-06, "loss": 0.0074, "step": 14400 },
    { "epoch": 0.8568294346111655, "grad_norm": 0.6775150299072266, "learning_rate": 8.206793085429781e-06, "loss": 0.0096, "step": 14450 },
    { "epoch": 0.8597942423433841, "grad_norm": 1.218371868133545, "learning_rate": 8.194748397963053e-06, "loss": 0.0106, "step": 14500 },
    { "epoch": 0.8627590500756026, "grad_norm": 0.3041909635066986, "learning_rate": 8.182672299096028e-06, "loss": 0.01, "step": 14550 },
    { "epoch": 0.8657238578078211, "grad_norm": 0.5829514861106873, "learning_rate": 8.170564907563308e-06, "loss": 0.0086, "step": 14600 },
    { "epoch": 0.8686886655400398, "grad_norm": 1.9637973308563232, "learning_rate": 8.158426342407164e-06, "loss": 0.0104, "step": 14650 },
    { "epoch": 0.8716534732722583, "grad_norm": 3.4624621868133545, "learning_rate": 8.146256722976383e-06, "loss": 0.0077, "step": 14700 },
    { "epoch": 0.8746182810044768, "grad_norm": 0.18807068467140198, "learning_rate": 8.134056168925077e-06, "loss": 0.0058, "step": 14750 },
    { "epoch": 0.8775830887366954, "grad_norm": 2.7400877475738525, "learning_rate": 8.121824800211514e-06, "loss": 0.0105, "step": 14800 },
    { "epoch": 0.880547896468914, "grad_norm": 0.5604029893875122, "learning_rate": 8.109562737096938e-06, "loss": 0.0104, "step": 14850 },
    { "epoch": 0.8835127042011326, "grad_norm": 0.2727389335632324, "learning_rate": 8.09727010014439e-06, "loss": 0.0094, "step": 14900 },
    { "epoch": 0.8864775119333511, "grad_norm": 0.35746484994888306, "learning_rate": 8.084947010217514e-06, "loss": 0.0085, "step": 14950 },
    { "epoch": 0.8894423196655696, "grad_norm": 0.06409318745136261, "learning_rate": 8.072593588479378e-06, "loss": 0.0094, "step": 15000 },
    { "epoch": 0.8894423196655696, "eval_loss": 0.09929487109184265, "eval_runtime": 4386.1659, "eval_samples_per_second": 1.335, "eval_steps_per_second": 0.334, "eval_wer": 24.461662159198887, "step": 15000 },
    { "epoch": 0.8924071273977883, "grad_norm": 0.6662552952766418, "learning_rate": 8.060209956391271e-06, "loss": 0.0095, "step": 15050 },
    { "epoch": 0.8953719351300068, "grad_norm": 3.1687803268432617, "learning_rate": 8.047796235711527e-06, "loss": 0.0101, "step": 15100 },
    { "epoch": 0.8983367428622254, "grad_norm": 0.07528534531593323, "learning_rate": 8.035352548494311e-06, "loss": 0.0096, "step": 15150 },
    { "epoch": 0.901301550594444, "grad_norm": 0.8370321989059448, "learning_rate": 8.022879017088422e-06, "loss": 0.0113, "step": 15200 },
    { "epoch": 0.9042663583266625, "grad_norm": 0.9781240224838257, "learning_rate": 8.010375764136098e-06, "loss": 0.0114, "step": 15250 },
    { "epoch": 0.9072311660588811, "grad_norm": 0.2995459735393524, "learning_rate": 7.997842912571805e-06, "loss": 0.009, "step": 15300 },
    { "epoch": 0.9101959737910996, "grad_norm": 1.0502598285675049, "learning_rate": 7.985280585621024e-06, "loss": 0.0089, "step": 15350 },
    { "epoch": 0.9131607815233183, "grad_norm": 2.38645339012146, "learning_rate": 7.97268890679905e-06, "loss": 0.0128, "step": 15400 },
    { "epoch": 0.9161255892555368, "grad_norm": 0.21894457936286926, "learning_rate": 7.960067999909767e-06, "loss": 0.0088, "step": 15450 },
    { "epoch": 0.9190903969877553, "grad_norm": 0.12525251507759094, "learning_rate": 7.947417989044434e-06, "loss": 0.0104, "step": 15500 },
    { "epoch": 0.9220552047199739, "grad_norm": 0.13473589718341827, "learning_rate": 7.934738998580473e-06, "loss": 0.0091, "step": 15550 },
    { "epoch": 0.9250200124521925, "grad_norm": 0.14185883104801178, "learning_rate": 7.92203115318023e-06, "loss": 0.0092, "step": 15600 },
    { "epoch": 0.9279848201844111, "grad_norm": 0.34865042567253113, "learning_rate": 7.909294577789765e-06, "loss": 0.0074, "step": 15650 },
    { "epoch": 0.9309496279166296, "grad_norm": 0.6784268021583557, "learning_rate": 7.896529397637615e-06, "loss": 0.0073, "step": 15700 },
    { "epoch": 0.9339144356488481, "grad_norm": 0.7907705903053284, "learning_rate": 7.883735738233565e-06, "loss": 0.0084, "step": 15750 },
    { "epoch": 0.9368792433810668, "grad_norm": 2.074660301208496, "learning_rate": 7.870913725367413e-06, "loss": 0.0113, "step": 15800 },
    { "epoch": 0.9398440511132853, "grad_norm": 0.11929433792829514, "learning_rate": 7.858063485107736e-06, "loss": 0.007, "step": 15850 },
    { "epoch": 0.9428088588455039, "grad_norm": 1.5103803873062134, "learning_rate": 7.845185143800644e-06, "loss": 0.0081, "step": 15900 },
    { "epoch": 0.9457736665777224, "grad_norm": 2.054821491241455, "learning_rate": 7.832278828068546e-06, "loss": 0.0109, "step": 15950 },
    { "epoch": 0.948738474309941, "grad_norm": 0.14713267982006073, "learning_rate": 7.8193446648089e-06, "loss": 0.0091, "step": 16000 },
    { "epoch": 0.9517032820421596, "grad_norm": 0.16898584365844727, "learning_rate": 7.806382781192962e-06, "loss": 0.0081, "step": 16050 },
    { "epoch": 0.9546680897743781, "grad_norm": 1.8700170516967773, "learning_rate": 7.793393304664549e-06, "loss": 0.0103, "step": 16100 },
    { "epoch": 0.9576328975065967, "grad_norm": 0.3140694499015808, "learning_rate": 7.780376362938773e-06, "loss": 0.0114, "step": 16150 },
    { "epoch": 0.9605977052388153, "grad_norm": 2.2821805477142334, "learning_rate": 7.767332084000784e-06, "loss": 0.01, "step": 16200 },
    { "epoch": 0.9635625129710338, "grad_norm": 0.20296697318553925, "learning_rate": 7.754522291679406e-06, "loss": 0.0089, "step": 16250 },
    { "epoch": 0.9665273207032524, "grad_norm": 0.31332024931907654, "learning_rate": 7.741424263693839e-06, "loss": 0.0092, "step": 16300 },
    { "epoch": 0.969492128435471, "grad_norm": 0.6727187633514404, "learning_rate": 7.728299281480833e-06, "loss": 0.0102, "step": 16350 },
    { "epoch": 0.9724569361676896, "grad_norm": 1.2364569902420044, "learning_rate": 7.715147474087817e-06, "loss": 0.0088, "step": 16400 },
    { "epoch": 0.9754217438999081, "grad_norm": 0.06689300388097763, "learning_rate": 7.701968970825973e-
| "loss": 0.0102, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.9783865516321266, | |
| "grad_norm": 2.35491681098938, | |
| "learning_rate": 7.688763901268955e-06, | |
| "loss": 0.0078, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.9813513593643453, | |
| "grad_norm": 1.78278386592865, | |
| "learning_rate": 7.675532395251633e-06, | |
| "loss": 0.0069, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.9843161670965638, | |
| "grad_norm": 0.45280998945236206, | |
| "learning_rate": 7.6622745828688e-06, | |
| "loss": 0.0081, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.9872809748287823, | |
| "grad_norm": 1.32427978515625, | |
| "learning_rate": 7.648990594473897e-06, | |
| "loss": 0.0077, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.9902457825610009, | |
| "grad_norm": 2.651205062866211, | |
| "learning_rate": 7.635680560677734e-06, | |
| "loss": 0.0094, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.9932105902932195, | |
| "grad_norm": 1.2493482828140259, | |
| "learning_rate": 7.622344612347206e-06, | |
| "loss": 0.0139, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.9961753980254381, | |
| "grad_norm": 0.7914796471595764, | |
| "learning_rate": 7.6089828806040015e-06, | |
| "loss": 0.0086, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.9991402057576566, | |
| "grad_norm": 2.1952311992645264, | |
| "learning_rate": 7.595595496823321e-06, | |
| "loss": 0.011, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 1.002075365412553, | |
| "grad_norm": 0.2566259801387787, | |
| "learning_rate": 7.582182592632577e-06, | |
| "loss": 0.0042, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.0050401731447716, | |
| "grad_norm": 0.19997215270996094, | |
| "learning_rate": 7.568744299910109e-06, | |
| "loss": 0.0058, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 1.0080049808769902, | |
| "grad_norm": 0.04884817823767662, | |
| "learning_rate": 7.555280750783876e-06, | |
| "loss": 0.005, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.0109697886092086, | |
| "grad_norm": 0.07050412893295288, | |
| "learning_rate": 7.541792077630163e-06, | |
| "loss": 0.005, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 1.0139345963414272, | |
| "grad_norm": 0.8894023299217224, | |
| "learning_rate": 7.528278413072285e-06, | |
| "loss": 0.0049, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.0168994040736459, | |
| "grad_norm": 0.611845076084137, | |
| "learning_rate": 7.514739889979272e-06, | |
| "loss": 0.0052, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 1.0198642118058643, | |
| "grad_norm": 0.12328474968671799, | |
| "learning_rate": 7.501176641464569e-06, | |
| "loss": 0.004, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.022829019538083, | |
| "grad_norm": 1.4184530973434448, | |
| "learning_rate": 7.4875888008847294e-06, | |
| "loss": 0.0029, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 1.0257938272703016, | |
| "grad_norm": 0.09332071244716644, | |
| "learning_rate": 7.4739765018380986e-06, | |
| "loss": 0.0035, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.0287586350025202, | |
| "grad_norm": 0.786600649356842, | |
| "learning_rate": 7.460339878163501e-06, | |
| "loss": 0.0041, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 1.0317234427347386, | |
| "grad_norm": 0.8689146041870117, | |
| "learning_rate": 7.446679063938926e-06, | |
| "loss": 0.0029, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.0346882504669572, | |
| "grad_norm": 2.0220930576324463, | |
| "learning_rate": 7.432994193480214e-06, | |
| "loss": 0.0044, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 1.0376530581991759, | |
| "grad_norm": 0.15765543282032013, | |
| "learning_rate": 7.419285401339723e-06, | |
| "loss": 0.0057, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.0376530581991759, | |
| "eval_loss": 0.10125549137592316, | |
| "eval_runtime": 4053.4099, | |
| "eval_samples_per_second": 1.444, | |
| "eval_steps_per_second": 0.361, | |
| "eval_wer": 17.37738469410131, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.0406178659313943, | |
| "grad_norm": 1.3988970518112183, | |
| "learning_rate": 7.40555282230502e-06, | |
| "loss": 0.0041, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 1.043582673663613, | |
| "grad_norm": 0.5283538103103638, | |
| "learning_rate": 7.391796591397548e-06, | |
| "loss": 0.005, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.0465474813958315, | |
| "grad_norm": 0.8953123688697815, | |
| "learning_rate": 7.378016843871301e-06, | |
| "loss": 0.0036, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 1.04951228912805, | |
| "grad_norm": 0.2542392313480377, | |
| "learning_rate": 7.364490006042365e-06, | |
| "loss": 0.0058, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.0524770968602686, | |
| "grad_norm": 0.13877496123313904, | |
| "learning_rate": 7.350664095540969e-06, | |
| "loss": 0.0047, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 1.0554419045924872, | |
| "grad_norm": 0.668488621711731, | |
| "learning_rate": 7.3368150728436595e-06, | |
| "loss": 0.0041, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.0584067123247058, | |
| "grad_norm": 1.1569958925247192, | |
| "learning_rate": 7.322943074116774e-06, | |
| "loss": 0.0045, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 1.0613715200569243, | |
| "grad_norm": 0.23624324798583984, | |
| "learning_rate": 7.3090482357525595e-06, | |
| "loss": 0.0047, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.0643363277891429, | |
| "grad_norm": 0.19879819452762604, | |
| "learning_rate": 7.295130694367823e-06, | |
| "loss": 0.0046, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 1.0673011355213615, | |
| "grad_norm": 0.0767776146531105, | |
| "learning_rate": 7.2811905868025936e-06, | |
| "loss": 0.004, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.07026594325358, | |
| "grad_norm": 0.13302555680274963, | |
| "learning_rate": 7.267228050118776e-06, | |
| "loss": 0.0032, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 1.0732307509857986, | |
| "grad_norm": 0.2847168743610382, | |
| "learning_rate": 7.253243221598803e-06, | |
| "loss": 0.0044, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.0761955587180172, | |
| "grad_norm": 0.3598785698413849, | |
| "learning_rate": 7.2392362387442815e-06, | |
| "loss": 0.0041, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 1.0791603664502356, | |
| "grad_norm": 0.021318763494491577, | |
| "learning_rate": 7.22520723927465e-06, | |
| "loss": 0.0038, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.0821251741824542, | |
| "grad_norm": 0.11015797406435013, | |
| "learning_rate": 7.211156361125812e-06, | |
| "loss": 0.0038, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 1.0850899819146729, | |
| "grad_norm": 1.6273869276046753, | |
| "learning_rate": 7.197083742448792e-06, | |
| "loss": 0.0026, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.0880547896468915, | |
| "grad_norm": 0.035194288939237595, | |
| "learning_rate": 7.182989521608371e-06, | |
| "loss": 0.003, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 1.09101959737911, | |
| "grad_norm": 0.05955716222524643, | |
| "learning_rate": 7.168873837181725e-06, | |
| "loss": 0.0033, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.0939844051113286, | |
| "grad_norm": 0.02106299065053463, | |
| "learning_rate": 7.154736827957063e-06, | |
| "loss": 0.0032, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 1.0969492128435472, | |
| "grad_norm": 0.046555496752262115, | |
| "learning_rate": 7.140578632932267e-06, | |
| "loss": 0.0065, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.0999140205757656, | |
| "grad_norm": 1.2249200344085693, | |
| "learning_rate": 7.126399391313519e-06, | |
| "loss": 0.0035, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 1.1028788283079842, | |
| "grad_norm": 0.06700051575899124, | |
| "learning_rate": 7.112199242513939e-06, | |
| "loss": 0.0034, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.1058436360402029, | |
| "grad_norm": 0.5902153253555298, | |
| "learning_rate": 7.097978326152205e-06, | |
| "loss": 0.0031, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 1.1088084437724213, | |
| "grad_norm": 0.08541566133499146, | |
| "learning_rate": 7.0837367820511916e-06, | |
| "loss": 0.0038, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.11177325150464, | |
| "grad_norm": 0.33190247416496277, | |
| "learning_rate": 7.069474750236586e-06, | |
| "loss": 0.0043, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 1.1147380592368585, | |
| "grad_norm": 0.038232989609241486, | |
| "learning_rate": 7.055192370935517e-06, | |
| "loss": 0.004, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.117702866969077, | |
| "grad_norm": 0.06271852552890778, | |
| "learning_rate": 7.040889784575171e-06, | |
| "loss": 0.0025, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 1.1206676747012956, | |
| "grad_norm": 0.044638846069574356, | |
| "learning_rate": 7.026567131781414e-06, | |
| "loss": 0.0033, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.1236324824335142, | |
| "grad_norm": 2.2346765995025635, | |
| "learning_rate": 7.012224553377413e-06, | |
| "loss": 0.0044, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 1.1265972901657328, | |
| "grad_norm": 1.7368957996368408, | |
| "learning_rate": 6.9978621903822454e-06, | |
| "loss": 0.0038, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.1295620978979513, | |
| "grad_norm": 0.01738637126982212, | |
| "learning_rate": 6.983480184009515e-06, | |
| "loss": 0.0019, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 1.1325269056301699, | |
| "grad_norm": 1.5712268352508545, | |
| "learning_rate": 6.969078675665962e-06, | |
| "loss": 0.0031, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.1354917133623885, | |
| "grad_norm": 3.365665912628174, | |
| "learning_rate": 6.954657806950074e-06, | |
| "loss": 0.0045, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 1.138456521094607, | |
| "grad_norm": 2.0195226669311523, | |
| "learning_rate": 6.940217719650695e-06, | |
| "loss": 0.0044, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.1414213288268256, | |
| "grad_norm": 3.0657689571380615, | |
| "learning_rate": 6.925758555745628e-06, | |
| "loss": 0.0042, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 1.1443861365590442, | |
| "grad_norm": 0.5422585010528564, | |
| "learning_rate": 6.911280457400244e-06, | |
| "loss": 0.0031, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.1473509442912628, | |
| "grad_norm": 0.1482527256011963, | |
| "learning_rate": 6.896783566966079e-06, | |
| "loss": 0.0037, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 1.1503157520234812, | |
| "grad_norm": 1.2458192110061646, | |
| "learning_rate": 6.882268026979436e-06, | |
| "loss": 0.0031, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.1532805597556999, | |
| "grad_norm": 0.020457390695810318, | |
| "learning_rate": 6.867733980159986e-06, | |
| "loss": 0.0049, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 1.1562453674879185, | |
| "grad_norm": 0.8948466181755066, | |
| "learning_rate": 6.853181569409362e-06, | |
| "loss": 0.0037, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.159210175220137, | |
| "grad_norm": 0.7108380794525146, | |
| "learning_rate": 6.8386109378097536e-06, | |
| "loss": 0.0035, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 1.1621749829523556, | |
| "grad_norm": 1.8224056959152222, | |
| "learning_rate": 6.824022228622502e-06, | |
| "loss": 0.0035, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.1651397906845742, | |
| "grad_norm": 0.9493487477302551, | |
| "learning_rate": 6.8094155852866915e-06, | |
| "loss": 0.0028, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 1.1681045984167926, | |
| "grad_norm": 0.969045877456665, | |
| "learning_rate": 6.794791151417741e-06, | |
| "loss": 0.0042, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.1710694061490112, | |
| "grad_norm": 0.3356039524078369, | |
| "learning_rate": 6.780149070805983e-06, | |
| "loss": 0.0028, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 1.1740342138812299, | |
| "grad_norm": 0.19961191713809967, | |
| "learning_rate": 6.765489487415263e-06, | |
| "loss": 0.0042, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.1769990216134483, | |
| "grad_norm": 0.4981558322906494, | |
| "learning_rate": 6.750812545381513e-06, | |
| "loss": 0.0038, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 1.179963829345667, | |
| "grad_norm": 3.4783573150634766, | |
| "learning_rate": 6.73611838901134e-06, | |
| "loss": 0.0041, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.1829286370778855, | |
| "grad_norm": 0.049179501831531525, | |
| "learning_rate": 6.721407162780605e-06, | |
| "loss": 0.0035, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 1.185893444810104, | |
| "grad_norm": 1.4858205318450928, | |
| "learning_rate": 6.706679011333004e-06, | |
| "loss": 0.0031, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.185893444810104, | |
| "eval_loss": 0.1006452813744545, | |
| "eval_runtime": 4425.7501, | |
| "eval_samples_per_second": 1.323, | |
| "eval_steps_per_second": 0.331, | |
| "eval_wer": 24.584460200277757, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.1888582525423226, | |
| "grad_norm": 1.8448115587234497, | |
| "learning_rate": 6.691934079478648e-06, | |
| "loss": 0.0041, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 1.1918230602745412, | |
| "grad_norm": 1.0905754566192627, | |
| "learning_rate": 6.67717251219263e-06, | |
| "loss": 0.0047, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.1947878680067598, | |
| "grad_norm": 0.17178182303905487, | |
| "learning_rate": 6.662394454613611e-06, | |
| "loss": 0.0049, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 1.1977526757389783, | |
| "grad_norm": 1.1887060403823853, | |
| "learning_rate": 6.647600052042384e-06, | |
| "loss": 0.0041, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.200717483471197, | |
| "grad_norm": 0.19539013504981995, | |
| "learning_rate": 6.632789449940454e-06, | |
| "loss": 0.0029, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 1.2036822912034155, | |
| "grad_norm": 0.24206945300102234, | |
| "learning_rate": 6.617962793928598e-06, | |
| "loss": 0.0041, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.206647098935634, | |
| "grad_norm": 0.26349180936813354, | |
| "learning_rate": 6.603120229785443e-06, | |
| "loss": 0.0045, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 1.2096119066678526, | |
| "grad_norm": 0.38725215196609497, | |
| "learning_rate": 6.588261903446022e-06, | |
| "loss": 0.006, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.2125767144000712, | |
| "grad_norm": 0.3836526870727539, | |
| "learning_rate": 6.57338796100035e-06, | |
| "loss": 0.0037, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 1.2155415221322898, | |
| "grad_norm": 0.07236127555370331, | |
| "learning_rate": 6.558498548691984e-06, | |
| "loss": 0.0049, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.2185063298645082, | |
| "grad_norm": 0.2256426066160202, | |
| "learning_rate": 6.543593812916576e-06, | |
| "loss": 0.0034, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 1.2214711375967269, | |
| "grad_norm": 0.41789326071739197, | |
| "learning_rate": 6.528673900220449e-06, | |
| "loss": 0.0023, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.2244359453289455, | |
| "grad_norm": 1.1350836753845215, | |
| "learning_rate": 6.513738957299145e-06, | |
| "loss": 0.0034, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 1.227400753061164, | |
| "grad_norm": 1.7438832521438599, | |
| "learning_rate": 6.4987891309959885e-06, | |
| "loss": 0.0037, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.2303655607933826, | |
| "grad_norm": 0.8658862113952637, | |
| "learning_rate": 6.483824568300636e-06, | |
| "loss": 0.0037, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 1.2333303685256012, | |
| "grad_norm": 0.012403754517436028, | |
| "learning_rate": 6.46884541634764e-06, | |
| "loss": 0.0038, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.2362951762578196, | |
| "grad_norm": 0.08823706954717636, | |
| "learning_rate": 6.453851822414994e-06, | |
| "loss": 0.0063, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 1.2392599839900382, | |
| "grad_norm": 2.1406474113464355, | |
| "learning_rate": 6.438843933922691e-06, | |
| "loss": 0.0038, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.2422247917222569, | |
| "grad_norm": 0.23070839047431946, | |
| "learning_rate": 6.423821898431266e-06, | |
| "loss": 0.0043, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 1.2451895994544753, | |
| "grad_norm": 2.4584333896636963, | |
| "learning_rate": 6.4087858636403565e-06, | |
| "loss": 0.0028, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.248154407186694, | |
| "grad_norm": 0.4119054675102234, | |
| "learning_rate": 6.393735977387239e-06, | |
| "loss": 0.0042, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 1.2511192149189125, | |
| "grad_norm": 0.22328363358974457, | |
| "learning_rate": 6.3786723876453835e-06, | |
| "loss": 0.0023, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.254084022651131, | |
| "grad_norm": 0.10614093393087387, | |
| "learning_rate": 6.363595242522993e-06, | |
| "loss": 0.0023, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 1.2570488303833496, | |
| "grad_norm": 1.6743851900100708, | |
| "learning_rate": 6.3485046902615495e-06, | |
| "loss": 0.004, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.2600136381155682, | |
| "grad_norm": 0.046557892113924026, | |
| "learning_rate": 6.333400879234359e-06, | |
| "loss": 0.0019, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 1.2629784458477868, | |
| "grad_norm": 1.6077167987823486, | |
| "learning_rate": 6.318283957945091e-06, | |
| "loss": 0.0029, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.2659432535800053, | |
| "grad_norm": 0.8150466680526733, | |
| "learning_rate": 6.303154075026312e-06, | |
| "loss": 0.0032, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 1.268908061312224, | |
| "grad_norm": 1.1461350917816162, | |
| "learning_rate": 6.288011379238037e-06, | |
| "loss": 0.0044, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.2718728690444425, | |
| "grad_norm": 0.021177906543016434, | |
| "learning_rate": 6.272856019466254e-06, | |
| "loss": 0.0046, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 1.2748376767766612, | |
| "grad_norm": 0.09657502174377441, | |
| "learning_rate": 6.257688144721469e-06, | |
| "loss": 0.0024, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.2778024845088796, | |
| "grad_norm": 0.7494125366210938, | |
| "learning_rate": 6.242507904137237e-06, | |
| "loss": 0.0049, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 1.2807672922410982, | |
| "grad_norm": 1.2902694940567017, | |
| "learning_rate": 6.227315446968697e-06, | |
| "loss": 0.005, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.2837320999733168, | |
| "grad_norm": 0.059971459209918976, | |
| "learning_rate": 6.212110922591099e-06, | |
| "loss": 0.0045, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 1.2866969077055352, | |
| "grad_norm": 1.4867457151412964, | |
| "learning_rate": 6.196894480498349e-06, | |
| "loss": 0.0038, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.2896617154377539, | |
| "grad_norm": 0.17376358807086945, | |
| "learning_rate": 6.181666270301524e-06, | |
| "loss": 0.0048, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 1.2926265231699725, | |
| "grad_norm": 1.8240177631378174, | |
| "learning_rate": 6.166426441727409e-06, | |
| "loss": 0.0031, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.295591330902191, | |
| "grad_norm": 0.4864703118801117, | |
| "learning_rate": 6.151175144617023e-06, | |
| "loss": 0.004, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 1.2985561386344096, | |
| "grad_norm": 1.6870369911193848, | |
| "learning_rate": 6.135912528924145e-06, | |
| "loss": 0.0042, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.3015209463666282, | |
| "grad_norm": 0.1494980752468109, | |
| "learning_rate": 6.120638744713844e-06, | |
| "loss": 0.0057, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 1.3044857540988466, | |
| "grad_norm": 0.04229666665196419, | |
| "learning_rate": 6.105353942160994e-06, | |
| "loss": 0.0029, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.3074505618310652, | |
| "grad_norm": 0.2506253123283386, | |
| "learning_rate": 6.090058271548812e-06, | |
| "loss": 0.0025, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 1.3104153695632839, | |
| "grad_norm": 0.03190387040376663, | |
| "learning_rate": 6.0747518832673625e-06, | |
| "loss": 0.0039, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.3133801772955023, | |
| "grad_norm": 0.17619547247886658, | |
| "learning_rate": 6.059434927812096e-06, | |
| "loss": 0.0029, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 1.316344985027721, | |
| "grad_norm": 0.07416270673274994, | |
| "learning_rate": 6.044107555782359e-06, | |
| "loss": 0.0022, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.3193097927599395, | |
| "grad_norm": 0.5905612111091614, | |
| "learning_rate": 6.028769917879912e-06, | |
| "loss": 0.0036, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 1.322274600492158, | |
| "grad_norm": 0.16600991785526276, | |
| "learning_rate": 6.013422164907456e-06, | |
| "loss": 0.0026, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.3252394082243766, | |
| "grad_norm": 0.02765166386961937, | |
| "learning_rate": 5.998064447767145e-06, | |
| "loss": 0.0048, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 1.3282042159565952, | |
| "grad_norm": 0.03902462497353554, | |
| "learning_rate": 5.982696917459102e-06, | |
| "loss": 0.0036, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.3311690236888138, | |
| "grad_norm": 0.9107586741447449, | |
| "learning_rate": 5.967319725079935e-06, | |
| "loss": 0.0046, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 1.3341338314210325, | |
| "grad_norm": 0.13513422012329102, | |
| "learning_rate": 5.951933021821251e-06, | |
| "loss": 0.0026, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.3341338314210325, | |
| "eval_loss": 0.1039728969335556, | |
| "eval_runtime": 3797.1697, | |
| "eval_samples_per_second": 1.542, | |
| "eval_steps_per_second": 0.386, | |
| "eval_wer": 13.602806812367518, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.337098639153251, | |
| "grad_norm": 0.12307656556367874, | |
| "learning_rate": 5.93653695896817e-06, | |
| "loss": 0.0036, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 1.3400634468854695, | |
| "grad_norm": 1.4716602563858032, | |
| "learning_rate": 5.921131687897837e-06, | |
| "loss": 0.004, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.3430282546176882, | |
| "grad_norm": 0.6326406002044678, | |
| "learning_rate": 5.905717360077936e-06, | |
| "loss": 0.0025, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 1.3459930623499066, | |
| "grad_norm": 0.0671234279870987, | |
| "learning_rate": 5.890294127065196e-06, | |
| "loss": 0.0046, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.3489578700821252, | |
| "grad_norm": 0.08047901093959808, | |
| "learning_rate": 5.8748621405039056e-06, | |
| "loss": 0.0051, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 1.3519226778143438, | |
| "grad_norm": 0.06256826221942902, | |
| "learning_rate": 5.859730447208026e-06, | |
| "loss": 0.0021, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.3548874855465622, | |
| "grad_norm": 0.02325344830751419, | |
| "learning_rate": 5.844281576337013e-06, | |
| "loss": 0.0053, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 1.3578522932787809, | |
| "grad_norm": 1.1424200534820557, | |
| "learning_rate": 5.82882440432198e-06, | |
| "loss": 0.0038, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.3608171010109995, | |
| "grad_norm": 0.3276439607143402, | |
| "learning_rate": 5.813359083140911e-06, | |
| "loss": 0.0033, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 1.363781908743218, | |
| "grad_norm": 0.19697032868862152, | |
| "learning_rate": 5.7978857648519115e-06, | |
| "loss": 0.0035, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.3667467164754366, | |
| "grad_norm": 1.1077672243118286, | |
| "learning_rate": 5.782404601591718e-06, | |
| "loss": 0.0038, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 1.3697115242076552, | |
| "grad_norm": 1.1511179208755493, | |
| "learning_rate": 5.7669157455742016e-06, | |
| "loss": 0.0052, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.3726763319398736, | |
| "grad_norm": 0.051693856716156006, | |
| "learning_rate": 5.7514193490888685e-06, | |
| "loss": 0.0043, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 1.3756411396720922, | |
| "grad_norm": 0.1749086230993271, | |
| "learning_rate": 5.735915564499364e-06, | |
| "loss": 0.0039, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.3786059474043109, | |
| "grad_norm": 0.1431511789560318, | |
| "learning_rate": 5.7204045442419735e-06, | |
| "loss": 0.003, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 1.3815707551365293, | |
| "grad_norm": 0.014357727020978928, | |
| "learning_rate": 5.70488644082413e-06, | |
| "loss": 0.0034, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.384535562868748, | |
| "grad_norm": 0.3107154667377472, | |
| "learning_rate": 5.6893614068229044e-06, | |
| "loss": 0.005, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 1.3875003706009665, | |
| "grad_norm": 0.5827867388725281, | |
| "learning_rate": 5.673829594883511e-06, | |
| "loss": 0.0036, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.3904651783331852, | |
| "grad_norm": 0.4399601221084595, | |
| "learning_rate": 5.658291157717809e-06, | |
| "loss": 0.0039, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 1.3934299860654036, | |
| "grad_norm": 0.3615168035030365, | |
| "learning_rate": 5.642746248102795e-06, | |
| "loss": 0.0029, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.3963947937976222, | |
| "grad_norm": 0.10081396996974945, | |
| "learning_rate": 5.627195018879107e-06, | |
| "loss": 0.004, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 1.3993596015298408, | |
| "grad_norm": 1.02580988407135, | |
| "learning_rate": 5.611637622949517e-06, | |
| "loss": 0.0027, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.4023244092620595, | |
| "grad_norm": 0.8904252648353577, | |
| "learning_rate": 5.59607421327743e-06, | |
| "loss": 0.0031, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 1.405289216994278, | |
| "grad_norm": 0.36355146765708923, | |
| "learning_rate": 5.5805049428853744e-06, | |
| "loss": 0.0021, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.4082540247264965, | |
| "grad_norm": 0.9062953591346741, | |
| "learning_rate": 5.564929964853512e-06, | |
| "loss": 0.0045, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 1.4112188324587152, | |
| "grad_norm": 0.27617210149765015, | |
| "learning_rate": 5.549349432318114e-06, | |
| "loss": 0.0028, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.4141836401909336, | |
| "grad_norm": 1.4333345890045166, | |
| "learning_rate": 5.533763498470073e-06, | |
| "loss": 0.0059, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 1.4171484479231522, | |
| "grad_norm": 0.024424336850643158, | |
| "learning_rate": 5.518172316553378e-06, | |
| "loss": 0.0037, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.4201132556553708, | |
| "grad_norm": 1.163394808769226, | |
| "learning_rate": 5.502576039863629e-06, | |
| "loss": 0.0048, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 1.4230780633875892, | |
| "grad_norm": 0.915595293045044, | |
| "learning_rate": 5.486974821746512e-06, | |
| "loss": 0.0026, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.4260428711198079, | |
| "grad_norm": 0.47064208984375, | |
| "learning_rate": 5.471368815596299e-06, | |
| "loss": 0.0035, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 1.4290076788520265, | |
| "grad_norm": 1.8702881336212158, | |
| "learning_rate": 5.455758174854341e-06, | |
| "loss": 0.0028, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.431972486584245, | |
| "grad_norm": 1.371960163116455, | |
| "learning_rate": 5.440143053007554e-06, | |
| "loss": 0.0037, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 1.4349372943164636, | |
| "grad_norm": 2.1173136234283447, | |
| "learning_rate": 5.424523603586917e-06, | |
| "loss": 0.0048, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.4379021020486822, | |
| "grad_norm": 0.10133637487888336, | |
| "learning_rate": 5.408899980165957e-06, | |
| "loss": 0.0045, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 1.4408669097809006, | |
| "grad_norm": 0.07063573598861694, | |
| "learning_rate": 5.393272336359238e-06, | |
| "loss": 0.0018, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.4438317175131192, | |
| "grad_norm": 1.8741753101348877, | |
| "learning_rate": 5.377640825820856e-06, | |
| "loss": 0.0038, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 1.4467965252453379, | |
| "grad_norm": 1.05840003490448, | |
| "learning_rate": 5.362005602242928e-06, | |
| "loss": 0.0028, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.4497613329775563, | |
| "grad_norm": 0.07426287978887558, | |
| "learning_rate": 5.346366819354072e-06, | |
| "loss": 0.0034, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 1.452726140709775, | |
| "grad_norm": 0.6569198369979858, | |
| "learning_rate": 5.330724630917905e-06, | |
| "loss": 0.0034, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.4556909484419935, | |
| "grad_norm": 1.0480008125305176, | |
| "learning_rate": 5.31507919073153e-06, | |
| "loss": 0.004, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 1.4586557561742122, | |
| "grad_norm": 3.5745363235473633, | |
| "learning_rate": 5.299430652624019e-06, | |
| "loss": 0.0045, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.4616205639064308, | |
| "grad_norm": 0.3077293634414673, | |
| "learning_rate": 5.2837791704549056e-06, | |
| "loss": 0.002, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 1.4645853716386492, | |
| "grad_norm": 0.7704965472221375, | |
| "learning_rate": 5.268124898112668e-06, | |
| "loss": 0.0028, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.4675501793708678, | |
| "grad_norm": 1.4390088319778442, | |
| "learning_rate": 5.252467989513217e-06, | |
| "loss": 0.0033, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 1.4705149871030865, | |
| "grad_norm": 0.019557828083634377, | |
| "learning_rate": 5.236808598598388e-06, | |
| "loss": 0.003, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.473479794835305, | |
| "grad_norm": 0.04724704474210739, | |
| "learning_rate": 5.2211468793344164e-06, | |
| "loss": 0.0023, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 1.4764446025675235, | |
| "grad_norm": 0.11611367762088776, | |
| "learning_rate": 5.2054829857104395e-06, | |
| "loss": 0.0022, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.4794094102997422, | |
| "grad_norm": 0.8246281743049622, | |
| "learning_rate": 5.189817071736965e-06, | |
| "loss": 0.0024, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 1.4823742180319606, | |
| "grad_norm": 0.017677294090390205, | |
| "learning_rate": 5.174149291444369e-06, | |
| "loss": 0.0041, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.4823742180319606, | |
| "eval_loss": 0.10340176522731781, | |
| "eval_runtime": 3475.2421, | |
| "eval_samples_per_second": 1.685, | |
| "eval_steps_per_second": 0.421, | |
| "eval_wer": 7.892697902200132, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.4853390257641792, | |
| "grad_norm": 0.24159014225006104, | |
| "learning_rate": 5.158479798881378e-06, | |
| "loss": 0.0043, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 1.4883038334963978, | |
| "grad_norm": 0.09349126368761063, | |
| "learning_rate": 5.142808748113552e-06, | |
| "loss": 0.0021, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.4912686412286162, | |
| "grad_norm": 9.830806732177734, | |
| "learning_rate": 5.127136293221768e-06, | |
| "loss": 0.003, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 1.4942334489608349, | |
| "grad_norm": 0.02017897740006447, | |
| "learning_rate": 5.11146258830072e-06, | |
| "loss": 0.0018, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.4971982566930535, | |
| "grad_norm": 0.9748997092247009, | |
| "learning_rate": 5.095787787457381e-06, | |
| "loss": 0.0028, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 1.500163064425272, | |
| "grad_norm": 0.08710702508687973, | |
| "learning_rate": 5.080112044809507e-06, | |
| "loss": 0.0045, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.5031278721574906, | |
| "grad_norm": 0.12617884576320648, | |
| "learning_rate": 5.064435514484107e-06, | |
| "loss": 0.0041, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 1.5060926798897092, | |
| "grad_norm": 1.1999905109405518, | |
| "learning_rate": 5.04875835061594e-06, | |
| "loss": 0.0028, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.5090574876219276, | |
| "grad_norm": 0.6879501342773438, | |
| "learning_rate": 5.033080707345994e-06, | |
| "loss": 0.0027, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 1.5120222953541462, | |
| "grad_norm": 0.3369424641132355, | |
| "learning_rate": 5.017402738819968e-06, | |
| "loss": 0.0036, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.5149871030863649, | |
| "grad_norm": 0.05699804425239563, | |
| "learning_rate": 5.001724599186759e-06, | |
| "loss": 0.003, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 1.5179519108185833, | |
| "grad_norm": 0.15526382625102997, | |
| "learning_rate": 4.986046442596949e-06, | |
| "loss": 0.0044, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.5209167185508021, | |
| "grad_norm": 0.13832417130470276, | |
| "learning_rate": 4.970368423201286e-06, | |
| "loss": 0.0027, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 1.5238815262830205, | |
| "grad_norm": 0.35199618339538574, | |
| "learning_rate": 4.954690695149166e-06, | |
| "loss": 0.0028, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.526846334015239, | |
| "grad_norm": 0.03731779381632805, | |
| "learning_rate": 4.939013412587124e-06, | |
| "loss": 0.0022, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 1.5298111417474578, | |
| "grad_norm": 1.604335904121399, | |
| "learning_rate": 4.923336729657312e-06, | |
| "loss": 0.0014, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.5327759494796762, | |
| "grad_norm": 0.13903003931045532, | |
| "learning_rate": 4.90766080049599e-06, | |
| "loss": 0.0037, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 1.5357407572118948, | |
| "grad_norm": 0.04218101501464844, | |
| "learning_rate": 4.891985779232004e-06, | |
| "loss": 0.0028, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.5387055649441135, | |
| "grad_norm": 2.855713129043579, | |
| "learning_rate": 4.876311819985274e-06, | |
| "loss": 0.0038, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 1.541670372676332, | |
| "grad_norm": 0.18099339306354523, | |
| "learning_rate": 4.860639076865275e-06, | |
| "loss": 0.0023, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.5446351804085505, | |
| "grad_norm": 0.24422989785671234, | |
| "learning_rate": 4.844967703969532e-06, | |
| "loss": 0.0019, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 1.5475999881407692, | |
| "grad_norm": 0.45736047625541687, | |
| "learning_rate": 4.829297855382093e-06, | |
| "loss": 0.0034, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.5505647958729876, | |
| "grad_norm": 0.041498664766550064, | |
| "learning_rate": 4.81362968517202e-06, | |
| "loss": 0.0031, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 1.5535296036052062, | |
| "grad_norm": 0.7280819416046143, | |
| "learning_rate": 4.797963347391869e-06, | |
| "loss": 0.0027, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.5564944113374248, | |
| "grad_norm": 0.19048987329006195, | |
| "learning_rate": 4.782298996076183e-06, | |
| "loss": 0.0018, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 1.5594592190696432, | |
| "grad_norm": 0.026411147788167, | |
| "learning_rate": 4.766636785239976e-06, | |
| "loss": 0.0042, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.5624240268018619, | |
| "grad_norm": 0.03540141507983208, | |
| "learning_rate": 4.75097686887721e-06, | |
| "loss": 0.0021, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 1.5653888345340805, | |
| "grad_norm": 2.208853244781494, | |
| "learning_rate": 4.735319400959291e-06, | |
| "loss": 0.0019, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.568353642266299, | |
| "grad_norm": 0.07108508050441742, | |
| "learning_rate": 4.719664535433553e-06, | |
| "loss": 0.0046, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 1.5713184499985176, | |
| "grad_norm": 0.6509733200073242, | |
| "learning_rate": 4.70401242622174e-06, | |
| "loss": 0.0034, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.5742832577307362, | |
| "grad_norm": 1.6000564098358154, | |
| "learning_rate": 4.688363227218495e-06, | |
| "loss": 0.0046, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 1.5772480654629546, | |
| "grad_norm": 0.046783801168203354, | |
| "learning_rate": 4.6727170922898495e-06, | |
| "loss": 0.0026, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.5802128731951735, | |
| "grad_norm": 0.15229929983615875, | |
| "learning_rate": 4.657387001081686e-06, | |
| "loss": 0.0041, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 1.5831776809273919, | |
| "grad_norm": 0.048166148364543915, | |
| "learning_rate": 4.641747386836947e-06, | |
| "loss": 0.0029, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.5861424886596103, | |
| "grad_norm": 0.14575374126434326, | |
| "learning_rate": 4.626111295003e-06, | |
| "loss": 0.0018, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 1.5891072963918291, | |
| "grad_norm": 0.3450085520744324, | |
| "learning_rate": 4.610478879317006e-06, | |
| "loss": 0.002, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.5920721041240475, | |
| "grad_norm": 0.020749246701598167, | |
| "learning_rate": 4.594850293479977e-06, | |
| "loss": 0.0036, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 1.5950369118562662, | |
| "grad_norm": 0.039859525859355927, | |
| "learning_rate": 4.5792256911552716e-06, | |
| "loss": 0.0034, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.5980017195884848, | |
| "grad_norm": 0.11719939112663269, | |
| "learning_rate": 4.563605225967078e-06, | |
| "loss": 0.0035, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 1.6009665273207032, | |
| "grad_norm": 2.0398378372192383, | |
| "learning_rate": 4.547989051498915e-06, | |
| "loss": 0.003, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.6039313350529218, | |
| "grad_norm": 0.5318859815597534, | |
| "learning_rate": 4.532377321292105e-06, | |
| "loss": 0.0025, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 1.6068961427851405, | |
| "grad_norm": 0.13859181106090546, | |
| "learning_rate": 4.51677018884428e-06, | |
| "loss": 0.0037, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.609860950517359, | |
| "grad_norm": 0.6273730993270874, | |
| "learning_rate": 4.50116780760786e-06, | |
| "loss": 0.0036, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 1.6128257582495775, | |
| "grad_norm": 0.09279701858758926, | |
| "learning_rate": 4.485570330988558e-06, | |
| "loss": 0.003, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.6157905659817962, | |
| "grad_norm": 1.7686452865600586, | |
| "learning_rate": 4.4699779123438605e-06, | |
| "loss": 0.0027, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 1.6187553737140146, | |
| "grad_norm": 0.1289617270231247, | |
| "learning_rate": 4.45470239706685e-06, | |
| "loss": 0.0016, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.6217201814462332, | |
| "grad_norm": 0.01868435926735401, | |
| "learning_rate": 4.4391204454510306e-06, | |
| "loss": 0.0033, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 1.6246849891784518, | |
| "grad_norm": 0.004626968875527382, | |
| "learning_rate": 4.423544008514307e-06, | |
| "loss": 0.0029, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.6276497969106702, | |
| "grad_norm": 0.17620013654232025, | |
| "learning_rate": 4.407973239407297e-06, | |
| "loss": 0.0025, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 1.6306146046428889, | |
| "grad_norm": 0.5455169081687927, | |
| "learning_rate": 4.392408291224895e-06, | |
| "loss": 0.0024, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.6306146046428889, | |
| "eval_loss": 0.09911302477121353, | |
| "eval_runtime": 3697.4743, | |
| "eval_samples_per_second": 1.584, | |
| "eval_steps_per_second": 0.396, | |
| "eval_wer": 10.813537022147504, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.6335794123751075, | |
| "grad_norm": 0.31777656078338623, | |
| "learning_rate": 4.376849317004758e-06, | |
| "loss": 0.0021, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 1.636544220107326, | |
| "grad_norm": 0.11544878035783768, | |
| "learning_rate": 4.361296469725813e-06, | |
| "loss": 0.0034, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.6395090278395446, | |
| "grad_norm": 0.1291680634021759, | |
| "learning_rate": 4.345749902306739e-06, | |
| "loss": 0.0026, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 1.6424738355717632, | |
| "grad_norm": 0.03346523270010948, | |
| "learning_rate": 4.33020976760447e-06, | |
| "loss": 0.0025, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.6454386433039816, | |
| "grad_norm": 0.014625953510403633, | |
| "learning_rate": 4.3146762184126985e-06, | |
| "loss": 0.0026, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 1.6484034510362005, | |
| "grad_norm": 1.6268267631530762, | |
| "learning_rate": 4.29914940746036e-06, | |
| "loss": 0.0045, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.6513682587684189, | |
| "grad_norm": 0.19663332402706146, | |
| "learning_rate": 4.283629487410143e-06, | |
| "loss": 0.0044, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 1.6543330665006373, | |
| "grad_norm": 0.735085666179657, | |
| "learning_rate": 4.26811661085698e-06, | |
| "loss": 0.0025, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.6572978742328561, | |
| "grad_norm": 1.0356351137161255, | |
| "learning_rate": 4.252610930326554e-06, | |
| "loss": 0.0029, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 1.6602626819650745, | |
| "grad_norm": 1.1497199535369873, | |
| "learning_rate": 4.237112598273793e-06, | |
| "loss": 0.0032, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.6632274896972932, | |
| "grad_norm": 0.06777022778987885, | |
| "learning_rate": 4.221621767081372e-06, | |
| "loss": 0.0028, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 1.6661922974295118, | |
| "grad_norm": 0.1217995136976242, | |
| "learning_rate": 4.206138589058217e-06, | |
| "loss": 0.0014, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.6691571051617302, | |
| "grad_norm": 2.1839497089385986, | |
| "learning_rate": 4.190663216438011e-06, | |
| "loss": 0.0036, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 1.6721219128939488, | |
| "grad_norm": 0.40566369891166687, | |
| "learning_rate": 4.1751958013776875e-06, | |
| "loss": 0.0049, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.6750867206261675, | |
| "grad_norm": 0.15384216606616974, | |
| "learning_rate": 4.159736495955937e-06, | |
| "loss": 0.0026, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 1.678051528358386, | |
| "grad_norm": 0.9062075018882751, | |
| "learning_rate": 4.1442854521717245e-06, | |
| "loss": 0.0027, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.6810163360906045, | |
| "grad_norm": 0.2344817817211151, | |
| "learning_rate": 4.128842821942776e-06, | |
| "loss": 0.0021, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 1.6839811438228232, | |
| "grad_norm": 0.011048096232116222, | |
| "learning_rate": 4.113408757104098e-06, | |
| "loss": 0.0034, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.6869459515550416, | |
| "grad_norm": 0.42319443821907043, | |
| "learning_rate": 4.097983409406478e-06, | |
| "loss": 0.0033, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 1.6899107592872602, | |
| "grad_norm": 0.3838003873825073, | |
| "learning_rate": 4.082566930514997e-06, | |
| "loss": 0.0014, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.6928755670194788, | |
| "grad_norm": 0.0286489836871624, | |
| "learning_rate": 4.067159472007533e-06, | |
| "loss": 0.0017, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 1.6958403747516972, | |
| "grad_norm": 0.014939649030566216, | |
| "learning_rate": 4.051761185373279e-06, | |
| "loss": 0.0039, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.6988051824839159, | |
| "grad_norm": 0.0331413671374321, | |
| "learning_rate": 4.036372222011243e-06, | |
| "loss": 0.0029, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 1.7017699902161345, | |
| "grad_norm": 0.029739579185843468, | |
| "learning_rate": 4.020992733228767e-06, | |
| "loss": 0.0021, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.704734797948353, | |
| "grad_norm": 0.03251701593399048, | |
| "learning_rate": 4.005622870240038e-06, | |
| "loss": 0.0026, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 1.7076996056805718, | |
| "grad_norm": 0.49799245595932007, | |
| "learning_rate": 3.990262784164598e-06, | |
| "loss": 0.0026, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.7106644134127902, | |
| "grad_norm": 0.08971042931079865, | |
| "learning_rate": 3.974912626025864e-06, | |
| "loss": 0.0014, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 1.7136292211450086, | |
| "grad_norm": 0.011349070817232132, | |
| "learning_rate": 3.959572546749634e-06, | |
| "loss": 0.0014, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.7165940288772275, | |
| "grad_norm": 0.015758298337459564, | |
| "learning_rate": 3.9442426971626156e-06, | |
| "loss": 0.002, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 1.7195588366094459, | |
| "grad_norm": 1.6579985618591309, | |
| "learning_rate": 3.92892322799093e-06, | |
| "loss": 0.0037, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.7225236443416645, | |
| "grad_norm": 0.313670814037323, | |
| "learning_rate": 3.913614289858639e-06, | |
| "loss": 0.003, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 1.7254884520738831, | |
| "grad_norm": 0.3589758276939392, | |
| "learning_rate": 3.898316033286261e-06, | |
| "loss": 0.0019, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.7284532598061015, | |
| "grad_norm": 0.1357835978269577, | |
| "learning_rate": 3.883028608689291e-06, | |
| "loss": 0.0026, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 1.7314180675383202, | |
| "grad_norm": 0.12351395934820175, | |
| "learning_rate": 3.86775216637672e-06, | |
| "loss": 0.0034, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.7343828752705388, | |
| "grad_norm": 0.27879443764686584, | |
| "learning_rate": 3.852486856549564e-06, | |
| "loss": 0.0031, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 1.7373476830027572, | |
| "grad_norm": 0.11169170588254929, | |
| "learning_rate": 3.837232829299375e-06, | |
| "loss": 0.0026, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.7403124907349758, | |
| "grad_norm": 0.17126189172267914, | |
| "learning_rate": 3.821990234606778e-06, | |
| "loss": 0.0026, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 1.7432772984671945, | |
| "grad_norm": 0.012666056863963604, | |
| "learning_rate": 3.8067592223399908e-06, | |
| "loss": 0.003, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.746242106199413, | |
| "grad_norm": 0.03582064434885979, | |
| "learning_rate": 3.7915399422533466e-06, | |
| "loss": 0.0021, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 1.7492069139316315, | |
| "grad_norm": 0.26408132910728455, | |
| "learning_rate": 3.7763325439858288e-06, | |
| "loss": 0.0032, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.7521717216638502, | |
| "grad_norm": 0.6645041108131409, | |
| "learning_rate": 3.761137177059594e-06, | |
| "loss": 0.004, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 1.7551365293960686, | |
| "grad_norm": 0.7976374626159668, | |
| "learning_rate": 3.7459539908785057e-06, | |
| "loss": 0.0026, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.7581013371282872, | |
| "grad_norm": 1.91818106174469, | |
| "learning_rate": 3.7307831347266653e-06, | |
| "loss": 0.0037, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 1.7610661448605058, | |
| "grad_norm": 0.10215174406766891, | |
| "learning_rate": 3.7156247577669413e-06, | |
| "loss": 0.0058, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.7640309525927242, | |
| "grad_norm": 0.7943007349967957, | |
| "learning_rate": 3.7004790090395043e-06, | |
| "loss": 0.0041, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 1.7669957603249429, | |
| "grad_norm": 0.08547532558441162, | |
| "learning_rate": 3.6853460374603613e-06, | |
| "loss": 0.0031, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.7699605680571615, | |
| "grad_norm": 0.7651055455207825, | |
| "learning_rate": 3.67022599181989e-06, | |
| "loss": 0.0019, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 1.77292537578938, | |
| "grad_norm": 0.016290869563817978, | |
| "learning_rate": 3.6551190207813836e-06, | |
| "loss": 0.002, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.7758901835215988, | |
| "grad_norm": 0.018452471122145653, | |
| "learning_rate": 3.640025272879578e-06, | |
| "loss": 0.0038, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 1.7788549912538172, | |
| "grad_norm": 0.3965455889701843, | |
| "learning_rate": 3.624944896519198e-06, | |
| "loss": 0.0039, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.7788549912538172, | |
| "eval_loss": 0.09763780981302261, | |
| "eval_runtime": 1933.8956, | |
| "eval_samples_per_second": 3.028, | |
| "eval_steps_per_second": 0.757, | |
| "eval_wer": 17.889043198596593, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.7818197989860356, | |
| "grad_norm": 0.29020628333091736, | |
| "learning_rate": 3.609878039973498e-06, | |
| "loss": 0.0019, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 1.7847846067182545, | |
| "grad_norm": 0.07447979599237442, | |
| "learning_rate": 3.5948248513828e-06, | |
| "loss": 0.0032, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.7877494144504729, | |
| "grad_norm": 0.13018357753753662, | |
| "learning_rate": 3.5797854787530433e-06, | |
| "loss": 0.0022, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 1.7907142221826915, | |
| "grad_norm": 0.5166714191436768, | |
| "learning_rate": 3.564760069954323e-06, | |
| "loss": 0.0028, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.7936790299149101, | |
| "grad_norm": 0.013915842399001122, | |
| "learning_rate": 3.5497487727194405e-06, | |
| "loss": 0.0024, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 1.7966438376471285, | |
| "grad_norm": 0.0228273943066597, | |
| "learning_rate": 3.534751734642451e-06, | |
| "loss": 0.0025, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.7996086453793472, | |
| "grad_norm": 0.06491345912218094, | |
| "learning_rate": 3.5197691031772095e-06, | |
| "loss": 0.0032, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 1.8025734531115658, | |
| "grad_norm": 0.07135559618473053, | |
| "learning_rate": 3.504801025635921e-06, | |
| "loss": 0.0014, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.8055382608437842, | |
| "grad_norm": 1.0214452743530273, | |
| "learning_rate": 3.489847649187693e-06, | |
| "loss": 0.0012, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 1.8085030685760028, | |
| "grad_norm": 0.09079097956418991, | |
| "learning_rate": 3.474909120857094e-06, | |
| "loss": 0.0035, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.8114678763082215, | |
| "grad_norm": 1.4023202657699585, | |
| "learning_rate": 3.4599855875226967e-06, | |
| "loss": 0.0039, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 1.81443268404044, | |
| "grad_norm": 2.584686279296875, | |
| "learning_rate": 3.4450771959156437e-06, | |
| "loss": 0.0033, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.8173974917726585, | |
| "grad_norm": 0.04744521901011467, | |
| "learning_rate": 3.430184092618199e-06, | |
| "loss": 0.0038, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 1.8203622995048772, | |
| "grad_norm": 0.03454584628343582, | |
| "learning_rate": 3.4153064240623113e-06, | |
| "loss": 0.0015, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.8233271072370956, | |
| "grad_norm": 0.012815977446734905, | |
| "learning_rate": 3.4004443365281703e-06, | |
| "loss": 0.0009, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 1.8262919149693142, | |
| "grad_norm": 1.009682297706604, | |
| "learning_rate": 3.3855979761427705e-06, | |
| "loss": 0.0023, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.8292567227015328, | |
| "grad_norm": 0.026725037023425102, | |
| "learning_rate": 3.370767488878471e-06, | |
| "loss": 0.0025, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 1.8322215304337512, | |
| "grad_norm": 0.06415297836065292, | |
| "learning_rate": 3.3559530205515705e-06, | |
| "loss": 0.0016, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.83518633816597, | |
| "grad_norm": 0.007746212650090456, | |
| "learning_rate": 3.341154716820857e-06, | |
| "loss": 0.0018, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 1.8381511458981885, | |
| "grad_norm": 0.04692048206925392, | |
| "learning_rate": 3.3263727231861942e-06, | |
| "loss": 0.0016, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.841115953630407, | |
| "grad_norm": 1.4830511808395386, | |
| "learning_rate": 3.3116071849870746e-06, | |
| "loss": 0.0019, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 1.8440807613626258, | |
| "grad_norm": 0.0519493967294693, | |
| "learning_rate": 3.2968582474012e-06, | |
| "loss": 0.0009, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.8470455690948442, | |
| "grad_norm": 0.21671843528747559, | |
| "learning_rate": 3.2821260554430538e-06, | |
| "loss": 0.0009, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 1.8500103768270628, | |
| "grad_norm": 0.04223645478487015, | |
| "learning_rate": 3.26741075396247e-06, | |
| "loss": 0.0027, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.8529751845592815, | |
| "grad_norm": 0.03890330716967583, | |
| "learning_rate": 3.252712487643214e-06, | |
| "loss": 0.0024, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 1.8559399922914999, | |
| "grad_norm": 0.01904849335551262, | |
| "learning_rate": 3.2380314010015543e-06, | |
| "loss": 0.002, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.8589048000237185, | |
| "grad_norm": 0.03356530889868736, | |
| "learning_rate": 3.2233676383848533e-06, | |
| "loss": 0.0021, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 1.8618696077559371, | |
| "grad_norm": 0.07836949825286865, | |
| "learning_rate": 3.2087213439701326e-06, | |
| "loss": 0.003, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.8648344154881555, | |
| "grad_norm": 1.2145752906799316, | |
| "learning_rate": 3.1940926617626655e-06, | |
| "loss": 0.0023, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 1.8677992232203742, | |
| "grad_norm": 0.08941322565078735, | |
| "learning_rate": 3.179481735594558e-06, | |
| "loss": 0.0027, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.8707640309525928, | |
| "grad_norm": 0.030275221914052963, | |
| "learning_rate": 3.164888709123338e-06, | |
| "loss": 0.0043, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 1.8737288386848112, | |
| "grad_norm": 1.2117986679077148, | |
| "learning_rate": 3.150313725830536e-06, | |
| "loss": 0.0032, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.8766936464170298, | |
| "grad_norm": 0.02491467259824276, | |
| "learning_rate": 3.1357569290202827e-06, | |
| "loss": 0.0024, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 1.8796584541492485, | |
| "grad_norm": 0.1975928694009781, | |
| "learning_rate": 3.121218461817893e-06, | |
| "loss": 0.0016, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.882623261881467, | |
| "grad_norm": 0.7876911163330078, | |
| "learning_rate": 3.1066984671684595e-06, | |
| "loss": 0.003, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 1.8855880696136855, | |
| "grad_norm": 0.011536120437085629, | |
| "learning_rate": 3.0921970878354535e-06, | |
| "loss": 0.0025, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.8885528773459042, | |
| "grad_norm": 0.33370065689086914, | |
| "learning_rate": 3.077714466399314e-06, | |
| "loss": 0.0017, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 1.8915176850781226, | |
| "grad_norm": 0.05288661643862724, | |
| "learning_rate": 3.06325074525605e-06, | |
| "loss": 0.0045, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.8944824928103412, | |
| "grad_norm": 0.03731587901711464, | |
| "learning_rate": 3.048806066615836e-06, | |
| "loss": 0.0016, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 1.8974473005425598, | |
| "grad_norm": 0.1143055409193039, | |
| "learning_rate": 3.0343805725016218e-06, | |
| "loss": 0.003, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.9004121082747782, | |
| "grad_norm": 0.02334473840892315, | |
| "learning_rate": 3.0199744047477274e-06, | |
| "loss": 0.0015, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 1.903376916006997, | |
| "grad_norm": 0.8750964403152466, | |
| "learning_rate": 3.005587704998453e-06, | |
| "loss": 0.0033, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.9063417237392155, | |
| "grad_norm": 0.16102011501789093, | |
| "learning_rate": 2.991220614706686e-06, | |
| "loss": 0.0015, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 1.909306531471434, | |
| "grad_norm": 1.6478341817855835, | |
| "learning_rate": 2.9768732751325102e-06, | |
| "loss": 0.0014, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.9122713392036528, | |
| "grad_norm": 0.01625387743115425, | |
| "learning_rate": 2.962545827341815e-06, | |
| "loss": 0.0013, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 1.9152361469358712, | |
| "grad_norm": 0.8058168292045593, | |
| "learning_rate": 2.948238412204909e-06, | |
| "loss": 0.0024, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.9182009546680898, | |
| "grad_norm": 0.2690886855125427, | |
| "learning_rate": 2.9339511703951408e-06, | |
| "loss": 0.0028, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 1.9211657624003085, | |
| "grad_norm": 0.06821112334728241, | |
| "learning_rate": 2.919684242387505e-06, | |
| "loss": 0.0021, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.9241305701325269, | |
| "grad_norm": 0.012448090128600597, | |
| "learning_rate": 2.905437768457272e-06, | |
| "loss": 0.0012, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 1.9270953778647455, | |
| "grad_norm": 0.33233898878097534, | |
| "learning_rate": 2.8912118886785974e-06, | |
| "loss": 0.0029, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.9270953778647455, | |
| "eval_loss": 0.09504964202642441, | |
| "eval_runtime": 1925.3076, | |
| "eval_samples_per_second": 3.041, | |
| "eval_steps_per_second": 0.76, | |
| "eval_wer": 14.754769388202615, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.9300601855969641, | |
| "grad_norm": 1.714479923248291, | |
| "learning_rate": 2.877006742923155e-06, | |
| "loss": 0.0028, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 1.9330249933291825, | |
| "grad_norm": 0.18112489581108093, | |
| "learning_rate": 2.8628224708587617e-06, | |
| "loss": 0.0011, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.9359898010614012, | |
| "grad_norm": 0.025406301021575928, | |
| "learning_rate": 2.848659211947989e-06, | |
| "loss": 0.0023, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 1.9389546087936198, | |
| "grad_norm": 1.0433779954910278, | |
| "learning_rate": 2.8345171054468122e-06, | |
| "loss": 0.0012, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.9419194165258382, | |
| "grad_norm": 0.6328701972961426, | |
| "learning_rate": 2.820396290403232e-06, | |
| "loss": 0.0042, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 1.9448842242580568, | |
| "grad_norm": 2.384730339050293, | |
| "learning_rate": 2.8062969056558998e-06, | |
| "loss": 0.0029, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.9478490319902755, | |
| "grad_norm": 1.5068167448043823, | |
| "learning_rate": 2.792219089832767e-06, | |
| "loss": 0.003, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 1.950813839722494, | |
| "grad_norm": 0.24156026542186737, | |
| "learning_rate": 2.778162981349714e-06, | |
| "loss": 0.0015, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.9537786474547125, | |
| "grad_norm": 0.06006159633398056, | |
| "learning_rate": 2.7641287184091835e-06, | |
| "loss": 0.0033, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 1.9567434551869312, | |
| "grad_norm": 0.1492675542831421, | |
| "learning_rate": 2.7501164389988423e-06, | |
| "loss": 0.0022, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.9597082629191496, | |
| "grad_norm": 0.019522709771990776, | |
| "learning_rate": 2.736126280890196e-06, | |
| "loss": 0.0026, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 1.9626730706513684, | |
| "grad_norm": 0.2943977117538452, | |
| "learning_rate": 2.7221583816372602e-06, | |
| "loss": 0.0033, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.9656378783835868, | |
| "grad_norm": 0.8795707821846008, | |
| "learning_rate": 2.7082128785751884e-06, | |
| "loss": 0.0025, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 1.9686026861158052, | |
| "grad_norm": 0.2423001527786255, | |
| "learning_rate": 2.694289908818937e-06, | |
| "loss": 0.0014, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.971567493848024, | |
| "grad_norm": 1.4210988283157349, | |
| "learning_rate": 2.6803896092619073e-06, | |
| "loss": 0.0021, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 1.9745323015802425, | |
| "grad_norm": 0.1827964335680008, | |
| "learning_rate": 2.6665121165746027e-06, | |
| "loss": 0.0015, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.977497109312461, | |
| "grad_norm": 0.8498244285583496, | |
| "learning_rate": 2.652934432464768e-06, | |
| "loss": 0.0024, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 1.9804619170446798, | |
| "grad_norm": 0.006696091033518314, | |
| "learning_rate": 2.6391024997058873e-06, | |
| "loss": 0.0012, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.9834267247768982, | |
| "grad_norm": 0.7741572856903076, | |
| "learning_rate": 2.6252937797597878e-06, | |
| "loss": 0.0019, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 1.9863915325091168, | |
| "grad_norm": 0.9306731820106506, | |
| "learning_rate": 2.6115084083965435e-06, | |
| "loss": 0.0013, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.9893563402413355, | |
| "grad_norm": 0.019881825894117355, | |
| "learning_rate": 2.5977465211566645e-06, | |
| "loss": 0.0031, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 1.9923211479735539, | |
| "grad_norm": 0.3807990849018097, | |
| "learning_rate": 2.5840082533497534e-06, | |
| "loss": 0.0018, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.9952859557057725, | |
| "grad_norm": 0.04648787900805473, | |
| "learning_rate": 2.570293740053188e-06, | |
| "loss": 0.0013, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 1.9982507634379911, | |
| "grad_norm": 0.15467403829097748, | |
| "learning_rate": 2.5566031161107856e-06, | |
| "loss": 0.0018, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.0011859230928875, | |
| "grad_norm": 0.016635119915008545, | |
| "learning_rate": 2.5429365161314758e-06, | |
| "loss": 0.0013, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 2.004150730825106, | |
| "grad_norm": 0.015187140554189682, | |
| "learning_rate": 2.529294074487984e-06, | |
| "loss": 0.0007, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.0071155385573247, | |
| "grad_norm": 0.10330521315336227, | |
| "learning_rate": 2.5156759253155017e-06, | |
| "loss": 0.001, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 2.010080346289543, | |
| "grad_norm": 0.744099497795105, | |
| "learning_rate": 2.5020822025103752e-06, | |
| "loss": 0.0005, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.0130451540217615, | |
| "grad_norm": 0.02314056269824505, | |
| "learning_rate": 2.4885130397287875e-06, | |
| "loss": 0.0009, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 2.0160099617539804, | |
| "grad_norm": 0.1130124107003212, | |
| "learning_rate": 2.4749685703854366e-06, | |
| "loss": 0.0007, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.018974769486199, | |
| "grad_norm": 0.010739482939243317, | |
| "learning_rate": 2.461448927652234e-06, | |
| "loss": 0.0005, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 2.021939577218417, | |
| "grad_norm": 0.07081291824579239, | |
| "learning_rate": 2.447954244456991e-06, | |
| "loss": 0.0008, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.024904384950636, | |
| "grad_norm": 0.02812119945883751, | |
| "learning_rate": 2.434484653482111e-06, | |
| "loss": 0.0008, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 2.0278691926828545, | |
| "grad_norm": 0.017130475491285324, | |
| "learning_rate": 2.421040287163287e-06, | |
| "loss": 0.0006, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.030834000415073, | |
| "grad_norm": 0.09646749496459961, | |
| "learning_rate": 2.407621277688194e-06, | |
| "loss": 0.0018, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 2.0337988081472917, | |
| "grad_norm": 0.04550813138484955, | |
| "learning_rate": 2.3942277569951983e-06, | |
| "loss": 0.0009, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.03676361587951, | |
| "grad_norm": 0.06901838630437851, | |
| "learning_rate": 2.380859856772054e-06, | |
| "loss": 0.0008, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 2.0397284236117286, | |
| "grad_norm": 0.007037996314466, | |
| "learning_rate": 2.367517708454607e-06, | |
| "loss": 0.0005, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.0426932313439474, | |
| "grad_norm": 0.0740680992603302, | |
| "learning_rate": 2.3544675140285617e-06, | |
| "loss": 0.0012, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 2.045658039076166, | |
| "grad_norm": 0.617061197757721, | |
| "learning_rate": 2.341176741254201e-06, | |
| "loss": 0.0009, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.0486228468083847, | |
| "grad_norm": 0.00723099522292614, | |
| "learning_rate": 2.3279121105578058e-06, | |
| "loss": 0.0008, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 2.051587654540603, | |
| "grad_norm": 0.03154204040765762, | |
| "learning_rate": 2.3146737523598583e-06, | |
| "loss": 0.0009, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.0545524622728215, | |
| "grad_norm": 0.004429316148161888, | |
| "learning_rate": 2.3014617968225256e-06, | |
| "loss": 0.0005, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 2.0575172700050404, | |
| "grad_norm": 0.1683816909790039, | |
| "learning_rate": 2.2882763738483724e-06, | |
| "loss": 0.0013, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 2.0604820777372588, | |
| "grad_norm": 0.020628100261092186, | |
| "learning_rate": 2.275117613079099e-06, | |
| "loss": 0.0006, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 2.063446885469477, | |
| "grad_norm": 0.038743212819099426, | |
| "learning_rate": 2.2619856438942485e-06, | |
| "loss": 0.0009, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.066411693201696, | |
| "grad_norm": 0.019078753888607025, | |
| "learning_rate": 2.2488805954099498e-06, | |
| "loss": 0.0003, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 2.0693765009339145, | |
| "grad_norm": 0.008165969513356686, | |
| "learning_rate": 2.2358025964776427e-06, | |
| "loss": 0.0011, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 2.072341308666133, | |
| "grad_norm": 0.018806306645274162, | |
| "learning_rate": 2.2227517756828036e-06, | |
| "loss": 0.0008, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 2.0753061163983517, | |
| "grad_norm": 0.010271578095853329, | |
| "learning_rate": 2.2097282613436964e-06, | |
| "loss": 0.0012, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.0753061163983517, | |
| "eval_loss": 0.09633096307516098, | |
| "eval_runtime": 1965.4539, | |
| "eval_samples_per_second": 2.979, | |
| "eval_steps_per_second": 0.745, | |
| "eval_wer": 17.523572838242817, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.07827092413057, | |
| "grad_norm": 0.05877639353275299, | |
| "learning_rate": 2.1967321815100996e-06, | |
| "loss": 0.0007, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 2.0812357318627885, | |
| "grad_norm": 0.012410519644618034, | |
| "learning_rate": 2.1837636639620454e-06, | |
| "loss": 0.0008, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 2.0842005395950074, | |
| "grad_norm": 0.6075167059898376, | |
| "learning_rate": 2.170822836208574e-06, | |
| "loss": 0.0007, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 2.087165347327226, | |
| "grad_norm": 0.022046292200684547, | |
| "learning_rate": 2.157909825486473e-06, | |
| "loss": 0.0006, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.090130155059444, | |
| "grad_norm": 0.06569838523864746, | |
| "learning_rate": 2.1450247587590267e-06, | |
| "loss": 0.0005, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 2.093094962791663, | |
| "grad_norm": 0.0742030218243599, | |
| "learning_rate": 2.132167762714769e-06, | |
| "loss": 0.0014, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 2.0960597705238815, | |
| "grad_norm": 0.051597367972135544, | |
| "learning_rate": 2.119338963766234e-06, | |
| "loss": 0.0009, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 2.0990245782561, | |
| "grad_norm": 0.039865605533123016, | |
| "learning_rate": 2.106538488048722e-06, | |
| "loss": 0.0004, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.1019893859883187, | |
| "grad_norm": 0.008128203451633453, | |
| "learning_rate": 2.093766461419048e-06, | |
| "loss": 0.0009, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 2.104954193720537, | |
| "grad_norm": 0.01954249106347561, | |
| "learning_rate": 2.0810230094543133e-06, | |
| "loss": 0.0011, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.107919001452756, | |
| "grad_norm": 0.47133609652519226, | |
| "learning_rate": 2.0683082574506698e-06, | |
| "loss": 0.0008, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 2.1108838091849744, | |
| "grad_norm": 0.1561160683631897, | |
| "learning_rate": 2.055622330422078e-06, | |
| "loss": 0.0004, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.113848616917193, | |
| "grad_norm": 0.1073426902294159, | |
| "learning_rate": 2.0429653530990924e-06, | |
| "loss": 0.0008, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 2.1168134246494117, | |
| "grad_norm": 0.015249393880367279, | |
| "learning_rate": 2.0303374499276278e-06, | |
| "loss": 0.0009, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 2.11977823238163, | |
| "grad_norm": 0.03339747339487076, | |
| "learning_rate": 2.017738745067731e-06, | |
| "loss": 0.0006, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 2.1227430401138485, | |
| "grad_norm": 0.02580416575074196, | |
| "learning_rate": 2.0051693623923706e-06, | |
| "loss": 0.0013, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.1257078478460674, | |
| "grad_norm": 0.05754227936267853, | |
| "learning_rate": 1.9926294254862127e-06, | |
| "loss": 0.0004, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 2.1286726555782858, | |
| "grad_norm": 0.1823517233133316, | |
| "learning_rate": 1.9801190576444005e-06, | |
| "loss": 0.0007, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 2.131637463310504, | |
| "grad_norm": 0.011044003069400787, | |
| "learning_rate": 1.9676383818713612e-06, | |
| "loss": 0.0008, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 2.134602271042723, | |
| "grad_norm": 0.00475548068061471, | |
| "learning_rate": 1.95518752087957e-06, | |
| "loss": 0.0007, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.1375670787749415, | |
| "grad_norm": 1.1900372505187988, | |
| "learning_rate": 1.942766597088367e-06, | |
| "loss": 0.0007, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 2.14053188650716, | |
| "grad_norm": 0.009477607905864716, | |
| "learning_rate": 1.9303757326227368e-06, | |
| "loss": 0.0005, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 2.1434966942393787, | |
| "grad_norm": 0.0051522282883524895, | |
| "learning_rate": 1.9180150493121197e-06, | |
| "loss": 0.0006, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 2.146461501971597, | |
| "grad_norm": 0.0598483607172966, | |
| "learning_rate": 1.9056846686892088e-06, | |
| "loss": 0.0011, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.1494263097038155, | |
| "grad_norm": 0.012758989818394184, | |
| "learning_rate": 1.8933847119887499e-06, | |
| "loss": 0.0008, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 2.1523911174360344, | |
| "grad_norm": 0.021726280450820923, | |
| "learning_rate": 1.8811153001463606e-06, | |
| "loss": 0.0005, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 2.155355925168253, | |
| "grad_norm": 0.005376185290515423, | |
| "learning_rate": 1.8688765537973341e-06, | |
| "loss": 0.0005, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 2.158320732900471, | |
| "grad_norm": 0.006016736384481192, | |
| "learning_rate": 1.8566685932754507e-06, | |
| "loss": 0.0002, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.16128554063269, | |
| "grad_norm": 0.4107031524181366, | |
| "learning_rate": 1.8444915386118029e-06, | |
| "loss": 0.0004, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 2.1642503483649085, | |
| "grad_norm": 0.06917353719472885, | |
| "learning_rate": 1.83234550953361e-06, | |
| "loss": 0.0005, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.167215156097127, | |
| "grad_norm": 0.007486425340175629, | |
| "learning_rate": 1.8202306254630365e-06, | |
| "loss": 0.001, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 2.1701799638293457, | |
| "grad_norm": 0.026751527562737465, | |
| "learning_rate": 1.8081470055160322e-06, | |
| "loss": 0.0008, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 2.173144771561564, | |
| "grad_norm": 0.0032799181062728167, | |
| "learning_rate": 1.7960947685011425e-06, | |
| "loss": 0.0007, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 2.176109579293783, | |
| "grad_norm": 0.0042535727843642235, | |
| "learning_rate": 1.784074032918356e-06, | |
| "loss": 0.0013, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 2.1790743870260014, | |
| "grad_norm": 0.045528728514909744, | |
| "learning_rate": 1.7720849169579308e-06, | |
| "loss": 0.0019, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 2.18203919475822, | |
| "grad_norm": 0.46880343556404114, | |
| "learning_rate": 1.7601275384992317e-06, | |
| "loss": 0.0005, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 2.1850040024904387, | |
| "grad_norm": 0.3082144260406494, | |
| "learning_rate": 1.7482020151095807e-06, | |
| "loss": 0.0009, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 2.187968810222657, | |
| "grad_norm": 0.5959653854370117, | |
| "learning_rate": 1.7363084640430867e-06, | |
| "loss": 0.0007, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 2.1909336179548755, | |
| "grad_norm": 0.018325725570321083, | |
| "learning_rate": 1.724447002239506e-06, | |
| "loss": 0.0013, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 2.1938984256870944, | |
| "grad_norm": 0.025977754965424538, | |
| "learning_rate": 1.7126177463230875e-06, | |
| "loss": 0.001, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.1968632334193128, | |
| "grad_norm": 0.18662206828594208, | |
| "learning_rate": 1.7010564337675956e-06, | |
| "loss": 0.0016, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 2.199828041151531, | |
| "grad_norm": 0.025148356333374977, | |
| "learning_rate": 1.6892912883321422e-06, | |
| "loss": 0.0006, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 2.20279284888375, | |
| "grad_norm": 0.004004855640232563, | |
| "learning_rate": 1.677558694441807e-06, | |
| "loss": 0.0005, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 2.2057576566159685, | |
| "grad_norm": 0.016185695305466652, | |
| "learning_rate": 1.6658587674537785e-06, | |
| "loss": 0.0014, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 2.208722464348187, | |
| "grad_norm": 0.0063825915567576885, | |
| "learning_rate": 1.6541916224040617e-06, | |
| "loss": 0.0008, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 2.2116872720804057, | |
| "grad_norm": 0.7008712887763977, | |
| "learning_rate": 1.6425573740063438e-06, | |
| "loss": 0.0017, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 2.214652079812624, | |
| "grad_norm": 0.547462522983551, | |
| "learning_rate": 1.6309561366508619e-06, | |
| "loss": 0.0006, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 2.2176168875448425, | |
| "grad_norm": 0.003590489737689495, | |
| "learning_rate": 1.6193880244032856e-06, | |
| "loss": 0.0002, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 2.2205816952770614, | |
| "grad_norm": 0.005014427937567234, | |
| "learning_rate": 1.607853151003591e-06, | |
| "loss": 0.0004, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 2.22354650300928, | |
| "grad_norm": 0.4616244435310364, | |
| "learning_rate": 1.5963516298649401e-06, | |
| "loss": 0.0008, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.22354650300928, | |
| "eval_loss": 0.09748079627752304, | |
| "eval_runtime": 1908.5821, | |
| "eval_samples_per_second": 3.068, | |
| "eval_steps_per_second": 0.767, | |
| "eval_wer": 14.918500109641109, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.226511310741498, | |
| "grad_norm": 0.007188325747847557, | |
| "learning_rate": 1.5848835740725744e-06, | |
| "loss": 0.0014, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 2.229476118473717, | |
| "grad_norm": 0.004517501685768366, | |
| "learning_rate": 1.5734490963826914e-06, | |
| "loss": 0.0012, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 2.2324409262059355, | |
| "grad_norm": 0.009022989310324192, | |
| "learning_rate": 1.5620483092213467e-06, | |
| "loss": 0.0009, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 2.235405733938154, | |
| "grad_norm": 0.038672007620334625, | |
| "learning_rate": 1.5506813246833424e-06, | |
| "loss": 0.0003, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 2.2383705416703727, | |
| "grad_norm": 0.01938713528215885, | |
| "learning_rate": 1.5393482545311267e-06, | |
| "loss": 0.0006, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 2.241335349402591, | |
| "grad_norm": 0.12773361802101135, | |
| "learning_rate": 1.5280492101936955e-06, | |
| "loss": 0.0007, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 2.24430015713481, | |
| "grad_norm": 0.0028612061869353056, | |
| "learning_rate": 1.516784302765492e-06, | |
| "loss": 0.0006, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 2.2472649648670284, | |
| "grad_norm": 0.15187284350395203, | |
| "learning_rate": 1.5055536430053241e-06, | |
| "loss": 0.0003, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 2.250229772599247, | |
| "grad_norm": 0.13594093918800354, | |
| "learning_rate": 1.4943573413352685e-06, | |
| "loss": 0.0009, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 2.2531945803314657, | |
| "grad_norm": 0.0652371272444725, | |
| "learning_rate": 1.4831955078395822e-06, | |
| "loss": 0.001, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.256159388063684, | |
| "grad_norm": 0.09638596326112747, | |
| "learning_rate": 1.4720682522636294e-06, | |
| "loss": 0.0005, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 2.2591241957959025, | |
| "grad_norm": 0.019569765776395798, | |
| "learning_rate": 1.4609756840127959e-06, | |
| "loss": 0.0013, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 2.2620890035281214, | |
| "grad_norm": 0.06489352881908417, | |
| "learning_rate": 1.4499179121514118e-06, | |
| "loss": 0.0002, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 2.2650538112603398, | |
| "grad_norm": 0.0036429071333259344, | |
| "learning_rate": 1.4388950454016876e-06, | |
| "loss": 0.0009, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 2.268018618992558, | |
| "grad_norm": 0.0040684971027076244, | |
| "learning_rate": 1.4279071921426342e-06, | |
| "loss": 0.0008, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 2.270983426724777, | |
| "grad_norm": 0.20703789591789246, | |
| "learning_rate": 1.4169544604090063e-06, | |
| "loss": 0.0003, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 2.2739482344569955, | |
| "grad_norm": 1.6676180362701416, | |
| "learning_rate": 1.4060369578902366e-06, | |
| "loss": 0.0017, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 2.276913042189214, | |
| "grad_norm": 0.013792523182928562, | |
| "learning_rate": 1.3951547919293762e-06, | |
| "loss": 0.0007, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 2.2798778499214327, | |
| "grad_norm": 0.25312694907188416, | |
| "learning_rate": 1.3843080695220429e-06, | |
| "loss": 0.0004, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 2.282842657653651, | |
| "grad_norm": 0.0025230322498828173, | |
| "learning_rate": 1.37349689731536e-06, | |
| "loss": 0.0014, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.2858074653858695, | |
| "grad_norm": 0.3334575593471527, | |
| "learning_rate": 1.3627213816069208e-06, | |
| "loss": 0.0005, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 2.2887722731180884, | |
| "grad_norm": 0.008739791810512543, | |
| "learning_rate": 1.3519816283437349e-06, | |
| "loss": 0.001, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 2.291737080850307, | |
| "grad_norm": 0.5587530732154846, | |
| "learning_rate": 1.3412777431211859e-06, | |
| "loss": 0.0006, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 2.2947018885825257, | |
| "grad_norm": 0.02083914540708065, | |
| "learning_rate": 1.3306098311819982e-06, | |
| "loss": 0.001, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 2.297666696314744, | |
| "grad_norm": 0.008118952624499798, | |
| "learning_rate": 1.3199779974152011e-06, | |
| "loss": 0.0007, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 2.3006315040469625, | |
| "grad_norm": 0.0053606764413416386, | |
| "learning_rate": 1.3093823463550909e-06, | |
| "loss": 0.0009, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 2.303596311779181, | |
| "grad_norm": 0.06790202856063843, | |
| "learning_rate": 1.2988229821802157e-06, | |
| "loss": 0.0003, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 2.3065611195113997, | |
| "grad_norm": 0.056741103529930115, | |
| "learning_rate": 1.2883000087123355e-06, | |
| "loss": 0.0011, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 2.309525927243618, | |
| "grad_norm": 0.012334014289081097, | |
| "learning_rate": 1.277813529415416e-06, | |
| "loss": 0.0008, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 2.312490734975837, | |
| "grad_norm": 0.033796992152929306, | |
| "learning_rate": 1.2673636473946033e-06, | |
| "loss": 0.0006, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.3154555427080554, | |
| "grad_norm": 0.010236002504825592, | |
| "learning_rate": 1.2569504653952103e-06, | |
| "loss": 0.0007, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 2.318420350440274, | |
| "grad_norm": 0.09120677411556244, | |
| "learning_rate": 1.2465740858017118e-06, | |
| "loss": 0.0004, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 2.3213851581724927, | |
| "grad_norm": 0.016125334426760674, | |
| "learning_rate": 1.2362346106367268e-06, | |
| "loss": 0.0005, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 2.324349965904711, | |
| "grad_norm": 0.04164751619100571, | |
| "learning_rate": 1.2259321415600307e-06, | |
| "loss": 0.0012, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.3273147736369295, | |
| "grad_norm": 0.005560677032917738, | |
| "learning_rate": 1.2156667798675436e-06, | |
| "loss": 0.0008, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 2.3302795813691484, | |
| "grad_norm": 0.005640542134642601, | |
| "learning_rate": 1.2054386264903373e-06, | |
| "loss": 0.0007, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 2.333244389101367, | |
| "grad_norm": 0.005680414382368326, | |
| "learning_rate": 1.1952477819936465e-06, | |
| "loss": 0.0007, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 2.336209196833585, | |
| "grad_norm": 0.1653079390525818, | |
| "learning_rate": 1.185094346575879e-06, | |
| "loss": 0.0003, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.339174004565804, | |
| "grad_norm": 0.02139970287680626, | |
| "learning_rate": 1.1749784200676233e-06, | |
| "loss": 0.0002, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 2.3421388122980225, | |
| "grad_norm": 0.007563546299934387, | |
| "learning_rate": 1.1649001019306799e-06, | |
| "loss": 0.0006, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.345103620030241, | |
| "grad_norm": 0.020477985963225365, | |
| "learning_rate": 1.15485949125707e-06, | |
| "loss": 0.0006, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 2.3480684277624597, | |
| "grad_norm": 0.007695810403674841, | |
| "learning_rate": 1.1448566867680715e-06, | |
| "loss": 0.0006, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.351033235494678, | |
| "grad_norm": 0.1916559338569641, | |
| "learning_rate": 1.1348917868132452e-06, | |
| "loss": 0.0004, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 2.3539980432268965, | |
| "grad_norm": 0.006019901018589735, | |
| "learning_rate": 1.124964889369461e-06, | |
| "loss": 0.0006, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 2.3569628509591154, | |
| "grad_norm": 0.12937164306640625, | |
| "learning_rate": 1.1150760920399501e-06, | |
| "loss": 0.0007, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 2.359927658691334, | |
| "grad_norm": 2.1091978549957275, | |
| "learning_rate": 1.1052254920533262e-06, | |
| "loss": 0.001, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.3628924664235527, | |
| "grad_norm": 0.014486163854598999, | |
| "learning_rate": 1.095413186262645e-06, | |
| "loss": 0.0004, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 2.365857274155771, | |
| "grad_norm": 0.021811697632074356, | |
| "learning_rate": 1.0856392711444452e-06, | |
| "loss": 0.0005, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 2.3688220818879895, | |
| "grad_norm": 0.030487345531582832, | |
| "learning_rate": 1.075903842797798e-06, | |
| "loss": 0.0009, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 2.371786889620208, | |
| "grad_norm": 0.6815763711929321, | |
| "learning_rate": 1.0662069969433681e-06, | |
| "loss": 0.0004, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.371786889620208, | |
| "eval_loss": 0.0971890240907669, | |
| "eval_runtime": 1820.3139, | |
| "eval_samples_per_second": 3.216, | |
| "eval_steps_per_second": 0.804, | |
| "eval_wer": 11.26525838754477, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.3747516973524267, | |
| "grad_norm": 0.007269201334565878, | |
| "learning_rate": 1.0565488289224695e-06, | |
| "loss": 0.0011, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 2.377716505084645, | |
| "grad_norm": 0.012200387194752693, | |
| "learning_rate": 1.046929433696125e-06, | |
| "loss": 0.0006, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 2.380681312816864, | |
| "grad_norm": 0.0072285993956029415, | |
| "learning_rate": 1.037348905844139e-06, | |
| "loss": 0.0007, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 2.3836461205490824, | |
| "grad_norm": 0.011018377728760242, | |
| "learning_rate": 1.027807339564163e-06, | |
| "loss": 0.0005, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.386610928281301, | |
| "grad_norm": 0.030931444838643074, | |
| "learning_rate": 1.0183048286707686e-06, | |
| "loss": 0.0003, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 2.3895757360135197, | |
| "grad_norm": 0.0054734209552407265, | |
| "learning_rate": 1.0088414665945312e-06, | |
| "loss": 0.0006, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 2.392540543745738, | |
| "grad_norm": 0.0037654677871614695, | |
| "learning_rate": 9.994173463811008e-07, | |
| "loss": 0.001, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 2.3955053514779565, | |
| "grad_norm": 0.008112763054668903, | |
| "learning_rate": 9.900325606903033e-07, | |
| "loss": 0.0004, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.3984701592101754, | |
| "grad_norm": 0.012320293113589287, | |
| "learning_rate": 9.806872017952102e-07, | |
| "loss": 0.0004, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 2.401434966942394, | |
| "grad_norm": 0.02817295864224434, | |
| "learning_rate": 9.713813615812456e-07, | |
| "loss": 0.001, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.404399774674612, | |
| "grad_norm": 0.06625434756278992, | |
| "learning_rate": 9.621151315452792e-07, | |
| "loss": 0.0007, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 2.407364582406831, | |
| "grad_norm": 0.04207722470164299, | |
| "learning_rate": 9.528886027947215e-07, | |
| "loss": 0.0015, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.4103293901390495, | |
| "grad_norm": 0.04913393780589104, | |
| "learning_rate": 9.437018660466352e-07, | |
| "loss": 0.001, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 2.413294197871268, | |
| "grad_norm": 0.007566593121737242, | |
| "learning_rate": 9.345550116268404e-07, | |
| "loss": 0.0013, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 2.4162590056034867, | |
| "grad_norm": 0.01240174937993288, | |
| "learning_rate": 9.254481294690221e-07, | |
| "loss": 0.0004, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 2.419223813335705, | |
| "grad_norm": 0.013987046666443348, | |
| "learning_rate": 9.163813091138557e-07, | |
| "loss": 0.0004, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.4221886210679235, | |
| "grad_norm": 0.012768911197781563, | |
| "learning_rate": 9.073546397081185e-07, | |
| "loss": 0.0011, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 2.4251534288001424, | |
| "grad_norm": 0.010746861808001995, | |
| "learning_rate": 8.983682100038138e-07, | |
| "loss": 0.0005, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 2.428118236532361, | |
| "grad_norm": 0.11308763176202774, | |
| "learning_rate": 8.894221083573041e-07, | |
| "loss": 0.0003, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 2.4310830442645797, | |
| "grad_norm": 0.003393119666725397, | |
| "learning_rate": 8.805164227284336e-07, | |
| "loss": 0.0012, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.434047851996798, | |
| "grad_norm": 0.0036778796929866076, | |
| "learning_rate": 8.716512406796724e-07, | |
| "loss": 0.001, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 2.4370126597290165, | |
| "grad_norm": 0.2601497769355774, | |
| "learning_rate": 8.628266493752496e-07, | |
| "loss": 0.0004, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 2.439977467461235, | |
| "grad_norm": 0.0293083768337965, | |
| "learning_rate": 8.540427355802988e-07, | |
| "loss": 0.0005, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 2.4429422751934537, | |
| "grad_norm": 0.11809295415878296, | |
| "learning_rate": 8.45299585660005e-07, | |
| "loss": 0.0019, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.445907082925672, | |
| "grad_norm": Infinity, | |
| "learning_rate": 8.365972855787496e-07, | |
| "loss": 0.0008, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 2.448871890657891, | |
| "grad_norm": 0.0271657295525074, | |
| "learning_rate": 8.281087464744486e-07, | |
| "loss": 0.0005, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 2.4518366983901094, | |
| "grad_norm": 0.40079793334007263, | |
| "learning_rate": 8.194875811138108e-07, | |
| "loss": 0.001, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 2.454801506122328, | |
| "grad_norm": 0.03252971172332764, | |
| "learning_rate": 8.109075193809662e-07, | |
| "loss": 0.0004, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.4577663138545467, | |
| "grad_norm": 0.16962403059005737, | |
| "learning_rate": 8.023686456367818e-07, | |
| "loss": 0.0008, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 2.460731121586765, | |
| "grad_norm": 0.01961754634976387, | |
| "learning_rate": 7.938710438371617e-07, | |
| "loss": 0.0004, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.4636959293189835, | |
| "grad_norm": 0.4819841682910919, | |
| "learning_rate": 7.854147975322113e-07, | |
| "loss": 0.0004, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 2.4666607370512024, | |
| "grad_norm": 0.0028431855607777834, | |
| "learning_rate": 7.76999989865424e-07, | |
| "loss": 0.0003, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.469625544783421, | |
| "grad_norm": 0.004796815570443869, | |
| "learning_rate": 7.686267035728595e-07, | |
| "loss": 0.0005, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 2.472590352515639, | |
| "grad_norm": 0.014351542107760906, | |
| "learning_rate": 7.602950209823279e-07, | |
| "loss": 0.0004, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 2.475555160247858, | |
| "grad_norm": 0.04429518058896065, | |
| "learning_rate": 7.520050240125876e-07, | |
| "loss": 0.001, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 2.4785199679800765, | |
| "grad_norm": 0.03187648952007294, | |
| "learning_rate": 7.437567941725348e-07, | |
| "loss": 0.0002, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.481484775712295, | |
| "grad_norm": 0.005897920113056898, | |
| "learning_rate": 7.355504125604007e-07, | |
| "loss": 0.0003, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 2.4844495834445137, | |
| "grad_norm": 0.009089338593184948, | |
| "learning_rate": 7.273859598629596e-07, | |
| "loss": 0.0004, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 2.487414391176732, | |
| "grad_norm": 0.01698206551373005, | |
| "learning_rate": 7.192635163547284e-07, | |
| "loss": 0.0003, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 2.4903791989089505, | |
| "grad_norm": 0.006857879459857941, | |
| "learning_rate": 7.111831618971848e-07, | |
| "loss": 0.0003, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.4933440066411694, | |
| "grad_norm": 0.02485840767621994, | |
| "learning_rate": 7.031449759379799e-07, | |
| "loss": 0.0002, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 2.496308814373388, | |
| "grad_norm": 0.003701738314703107, | |
| "learning_rate": 6.951490375101494e-07, | |
| "loss": 0.0008, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 2.4992736221056067, | |
| "grad_norm": 0.6532867550849915, | |
| "learning_rate": 6.871954252313489e-07, | |
| "loss": 0.0008, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 2.502238429837825, | |
| "grad_norm": 0.047790996730327606, | |
| "learning_rate": 6.792842173030729e-07, | |
| "loss": 0.0004, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.5052032375700435, | |
| "grad_norm": 0.020621391013264656, | |
| "learning_rate": 6.714154915098875e-07, | |
| "loss": 0.0003, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 2.508168045302262, | |
| "grad_norm": 0.010965166613459587, | |
| "learning_rate": 6.635893252186676e-07, | |
| "loss": 0.0003, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 2.5111328530344807, | |
| "grad_norm": 0.004576478153467178, | |
| "learning_rate": 6.558057953778313e-07, | |
| "loss": 0.0007, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 2.514097660766699, | |
| "grad_norm": 0.007650887127965689, | |
| "learning_rate": 6.480649785165899e-07, | |
| "loss": 0.0009, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.517062468498918, | |
| "grad_norm": 0.01692899316549301, | |
| "learning_rate": 6.403669507441917e-07, | |
| "loss": 0.0005, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 2.5200272762311364, | |
| "grad_norm": 0.03952137380838394, | |
| "learning_rate": 6.327117877491717e-07, | |
| "loss": 0.0007, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.5200272762311364, | |
| "eval_loss": 0.09690303355455399, | |
| "eval_runtime": 1935.2254, | |
| "eval_samples_per_second": 3.025, | |
| "eval_steps_per_second": 0.757, | |
| "eval_wer": 13.773846941013085, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.522992083963355, | |
| "grad_norm": 0.006857364438474178, | |
| "learning_rate": 6.250995647986141e-07, | |
| "loss": 0.0004, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 2.5259568916955737, | |
| "grad_norm": 0.020079879090189934, | |
| "learning_rate": 6.175303567374036e-07, | |
| "loss": 0.0008, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.528921699427792, | |
| "grad_norm": 0.006522559095174074, | |
| "learning_rate": 6.100042379874971e-07, | |
| "loss": 0.0008, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 2.5318865071600105, | |
| "grad_norm": 0.18019999563694, | |
| "learning_rate": 6.025212825471882e-07, | |
| "loss": 0.0012, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 2.5348513148922294, | |
| "grad_norm": 0.03285055235028267, | |
| "learning_rate": 5.950815639903784e-07, | |
| "loss": 0.0005, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 2.537816122624448, | |
| "grad_norm": 0.9281402826309204, | |
| "learning_rate": 5.876851554658585e-07, | |
| "loss": 0.0006, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.540780930356666, | |
| "grad_norm": 0.004797223024070263, | |
| "learning_rate": 5.803321296965842e-07, | |
| "loss": 0.0011, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 2.543745738088885, | |
| "grad_norm": 0.006800955627113581, | |
| "learning_rate": 5.730225589789645e-07, | |
| "loss": 0.0007, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 2.5467105458211035, | |
| "grad_norm": 0.016797848045825958, | |
| "learning_rate": 5.657565151821509e-07, | |
| "loss": 0.0004, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 2.5496753535533223, | |
| "grad_norm": 0.008737844415009022, | |
| "learning_rate": 5.585340697473257e-07, | |
| "loss": 0.0004, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.5526401612855407, | |
| "grad_norm": 0.002092113019898534, | |
| "learning_rate": 5.513552936870065e-07, | |
| "loss": 0.001, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 2.555604969017759, | |
| "grad_norm": 0.004055564291775227, | |
| "learning_rate": 5.442202575843458e-07, | |
| "loss": 0.0005, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 2.5585697767499775, | |
| "grad_norm": 0.027113450691103935, | |
| "learning_rate": 5.372704263215106e-07, | |
| "loss": 0.0008, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 2.5615345844821964, | |
| "grad_norm": 1.4055336713790894, | |
| "learning_rate": 5.302222018855358e-07, | |
| "loss": 0.0004, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.564499392214415, | |
| "grad_norm": 0.016375090926885605, | |
| "learning_rate": 5.232179251919717e-07, | |
| "loss": 0.0004, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 2.5674641999466337, | |
| "grad_norm": 0.009804923087358475, | |
| "learning_rate": 5.162576651082541e-07, | |
| "loss": 0.0001, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 2.570429007678852, | |
| "grad_norm": 1.839759349822998, | |
| "learning_rate": 5.093414900690458e-07, | |
| "loss": 0.001, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 2.5733938154110705, | |
| "grad_norm": 0.0017718200106173754, | |
| "learning_rate": 5.024694680755493e-07, | |
| "loss": 0.0003, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.576358623143289, | |
| "grad_norm": 0.004344862885773182, | |
| "learning_rate": 4.956416666948494e-07, | |
| "loss": 0.0004, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 2.5793234308755077, | |
| "grad_norm": 0.0035795283038169146, | |
| "learning_rate": 4.888581530592456e-07, | |
| "loss": 0.0007, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.582288238607726, | |
| "grad_norm": 0.04727310314774513, | |
| "learning_rate": 4.821189938655885e-07, | |
| "loss": 0.0004, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 2.585253046339945, | |
| "grad_norm": 0.003710675286129117, | |
| "learning_rate": 4.754242553746297e-07, | |
| "loss": 0.0008, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.5882178540721634, | |
| "grad_norm": 0.0148782255128026, | |
| "learning_rate": 4.687740034103672e-07, | |
| "loss": 0.0003, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 2.591182661804382, | |
| "grad_norm": 0.1427607536315918, | |
| "learning_rate": 4.621683033593971e-07, | |
| "loss": 0.0007, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 2.5941474695366007, | |
| "grad_norm": 0.5589999556541443, | |
| "learning_rate": 4.556072201702733e-07, | |
| "loss": 0.0002, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 2.597112277268819, | |
| "grad_norm": 0.008652674965560436, | |
| "learning_rate": 4.490908183528697e-07, | |
| "loss": 0.0003, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.600077085001038, | |
| "grad_norm": 2.5965774059295654, | |
| "learning_rate": 4.4261916197773924e-07, | |
| "loss": 0.001, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 2.6030418927332564, | |
| "grad_norm": 0.11119319498538971, | |
| "learning_rate": 4.361923146754948e-07, | |
| "loss": 0.0004, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 2.606006700465475, | |
| "grad_norm": 0.0016937926411628723, | |
| "learning_rate": 4.298103396361719e-07, | |
| "loss": 0.0004, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 2.608971508197693, | |
| "grad_norm": 0.0073602148331701756, | |
| "learning_rate": 4.234732996086172e-07, | |
| "loss": 0.0005, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.611936315929912, | |
| "grad_norm": 0.08338925987482071, | |
| "learning_rate": 4.1718125689986677e-07, | |
| "loss": 0.0017, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 2.6149011236621305, | |
| "grad_norm": 0.004543852526694536, | |
| "learning_rate": 4.1093427337453195e-07, | |
| "loss": 0.0005, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 2.6178659313943493, | |
| "grad_norm": 0.007618986535817385, | |
| "learning_rate": 4.0473241045419554e-07, | |
| "loss": 0.0008, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 2.6208307391265677, | |
| "grad_norm": 0.31889769434928894, | |
| "learning_rate": 3.9857572911680296e-07, | |
| "loss": 0.0004, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.623795546858786, | |
| "grad_norm": 0.06992673128843307, | |
| "learning_rate": 3.924642898960679e-07, | |
| "loss": 0.0003, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 2.6267603545910045, | |
| "grad_norm": 0.04514357075095177, | |
| "learning_rate": 3.8639815288087465e-07, | |
| "loss": 0.0004, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 2.6297251623232234, | |
| "grad_norm": 0.005873105023056269, | |
| "learning_rate": 3.803773777146852e-07, | |
| "loss": 0.0009, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 2.632689970055442, | |
| "grad_norm": 0.003560218960046768, | |
| "learning_rate": 3.7440202359495583e-07, | |
| "loss": 0.0003, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.6356547777876607, | |
| "grad_norm": 0.0036311550065875053, | |
| "learning_rate": 3.6847214927255517e-07, | |
| "loss": 0.0005, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 2.638619585519879, | |
| "grad_norm": 0.010604312643408775, | |
| "learning_rate": 3.625878130511834e-07, | |
| "loss": 0.0007, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.6415843932520975, | |
| "grad_norm": 0.015460291877388954, | |
| "learning_rate": 3.567490727868028e-07, | |
| "loss": 0.0007, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 2.644549200984316, | |
| "grad_norm": 0.1841159462928772, | |
| "learning_rate": 3.5095598588706537e-07, | |
| "loss": 0.0009, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.6475140087165348, | |
| "grad_norm": 0.018142210319638252, | |
| "learning_rate": 3.452086093107515e-07, | |
| "loss": 0.0002, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 2.650478816448753, | |
| "grad_norm": 0.0007878096075728536, | |
| "learning_rate": 3.3950699956721013e-07, | |
| "loss": 0.0003, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 2.653443624180972, | |
| "grad_norm": 0.030863964930176735, | |
| "learning_rate": 3.338512127157978e-07, | |
| "loss": 0.0002, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 2.6564084319131904, | |
| "grad_norm": 0.00733591802418232, | |
| "learning_rate": 3.2824130436533484e-07, | |
| "loss": 0.0013, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.659373239645409, | |
| "grad_norm": 0.00609589321538806, | |
| "learning_rate": 3.2267732967355136e-07, | |
| "loss": 0.0007, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 2.6623380473776277, | |
| "grad_norm": 1.0534803867340088, | |
| "learning_rate": 3.1715934334655306e-07, | |
| "loss": 0.0007, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 2.665302855109846, | |
| "grad_norm": 0.005737192463129759, | |
| "learning_rate": 3.1168739963827574e-07, | |
| "loss": 0.0004, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 2.668267662842065, | |
| "grad_norm": 1.1238080263137817, | |
| "learning_rate": 3.062615523499557e-07, | |
| "loss": 0.0009, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.668267662842065, | |
| "eval_loss": 0.09690423309803009, | |
| "eval_runtime": 1796.5872, | |
| "eval_samples_per_second": 3.259, | |
| "eval_steps_per_second": 0.815, | |
| "eval_wer": 12.171624881222133, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.6712324705742834, | |
| "grad_norm": 0.004947836045175791, | |
| "learning_rate": 3.008818548296e-07, | |
| "loss": 0.001, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 2.674197278306502, | |
| "grad_norm": 0.017444021999835968, | |
| "learning_rate": 2.955483599714637e-07, | |
| "loss": 0.0004, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 2.67716208603872, | |
| "grad_norm": 0.004913876764476299, | |
| "learning_rate": 2.902611202155259e-07, | |
| "loss": 0.0001, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 2.680126893770939, | |
| "grad_norm": 0.012263476848602295, | |
| "learning_rate": 2.850201875469771e-07, | |
| "loss": 0.0005, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.6830917015031575, | |
| "grad_norm": 0.002866186900064349, | |
| "learning_rate": 2.7982561349570724e-07, | |
| "loss": 0.0007, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 2.6860565092353763, | |
| "grad_norm": 0.029279733076691628, | |
| "learning_rate": 2.7467744913579953e-07, | |
| "loss": 0.0005, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 2.6890213169675947, | |
| "grad_norm": 0.004325803369283676, | |
| "learning_rate": 2.695757450850284e-07, | |
| "loss": 0.0006, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 2.691986124699813, | |
| "grad_norm": 0.01715932786464691, | |
| "learning_rate": 2.645205515043597e-07, | |
| "loss": 0.0001, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.6949509324320315, | |
| "grad_norm": 0.23892708122730255, | |
| "learning_rate": 2.5951191809746146e-07, | |
| "loss": 0.0004, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 2.6979157401642504, | |
| "grad_norm": 0.0025347827468067408, | |
| "learning_rate": 2.5454989411020926e-07, | |
| "loss": 0.0005, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.700880547896469, | |
| "grad_norm": 0.0021720200311392546, | |
| "learning_rate": 2.4963452833020876e-07, | |
| "loss": 0.0014, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 2.7038453556286877, | |
| "grad_norm": 0.007619790267199278, | |
| "learning_rate": 2.447658690863125e-07, | |
| "loss": 0.0002, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.706810163360906, | |
| "grad_norm": 0.009241044521331787, | |
| "learning_rate": 2.399439642481433e-07, | |
| "loss": 0.0019, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 2.7097749710931245, | |
| "grad_norm": 0.06682003289461136, | |
| "learning_rate": 2.3516886122562642e-07, | |
| "loss": 0.0012, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 2.7127397788253433, | |
| "grad_norm": 0.007818753831088543, | |
| "learning_rate": 2.3044060696852444e-07, | |
| "loss": 0.0008, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 2.7157045865575618, | |
| "grad_norm": 0.011879599653184414, | |
| "learning_rate": 2.2575924796596926e-07, | |
| "loss": 0.0003, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.71866939428978, | |
| "grad_norm": 0.007650415413081646, | |
| "learning_rate": 2.2112483024601228e-07, | |
| "loss": 0.0005, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 2.721634202021999, | |
| "grad_norm": 0.00841108150780201, | |
| "learning_rate": 2.165373993751696e-07, | |
| "loss": 0.0003, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 2.7245990097542174, | |
| "grad_norm": 0.007607500068843365, | |
| "learning_rate": 2.1199700045797077e-07, | |
| "loss": 0.0004, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 2.727563817486436, | |
| "grad_norm": 0.0019570747390389442, | |
| "learning_rate": 2.075036781365186e-07, | |
| "loss": 0.0003, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.7305286252186547, | |
| "grad_norm": 0.00809240061789751, | |
| "learning_rate": 2.0305747659005114e-07, | |
| "loss": 0.0002, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 2.733493432950873, | |
| "grad_norm": 0.004082467406988144, | |
| "learning_rate": 1.9865843953450424e-07, | |
| "loss": 0.0009, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 2.736458240683092, | |
| "grad_norm": 0.007133570034056902, | |
| "learning_rate": 1.9430661022208252e-07, | |
| "loss": 0.0003, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 2.7394230484153104, | |
| "grad_norm": 0.31264781951904297, | |
| "learning_rate": 1.9000203144083628e-07, | |
| "loss": 0.0003, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.742387856147529, | |
| "grad_norm": 0.23617079854011536, | |
| "learning_rate": 1.8574474551423804e-07, | |
| "loss": 0.0002, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 2.745352663879747, | |
| "grad_norm": 0.004803156014531851, | |
| "learning_rate": 1.815347943007678e-07, | |
| "loss": 0.0005, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 2.748317471611966, | |
| "grad_norm": 0.04453460872173309, | |
| "learning_rate": 1.7737221919350177e-07, | |
| "loss": 0.0007, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 2.7512822793441845, | |
| "grad_norm": 0.00791526585817337, | |
| "learning_rate": 1.732570611197043e-07, | |
| "loss": 0.0006, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.7542470870764033, | |
| "grad_norm": 0.016163982450962067, | |
| "learning_rate": 1.6918936054042656e-07, | |
| "loss": 0.0004, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 2.7572118948086217, | |
| "grad_norm": 0.0028755166567862034, | |
| "learning_rate": 1.651691574501074e-07, | |
| "loss": 0.0003, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.76017670254084, | |
| "grad_norm": 0.002072387607768178, | |
| "learning_rate": 1.611964913761821e-07, | |
| "loss": 0.0007, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 2.7631415102730585, | |
| "grad_norm": 0.013004027307033539, | |
| "learning_rate": 1.5727140137869134e-07, | |
| "loss": 0.0008, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.7661063180052774, | |
| "grad_norm": 0.008961477316915989, | |
| "learning_rate": 1.53393926049899e-07, | |
| "loss": 0.0006, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 2.769071125737496, | |
| "grad_norm": 0.015411244705319405, | |
| "learning_rate": 1.4956410351391281e-07, | |
| "loss": 0.0009, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 2.7720359334697147, | |
| "grad_norm": 0.059329330921173096, | |
| "learning_rate": 1.457819714263081e-07, | |
| "loss": 0.0004, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 2.775000741201933, | |
| "grad_norm": 0.007074225228279829, | |
| "learning_rate": 1.4204756697375698e-07, | |
| "loss": 0.0009, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.7779655489341515, | |
| "grad_norm": 0.36626216769218445, | |
| "learning_rate": 1.3836092687366575e-07, | |
| "loss": 0.0005, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 2.7809303566663703, | |
| "grad_norm": 0.3594434857368469, | |
| "learning_rate": 1.3472208737381198e-07, | |
| "loss": 0.0005, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 2.7838951643985888, | |
| "grad_norm": 0.027519946917891502, | |
| "learning_rate": 1.311310842519875e-07, | |
| "loss": 0.0003, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 2.786859972130807, | |
| "grad_norm": 0.12820805609226227, | |
| "learning_rate": 1.2758795281564763e-07, | |
| "loss": 0.0002, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.789824779863026, | |
| "grad_norm": 0.016581403091549873, | |
| "learning_rate": 1.2409272790156412e-07, | |
| "loss": 0.0001, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 2.7927895875952444, | |
| "grad_norm": 0.09786204993724823, | |
| "learning_rate": 1.2064544387548116e-07, | |
| "loss": 0.0003, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 2.795754395327463, | |
| "grad_norm": 0.00287329638376832, | |
| "learning_rate": 1.1724613463178047e-07, | |
| "loss": 0.0006, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 2.7987192030596817, | |
| "grad_norm": 0.003532948438078165, | |
| "learning_rate": 1.1389483359314502e-07, | |
| "loss": 0.0005, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.8016840107919, | |
| "grad_norm": 0.015650948509573936, | |
| "learning_rate": 1.1059157371023088e-07, | |
| "loss": 0.0006, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 2.804648818524119, | |
| "grad_norm": 0.0076681459322571754, | |
| "learning_rate": 1.0733638746134645e-07, | |
| "loss": 0.0007, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 2.8076136262563374, | |
| "grad_norm": 0.026479622349143028, | |
| "learning_rate": 1.0412930685212652e-07, | |
| "loss": 0.0008, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 2.810578433988556, | |
| "grad_norm": 0.002863786881789565, | |
| "learning_rate": 1.0097036341522703e-07, | |
| "loss": 0.0006, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.813543241720774, | |
| "grad_norm": 1.0755295753479004, | |
| "learning_rate": 9.785958821000418e-08, | |
| "loss": 0.0009, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 2.816508049452993, | |
| "grad_norm": 0.028524285182356834, | |
| "learning_rate": 9.479701182221912e-08, | |
| "loss": 0.0006, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.816508049452993, | |
| "eval_loss": 0.09689270704984665, | |
| "eval_runtime": 1776.8698, | |
| "eval_samples_per_second": 3.295, | |
| "eval_steps_per_second": 0.824, | |
| "eval_wer": 11.217016299978072, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.8194728571852115, | |
| "grad_norm": 0.05420482158660889, | |
| "learning_rate": 9.178266436372985e-08, | |
| "loss": 0.0004, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 2.8224376649174303, | |
| "grad_norm": 0.0069849551655352116, | |
| "learning_rate": 8.881657547219869e-08, | |
| "loss": 0.0002, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.8254024726496487, | |
| "grad_norm": 0.026374366134405136, | |
| "learning_rate": 8.58987743108003e-08, | |
| "loss": 0.0004, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 2.828367280381867, | |
| "grad_norm": 0.018248042091727257, | |
| "learning_rate": 8.302928956793576e-08, | |
| "loss": 0.0003, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 2.8313320881140855, | |
| "grad_norm": 1.0653489828109741, | |
| "learning_rate": 8.020814945694733e-08, | |
| "loss": 0.0007, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 2.8342968958463044, | |
| "grad_norm": 0.03135114163160324, | |
| "learning_rate": 7.743538171584464e-08, | |
| "loss": 0.0002, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.837261703578523, | |
| "grad_norm": 0.0379420705139637, | |
| "learning_rate": 7.471101360703115e-08, | |
| "loss": 0.0009, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 2.8402265113107417, | |
| "grad_norm": 0.007235648576170206, | |
| "learning_rate": 7.20350719170343e-08, | |
| "loss": 0.0007, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 2.84319131904296, | |
| "grad_norm": 0.14785419404506683, | |
| "learning_rate": 6.940758295624406e-08, | |
| "loss": 0.0005, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 2.8461561267751785, | |
| "grad_norm": 0.008452442474663258, | |
| "learning_rate": 6.687967751188796e-08, | |
| "loss": 0.0006, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.8491209345073973, | |
| "grad_norm": 0.01667088270187378, | |
| "learning_rate": 6.434820071100967e-08, | |
| "loss": 0.0006, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 2.8520857422396158, | |
| "grad_norm": 0.0059527806006371975, | |
| "learning_rate": 6.186525221818119e-08, | |
| "loss": 0.0006, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 2.855050549971834, | |
| "grad_norm": 0.00854993611574173, | |
| "learning_rate": 5.943085644624536e-08, | |
| "loss": 0.0007, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 2.858015357704053, | |
| "grad_norm": 0.6749946475028992, | |
| "learning_rate": 5.704503733066358e-08, | |
| "loss": 0.0009, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 2.8609801654362714, | |
| "grad_norm": 0.15325838327407837, | |
| "learning_rate": 5.470781832928374e-08, | |
| "loss": 0.0006, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 2.86394497316849, | |
| "grad_norm": 0.013085714541375637, | |
| "learning_rate": 5.241922242210595e-08, | |
| "loss": 0.0003, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 2.8669097809007087, | |
| "grad_norm": 0.04127352684736252, | |
| "learning_rate": 5.017927211106e-08, | |
| "loss": 0.001, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 2.869874588632927, | |
| "grad_norm": 0.28855589032173157, | |
| "learning_rate": 4.7987989419782665e-08, | |
| "loss": 0.0014, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 2.872839396365146, | |
| "grad_norm": 0.06962502002716064, | |
| "learning_rate": 4.584539589339854e-08, | |
| "loss": 0.0002, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 2.8758042040973644, | |
| "grad_norm": 0.00731350714340806, | |
| "learning_rate": 4.375151259831401e-08, | |
| "loss": 0.0004, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.878769011829583, | |
| "grad_norm": 0.025366390123963356, | |
| "learning_rate": 4.170636012200413e-08, | |
| "loss": 0.0003, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 2.881733819561801, | |
| "grad_norm": 0.011618382297456264, | |
| "learning_rate": 3.970995857281446e-08, | |
| "loss": 0.0005, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 2.88469862729402, | |
| "grad_norm": 0.001259606797248125, | |
| "learning_rate": 3.776232757976117e-08, | |
| "loss": 0.0009, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 2.8876634350262385, | |
| "grad_norm": 0.002274399623274803, | |
| "learning_rate": 3.586348629233849e-08, | |
| "loss": 0.0005, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 2.8906282427584573, | |
| "grad_norm": 0.009601451456546783, | |
| "learning_rate": 3.401345338033102e-08, | |
| "loss": 0.0005, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 2.8935930504906757, | |
| "grad_norm": 0.007640424184501171, | |
| "learning_rate": 3.2212247033629465e-08, | |
| "loss": 0.0004, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 2.896557858222894, | |
| "grad_norm": 0.0022892076522111893, | |
| "learning_rate": 3.045988496205243e-08, | |
| "loss": 0.0012, | |
| "step": 48850 | |
| }, | |
| { | |
| "epoch": 2.8995226659551125, | |
| "grad_norm": 0.005462869070470333, | |
| "learning_rate": 2.8756384395171032e-08, | |
| "loss": 0.0003, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 2.9024874736873314, | |
| "grad_norm": 0.008442184887826443, | |
| "learning_rate": 2.7101762082141216e-08, | |
| "loss": 0.0004, | |
| "step": 48950 | |
| }, | |
| { | |
| "epoch": 2.90545228141955, | |
| "grad_norm": 0.14164365828037262, | |
| "learning_rate": 2.549603429153835e-08, | |
| "loss": 0.0003, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.9084170891517687, | |
| "grad_norm": 0.1371513307094574, | |
| "learning_rate": 2.393921681119571e-08, | |
| "loss": 0.0005, | |
| "step": 49050 | |
| }, | |
| { | |
| "epoch": 2.911381896883987, | |
| "grad_norm": 0.01292746514081955, | |
| "learning_rate": 2.243132494805289e-08, | |
| "loss": 0.0005, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 2.9143467046162055, | |
| "grad_norm": 0.09286642074584961, | |
| "learning_rate": 2.0972373528000966e-08, | |
| "loss": 0.0006, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 2.9173115123484243, | |
| "grad_norm": 0.027824992313981056, | |
| "learning_rate": 1.9562376895740363e-08, | |
| "loss": 0.0007, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 2.9202763200806428, | |
| "grad_norm": 0.06678981333971024, | |
| "learning_rate": 1.820134891463765e-08, | |
| "loss": 0.0005, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 2.9232411278128616, | |
| "grad_norm": 0.007972050458192825, | |
| "learning_rate": 1.688930296659064e-08, | |
| "loss": 0.0004, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 2.92620593554508, | |
| "grad_norm": 0.016727251932024956, | |
| "learning_rate": 1.56262519518946e-08, | |
| "loss": 0.0004, | |
| "step": 49350 | |
| }, | |
| { | |
| "epoch": 2.9291707432772984, | |
| "grad_norm": 0.8780906796455383, | |
| "learning_rate": 1.4412208289118491e-08, | |
| "loss": 0.0007, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 2.932135551009517, | |
| "grad_norm": 0.006345132831484079, | |
| "learning_rate": 1.3247183914980033e-08, | |
| "loss": 0.0008, | |
| "step": 49450 | |
| }, | |
| { | |
| "epoch": 2.9351003587417357, | |
| "grad_norm": 0.5253104567527771, | |
| "learning_rate": 1.2131190284230266e-08, | |
| "loss": 0.0014, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.938065166473954, | |
| "grad_norm": 0.0056500621140003204, | |
| "learning_rate": 1.1064238369540292e-08, | |
| "loss": 0.0002, | |
| "step": 49550 | |
| }, | |
| { | |
| "epoch": 2.941029974206173, | |
| "grad_norm": 0.06418248265981674, | |
| "learning_rate": 1.0046338661392485e-08, | |
| "loss": 0.0008, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 2.9439947819383914, | |
| "grad_norm": 1.171954870223999, | |
| "learning_rate": 9.077501167979451e-09, | |
| "loss": 0.0013, | |
| "step": 49650 | |
| }, | |
| { | |
| "epoch": 2.94695958967061, | |
| "grad_norm": 0.10743585973978043, | |
| "learning_rate": 8.157735415103563e-09, | |
| "loss": 0.0004, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 2.949924397402828, | |
| "grad_norm": 0.0010991474846377969, | |
| "learning_rate": 7.2870504460842425e-09, | |
| "loss": 0.0006, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 2.952889205135047, | |
| "grad_norm": 0.0020814514718949795, | |
| "learning_rate": 6.465454821668604e-09, | |
| "loss": 0.0005, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 2.9558540128672655, | |
| "grad_norm": 0.1303500384092331, | |
| "learning_rate": 5.6929566199481755e-09, | |
| "loss": 0.0005, | |
| "step": 49850 | |
| }, | |
| { | |
| "epoch": 2.9588188205994843, | |
| "grad_norm": 0.008715493604540825, | |
| "learning_rate": 4.969563436278413e-09, | |
| "loss": 0.0001, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 2.9617836283317027, | |
| "grad_norm": 0.005461179651319981, | |
| "learning_rate": 4.295282383204868e-09, | |
| "loss": 0.0005, | |
| "step": 49950 | |
| }, | |
| { | |
| "epoch": 2.964748436063921, | |
| "grad_norm": 0.0482095368206501, | |
| "learning_rate": 3.6701200903921373e-09, | |
| "loss": 0.0005, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.964748436063921, | |
| "eval_loss": 0.09686783701181412, | |
| "eval_runtime": 1785.8542, | |
| "eval_samples_per_second": 3.279, | |
| "eval_steps_per_second": 0.82, | |
| "eval_wer": 11.62341934069147, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.9677132437961395, | |
| "grad_norm": 0.37676167488098145, | |
| "learning_rate": 3.094082704560575e-09, | |
| "loss": 0.0005, | |
| "step": 50050 | |
| }, | |
| { | |
| "epoch": 2.9706780515283584, | |
| "grad_norm": 0.004809459205716848, | |
| "learning_rate": 2.567175889424123e-09, | |
| "loss": 0.0002, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 2.973642859260577, | |
| "grad_norm": 0.024880768731236458, | |
| "learning_rate": 2.0894048256348e-09, | |
| "loss": 0.0004, | |
| "step": 50150 | |
| }, | |
| { | |
| "epoch": 2.9766076669927957, | |
| "grad_norm": 0.007508778013288975, | |
| "learning_rate": 1.6607742107327408e-09, | |
| "loss": 0.0009, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 2.979572474725014, | |
| "grad_norm": 0.007764613721519709, | |
| "learning_rate": 1.2812882590990116e-09, | |
| "loss": 0.0009, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 2.9825372824572325, | |
| "grad_norm": 0.04864663630723953, | |
| "learning_rate": 9.509507019145326e-10, | |
| "loss": 0.0005, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 2.9855020901894513, | |
| "grad_norm": 0.09490835666656494, | |
| "learning_rate": 6.697647871245494e-10, | |
| "loss": 0.0002, | |
| "step": 50350 | |
| }, | |
| { | |
| "epoch": 2.9884668979216698, | |
| "grad_norm": 0.039669957011938095, | |
| "learning_rate": 4.3773327940477285e-10, | |
| "loss": 0.0005, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 2.9914317056538886, | |
| "grad_norm": 0.461487352848053, | |
| "learning_rate": 2.5485846013362234e-10, | |
| "loss": 0.0007, | |
| "step": 50450 | |
| }, | |
| { | |
| "epoch": 2.994396513386107, | |
| "grad_norm": 0.10162738710641861, | |
| "learning_rate": 1.2114212737446286e-10, | |
| "loss": 0.0002, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.9973613211183254, | |
| "grad_norm": 0.023312032222747803, | |
| "learning_rate": 3.658559585173471e-11, | |
| "loss": 0.0012, | |
| "step": 50550 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 50595, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3366413561430016e+20, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
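
For reference, a minimal sketch of how a Trainer state file like the one above can be inspected once the run finishes. This is not part of the training script; the path "trainer_state.json" is an assumed placeholder for wherever the Trainer actually wrote the file, and only the Python standard library is used:

    import json

    # Load the state file saved by the Hugging Face Trainer.
    # NOTE: the path below is an assumption; point it at the real
    # checkpoint directory containing this file.
    with open("trainer_state.json", encoding="utf-8") as f:
        state = json.load(f)

    # "log_history" mixes periodic training records (which carry "loss")
    # with evaluation records (which carry "eval_wer"); split them apart.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_wer" in e]

    last = train_logs[-1]
    print(f"trained {state['num_train_epochs']} epochs "
          f"({state['max_steps']} max steps)")
    print(f"final training loss {last['loss']} at step {last['step']}")
    for e in eval_logs:
        print(f"step {e['step']}: eval_loss={e['eval_loss']:.4f}, "
              f"WER={e['eval_wer']:.2f}%")

Under these assumptions, the last two evaluation records above would print as eval_loss=0.0969 / WER=11.22% at step 47500 and eval_loss=0.0969 / WER=11.62% at step 50000.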