Invalid JSON:Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_global_step": 22000, | |
| "best_metric": 0.09801159451698542, | |
| "best_model_checkpoint": "w2v-bert-urmi-out-v3/checkpoint-22000", | |
| "epoch": 19.113814074717638, | |
| "eval_steps": 500, | |
| "global_step": 22000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.043440486533449174, | |
| "grad_norm": null, | |
| "learning_rate": 2.9400000000000002e-06, | |
| "loss": 17.6344189453125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08688097306689835, | |
| "grad_norm": 41.381343841552734, | |
| "learning_rate": 5.940000000000001e-06, | |
| "loss": 8.653125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13032145960034752, | |
| "grad_norm": 25.690353393554688, | |
| "learning_rate": 8.939999999999999e-06, | |
| "loss": 6.376434936523437, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1737619461337967, | |
| "grad_norm": 150.42007446289062, | |
| "learning_rate": 1.1940000000000001e-05, | |
| "loss": 5.640718994140625, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21720243266724587, | |
| "grad_norm": 31.25239372253418, | |
| "learning_rate": 1.4940000000000001e-05, | |
| "loss": 4.098101806640625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.26064291920069504, | |
| "grad_norm": 36.728145599365234, | |
| "learning_rate": 1.794e-05, | |
| "loss": 2.84884033203125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3040834057341442, | |
| "grad_norm": 19.722448348999023, | |
| "learning_rate": 2.094e-05, | |
| "loss": 2.7515597534179688, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.3475238922675934, | |
| "grad_norm": 72.23578643798828, | |
| "learning_rate": 2.394e-05, | |
| "loss": 2.2729856872558596, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.39096437880104257, | |
| "grad_norm": 29.115379333496094, | |
| "learning_rate": 2.6940000000000003e-05, | |
| "loss": 2.120067443847656, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.43440486533449174, | |
| "grad_norm": 41.9510612487793, | |
| "learning_rate": 2.994e-05, | |
| "loss": 2.0917138671875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.43440486533449174, | |
| "eval_cer": 0.2925177439453866, | |
| "eval_loss": 1.1169049739837646, | |
| "eval_runtime": 41.0307, | |
| "eval_samples_per_second": 24.323, | |
| "eval_steps_per_second": 12.162, | |
| "eval_wer": 0.860224586288416, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4778453518679409, | |
| "grad_norm": 56.66565704345703, | |
| "learning_rate": 2.995680282104026e-05, | |
| "loss": 2.0159649658203125, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5212858384013901, | |
| "grad_norm": 93.8177261352539, | |
| "learning_rate": 2.9912724066999708e-05, | |
| "loss": 2.4505059814453123, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5647263249348393, | |
| "grad_norm": 21.54208755493164, | |
| "learning_rate": 2.9868645312959155e-05, | |
| "loss": 1.9825759887695313, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6081668114682884, | |
| "grad_norm": 67.06742095947266, | |
| "learning_rate": 2.9824566558918603e-05, | |
| "loss": 1.764338836669922, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6516072980017377, | |
| "grad_norm": 52.83509063720703, | |
| "learning_rate": 2.978048780487805e-05, | |
| "loss": 1.8609919738769531, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6950477845351868, | |
| "grad_norm": 19.600238800048828, | |
| "learning_rate": 2.9736409050837498e-05, | |
| "loss": 1.800018310546875, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.738488271068636, | |
| "grad_norm": 131.72647094726562, | |
| "learning_rate": 2.9692330296796945e-05, | |
| "loss": 1.884906005859375, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7819287576020851, | |
| "grad_norm": 90.5487060546875, | |
| "learning_rate": 2.9648251542756393e-05, | |
| "loss": 1.8686543273925782, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8253692441355344, | |
| "grad_norm": 109.63233184814453, | |
| "learning_rate": 2.960417278871584e-05, | |
| "loss": 1.5970869445800782, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8688097306689835, | |
| "grad_norm": 33.57727813720703, | |
| "learning_rate": 2.9560094034675285e-05, | |
| "loss": 1.7014007568359375, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8688097306689835, | |
| "eval_cer": 0.2367123584547868, | |
| "eval_loss": 0.8549327850341797, | |
| "eval_runtime": 36.6894, | |
| "eval_samples_per_second": 27.201, | |
| "eval_steps_per_second": 13.601, | |
| "eval_wer": 0.7121749408983451, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9122502172024327, | |
| "grad_norm": 43.89970016479492, | |
| "learning_rate": 2.9516015280634735e-05, | |
| "loss": 1.5748690795898437, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.9556907037358818, | |
| "grad_norm": 26.835451126098633, | |
| "learning_rate": 2.947193652659418e-05, | |
| "loss": 1.4466169738769532, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9991311902693311, | |
| "grad_norm": 56.00555419921875, | |
| "learning_rate": 2.942785777255363e-05, | |
| "loss": 1.6500205993652344, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.0425716768027802, | |
| "grad_norm": 14.68392562866211, | |
| "learning_rate": 2.9383779018513075e-05, | |
| "loss": 1.589685821533203, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0860121633362294, | |
| "grad_norm": 43.6255989074707, | |
| "learning_rate": 2.9339700264472526e-05, | |
| "loss": 1.181229705810547, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.1294526498696786, | |
| "grad_norm": 73.56681060791016, | |
| "learning_rate": 2.929562151043197e-05, | |
| "loss": 1.37734619140625, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1728931364031276, | |
| "grad_norm": 12.448647499084473, | |
| "learning_rate": 2.925154275639142e-05, | |
| "loss": 1.4884031677246095, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.2163336229365769, | |
| "grad_norm": 68.00922393798828, | |
| "learning_rate": 2.9207464002350868e-05, | |
| "loss": 1.596350860595703, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.259774109470026, | |
| "grad_norm": 39.86298370361328, | |
| "learning_rate": 2.9163385248310316e-05, | |
| "loss": 1.3065278625488281, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.3032145960034751, | |
| "grad_norm": 15.449691772460938, | |
| "learning_rate": 2.9119306494269763e-05, | |
| "loss": 1.1519753265380859, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3032145960034751, | |
| "eval_cer": 0.2101099853714038, | |
| "eval_loss": 0.790473997592926, | |
| "eval_runtime": 35.6641, | |
| "eval_samples_per_second": 27.983, | |
| "eval_steps_per_second": 13.992, | |
| "eval_wer": 0.6616430260047281, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3466550825369243, | |
| "grad_norm": 24.626739501953125, | |
| "learning_rate": 2.907522774022921e-05, | |
| "loss": 1.427688751220703, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.3900955690703736, | |
| "grad_norm": 17.682024002075195, | |
| "learning_rate": 2.9031148986188658e-05, | |
| "loss": 1.4333070373535157, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.4335360556038228, | |
| "grad_norm": 23.89002227783203, | |
| "learning_rate": 2.8987070232148106e-05, | |
| "loss": 1.2861351013183593, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.476976542137272, | |
| "grad_norm": 30.092050552368164, | |
| "learning_rate": 2.8942991478107553e-05, | |
| "loss": 1.256303176879883, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.520417028670721, | |
| "grad_norm": 18.956981658935547, | |
| "learning_rate": 2.8898912724067e-05, | |
| "loss": 1.2424105834960937, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.5638575152041703, | |
| "grad_norm": 56.31697082519531, | |
| "learning_rate": 2.8854833970026448e-05, | |
| "loss": 1.1252889251708984, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.6072980017376195, | |
| "grad_norm": 42.870338439941406, | |
| "learning_rate": 2.8810755215985896e-05, | |
| "loss": 1.0607293701171876, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.6507384882710685, | |
| "grad_norm": 61.81471633911133, | |
| "learning_rate": 2.8766676461945343e-05, | |
| "loss": 1.077663116455078, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.694178974804518, | |
| "grad_norm": 14.434207916259766, | |
| "learning_rate": 2.872259770790479e-05, | |
| "loss": 1.0206593322753905, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.737619461337967, | |
| "grad_norm": 42.81059265136719, | |
| "learning_rate": 2.8678518953864238e-05, | |
| "loss": 1.1406269836425782, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.737619461337967, | |
| "eval_cer": 0.18090697296418703, | |
| "eval_loss": 0.7465401887893677, | |
| "eval_runtime": 35.7694, | |
| "eval_samples_per_second": 27.901, | |
| "eval_steps_per_second": 13.95, | |
| "eval_wer": 0.5706264775413712, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.7810599478714162, | |
| "grad_norm": 10.328418731689453, | |
| "learning_rate": 2.8634440199823686e-05, | |
| "loss": 1.0407346343994142, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.8245004344048654, | |
| "grad_norm": 57.201133728027344, | |
| "learning_rate": 2.8590361445783133e-05, | |
| "loss": 1.36724365234375, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.8679409209383144, | |
| "grad_norm": 63.23184585571289, | |
| "learning_rate": 2.854628269174258e-05, | |
| "loss": 1.1011062622070313, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.9113814074717637, | |
| "grad_norm": 12.780070304870605, | |
| "learning_rate": 2.850220393770203e-05, | |
| "loss": 1.0486875915527343, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.954821894005213, | |
| "grad_norm": 15.590168952941895, | |
| "learning_rate": 2.8458125183661476e-05, | |
| "loss": 1.4535511779785155, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.998262380538662, | |
| "grad_norm": 23.453882217407227, | |
| "learning_rate": 2.8414046429620923e-05, | |
| "loss": 1.2782644653320312, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.0417028670721113, | |
| "grad_norm": 96.71955108642578, | |
| "learning_rate": 2.836996767558037e-05, | |
| "loss": 0.843255615234375, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.0851433536055604, | |
| "grad_norm": 17.284881591796875, | |
| "learning_rate": 2.832588892153982e-05, | |
| "loss": 0.7496602630615234, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.1285838401390094, | |
| "grad_norm": 19.61467742919922, | |
| "learning_rate": 2.8281810167499266e-05, | |
| "loss": 0.8246018218994141, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.172024326672459, | |
| "grad_norm": 20.361276626586914, | |
| "learning_rate": 2.8237731413458713e-05, | |
| "loss": 1.0796304321289063, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.172024326672459, | |
| "eval_cer": 0.16687435661266728, | |
| "eval_loss": 0.6873559951782227, | |
| "eval_runtime": 35.5627, | |
| "eval_samples_per_second": 28.063, | |
| "eval_steps_per_second": 14.032, | |
| "eval_wer": 0.5325059101654847, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.215464813205908, | |
| "grad_norm": 41.729736328125, | |
| "learning_rate": 2.819365265941816e-05, | |
| "loss": 0.8797608947753907, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.2589052997393573, | |
| "grad_norm": 11.56946086883545, | |
| "learning_rate": 2.814957390537761e-05, | |
| "loss": 0.9233754730224609, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.3023457862728063, | |
| "grad_norm": 13.053935050964355, | |
| "learning_rate": 2.8105495151337056e-05, | |
| "loss": 1.0405730438232421, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.3457862728062553, | |
| "grad_norm": 27.34178352355957, | |
| "learning_rate": 2.8061416397296503e-05, | |
| "loss": 0.9149618530273438, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.3892267593397047, | |
| "grad_norm": 32.077274322509766, | |
| "learning_rate": 2.801733764325595e-05, | |
| "loss": 0.9278230285644531, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.4326672458731537, | |
| "grad_norm": 14.97318172454834, | |
| "learning_rate": 2.79732588892154e-05, | |
| "loss": 1.0948815155029297, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.4761077324066028, | |
| "grad_norm": 0.24952514469623566, | |
| "learning_rate": 2.7929180135174846e-05, | |
| "loss": 0.7527609252929688, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.519548218940052, | |
| "grad_norm": 42.21710205078125, | |
| "learning_rate": 2.7885101381134294e-05, | |
| "loss": 0.8132963562011719, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.562988705473501, | |
| "grad_norm": 10.806293487548828, | |
| "learning_rate": 2.784102262709374e-05, | |
| "loss": 1.0475637817382812, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.6064291920069502, | |
| "grad_norm": 24.606548309326172, | |
| "learning_rate": 2.779694387305319e-05, | |
| "loss": 0.8934781646728516, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.6064291920069502, | |
| "eval_cer": 0.20637156634339274, | |
| "eval_loss": 0.6298205256462097, | |
| "eval_runtime": 35.3882, | |
| "eval_samples_per_second": 28.201, | |
| "eval_steps_per_second": 14.101, | |
| "eval_wer": 0.567080378250591, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.6498696785403997, | |
| "grad_norm": 34.39348220825195, | |
| "learning_rate": 2.7752865119012636e-05, | |
| "loss": 0.920788803100586, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.6933101650738487, | |
| "grad_norm": 67.26911163330078, | |
| "learning_rate": 2.7708786364972084e-05, | |
| "loss": 0.8773422241210938, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.736750651607298, | |
| "grad_norm": 62.36620330810547, | |
| "learning_rate": 2.766470761093153e-05, | |
| "loss": 1.0072268676757812, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.780191138140747, | |
| "grad_norm": 28.642549514770508, | |
| "learning_rate": 2.7620628856890982e-05, | |
| "loss": 1.3133396911621094, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.8236316246741966, | |
| "grad_norm": 20.24125862121582, | |
| "learning_rate": 2.7576550102850426e-05, | |
| "loss": 0.9312178802490234, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.8670721112076456, | |
| "grad_norm": 87.64716339111328, | |
| "learning_rate": 2.7532471348809877e-05, | |
| "loss": 0.9268650817871094, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.9105125977410946, | |
| "grad_norm": 10.313736915588379, | |
| "learning_rate": 2.748839259476932e-05, | |
| "loss": 0.8256442260742187, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.953953084274544, | |
| "grad_norm": 31.871875762939453, | |
| "learning_rate": 2.7444313840728772e-05, | |
| "loss": 1.0591201782226562, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.997393570807993, | |
| "grad_norm": 51.40370559692383, | |
| "learning_rate": 2.7400235086688216e-05, | |
| "loss": 1.0520962524414061, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 3.040834057341442, | |
| "grad_norm": 7.437458515167236, | |
| "learning_rate": 2.7356156332647667e-05, | |
| "loss": 0.7271649169921875, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.040834057341442, | |
| "eval_cer": 0.15457549981037005, | |
| "eval_loss": 0.6895098090171814, | |
| "eval_runtime": 35.2334, | |
| "eval_samples_per_second": 28.325, | |
| "eval_steps_per_second": 14.163, | |
| "eval_wer": 0.5041371158392435, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.0842745438748915, | |
| "grad_norm": 2.256615161895752, | |
| "learning_rate": 2.731207757860711e-05, | |
| "loss": 0.6253271865844726, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 3.1277150304083405, | |
| "grad_norm": 24.19891357421875, | |
| "learning_rate": 2.7267998824566562e-05, | |
| "loss": 0.8554808807373047, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.1711555169417895, | |
| "grad_norm": 25.919506072998047, | |
| "learning_rate": 2.7223920070526006e-05, | |
| "loss": 0.7264094543457031, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 3.214596003475239, | |
| "grad_norm": 0.6518918871879578, | |
| "learning_rate": 2.7179841316485454e-05, | |
| "loss": 0.633333511352539, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.258036490008688, | |
| "grad_norm": 36.61137390136719, | |
| "learning_rate": 2.71357625624449e-05, | |
| "loss": 0.8081251525878906, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.3014769765421375, | |
| "grad_norm": 8.081766128540039, | |
| "learning_rate": 2.709168380840435e-05, | |
| "loss": 0.8773213195800781, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.3449174630755865, | |
| "grad_norm": 24.704824447631836, | |
| "learning_rate": 2.7047605054363796e-05, | |
| "loss": 0.8086146545410157, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 3.3883579496090355, | |
| "grad_norm": 0.8145921230316162, | |
| "learning_rate": 2.7003526300323244e-05, | |
| "loss": 0.6982787322998046, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.431798436142485, | |
| "grad_norm": 37.42679214477539, | |
| "learning_rate": 2.695944754628269e-05, | |
| "loss": 0.7531932067871093, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 3.475238922675934, | |
| "grad_norm": 32.73085403442383, | |
| "learning_rate": 2.691536879224214e-05, | |
| "loss": 0.6939554595947266, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.475238922675934, | |
| "eval_cer": 0.1360459446280544, | |
| "eval_loss": 0.6202276349067688, | |
| "eval_runtime": 34.9968, | |
| "eval_samples_per_second": 28.517, | |
| "eval_steps_per_second": 14.258, | |
| "eval_wer": 0.42671394799054374, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.5186794092093834, | |
| "grad_norm": 0.5364285111427307, | |
| "learning_rate": 2.6871290038201586e-05, | |
| "loss": 0.7825308227539063, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.5621198957428324, | |
| "grad_norm": 26.980627059936523, | |
| "learning_rate": 2.6827211284161034e-05, | |
| "loss": 0.7656624603271485, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.6055603822762814, | |
| "grad_norm": 10.756477355957031, | |
| "learning_rate": 2.6783132530120485e-05, | |
| "loss": 0.7668492889404297, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 3.649000868809731, | |
| "grad_norm": 13.8463773727417, | |
| "learning_rate": 2.673905377607993e-05, | |
| "loss": 0.7058528900146485, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.69244135534318, | |
| "grad_norm": 18.059154510498047, | |
| "learning_rate": 2.669497502203938e-05, | |
| "loss": 0.7425822448730469, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.735881841876629, | |
| "grad_norm": 14.087454795837402, | |
| "learning_rate": 2.6650896267998824e-05, | |
| "loss": 0.7796754455566406, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.7793223284100783, | |
| "grad_norm": 4.631764888763428, | |
| "learning_rate": 2.6606817513958275e-05, | |
| "loss": 0.7211798095703125, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 3.8227628149435273, | |
| "grad_norm": 0.7707765698432922, | |
| "learning_rate": 2.656273875991772e-05, | |
| "loss": 0.6928179168701172, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.8662033014769763, | |
| "grad_norm": 0.21713215112686157, | |
| "learning_rate": 2.651866000587717e-05, | |
| "loss": 0.7452503204345703, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.909643788010426, | |
| "grad_norm": 19.084728240966797, | |
| "learning_rate": 2.6474581251836614e-05, | |
| "loss": 0.6911500549316406, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.909643788010426, | |
| "eval_cer": 0.14384786259955573, | |
| "eval_loss": 0.6342427730560303, | |
| "eval_runtime": 35.3212, | |
| "eval_samples_per_second": 28.255, | |
| "eval_steps_per_second": 14.128, | |
| "eval_wer": 0.4435579196217494, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.953084274543875, | |
| "grad_norm": 0.5245521068572998, | |
| "learning_rate": 2.6430502497796065e-05, | |
| "loss": 0.6018388748168946, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.996524761077324, | |
| "grad_norm": 22.038259506225586, | |
| "learning_rate": 2.638642374375551e-05, | |
| "loss": 0.5375812149047852, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 4.039965247610773, | |
| "grad_norm": 11.167423248291016, | |
| "learning_rate": 2.634234498971496e-05, | |
| "loss": 0.5369546508789063, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 4.083405734144223, | |
| "grad_norm": 0.10995540767908096, | |
| "learning_rate": 2.6298266235674404e-05, | |
| "loss": 0.6637758636474609, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 4.126846220677671, | |
| "grad_norm": 596.5354614257812, | |
| "learning_rate": 2.6254187481633855e-05, | |
| "loss": 0.5059263610839844, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 4.170286707211121, | |
| "grad_norm": 49.69171905517578, | |
| "learning_rate": 2.62101087275933e-05, | |
| "loss": 0.5901547622680664, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 4.21372719374457, | |
| "grad_norm": 0.014338035136461258, | |
| "learning_rate": 2.616602997355275e-05, | |
| "loss": 0.6108988571166992, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 4.257167680278019, | |
| "grad_norm": 0.33344972133636475, | |
| "learning_rate": 2.6121951219512194e-05, | |
| "loss": 0.5445775985717773, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 4.300608166811468, | |
| "grad_norm": 0.7429609298706055, | |
| "learning_rate": 2.6077872465471645e-05, | |
| "loss": 0.4996451187133789, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 4.344048653344918, | |
| "grad_norm": 0.05325142666697502, | |
| "learning_rate": 2.603379371143109e-05, | |
| "loss": 0.5604157257080078, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.344048653344918, | |
| "eval_cer": 0.1346914449802243, | |
| "eval_loss": 0.5602818727493286, | |
| "eval_runtime": 35.3794, | |
| "eval_samples_per_second": 28.209, | |
| "eval_steps_per_second": 14.104, | |
| "eval_wer": 0.4231678486997636, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.387489139878367, | |
| "grad_norm": 0.4052943289279938, | |
| "learning_rate": 2.5989714957390537e-05, | |
| "loss": 0.571678810119629, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 4.430929626411816, | |
| "grad_norm": 7.114663600921631, | |
| "learning_rate": 2.5945636203349988e-05, | |
| "loss": 0.5940496826171875, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 4.474370112945265, | |
| "grad_norm": 3.226045846939087, | |
| "learning_rate": 2.5901557449309432e-05, | |
| "loss": 0.48196929931640625, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 4.5178105994787146, | |
| "grad_norm": 14.2632474899292, | |
| "learning_rate": 2.5857478695268883e-05, | |
| "loss": 0.4521299362182617, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 4.561251086012163, | |
| "grad_norm": 11.746747016906738, | |
| "learning_rate": 2.5813399941228327e-05, | |
| "loss": 0.6751963806152343, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 4.604691572545613, | |
| "grad_norm": 16.07468605041504, | |
| "learning_rate": 2.5769321187187778e-05, | |
| "loss": 0.45375862121582033, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 4.648132059079062, | |
| "grad_norm": 11.938125610351562, | |
| "learning_rate": 2.5725242433147222e-05, | |
| "loss": 0.5193147277832031, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 4.691572545612511, | |
| "grad_norm": 14.44975757598877, | |
| "learning_rate": 2.5681163679106673e-05, | |
| "loss": 0.5906137084960937, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 4.73501303214596, | |
| "grad_norm": 0.14901815354824066, | |
| "learning_rate": 2.5637084925066117e-05, | |
| "loss": 0.5064856338500977, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 4.7784535186794095, | |
| "grad_norm": 0.9449958801269531, | |
| "learning_rate": 2.5593006171025568e-05, | |
| "loss": 0.6325591278076171, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.7784535186794095, | |
| "eval_cer": 0.1300319661916888, | |
| "eval_loss": 0.5885463356971741, | |
| "eval_runtime": 35.2148, | |
| "eval_samples_per_second": 28.34, | |
| "eval_steps_per_second": 14.17, | |
| "eval_wer": 0.4078014184397163, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.821894005212858, | |
| "grad_norm": 0.016951393336057663, | |
| "learning_rate": 2.5548927416985012e-05, | |
| "loss": 0.5301705551147461, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 4.8653344917463075, | |
| "grad_norm": 31.289724349975586, | |
| "learning_rate": 2.5504848662944463e-05, | |
| "loss": 0.4865913009643555, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 4.908774978279757, | |
| "grad_norm": 0.2750867009162903, | |
| "learning_rate": 2.5460769908903907e-05, | |
| "loss": 0.6053089523315429, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 4.9522154648132055, | |
| "grad_norm": 0.15572036802768707, | |
| "learning_rate": 2.5416691154863358e-05, | |
| "loss": 0.44040061950683596, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 4.995655951346655, | |
| "grad_norm": 1.536003828048706, | |
| "learning_rate": 2.5372612400822802e-05, | |
| "loss": 0.6176298141479493, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 5.039096437880104, | |
| "grad_norm": 3.888091564178467, | |
| "learning_rate": 2.5328533646782253e-05, | |
| "loss": 0.6050854873657227, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 5.082536924413553, | |
| "grad_norm": 1.9056124687194824, | |
| "learning_rate": 2.5284454892741697e-05, | |
| "loss": 0.5753683090209961, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 5.125977410947002, | |
| "grad_norm": 0.10637835413217545, | |
| "learning_rate": 2.5240376138701148e-05, | |
| "loss": 0.47484302520751953, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 5.169417897480452, | |
| "grad_norm": 4.4535441398620605, | |
| "learning_rate": 2.5196297384660595e-05, | |
| "loss": 0.34999225616455076, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 5.212858384013901, | |
| "grad_norm": 0.6373205780982971, | |
| "learning_rate": 2.5152218630620043e-05, | |
| "loss": 0.38846492767333984, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.212858384013901, | |
| "eval_cer": 0.1255350273608929, | |
| "eval_loss": 0.6293100118637085, | |
| "eval_runtime": 35.1024, | |
| "eval_samples_per_second": 28.431, | |
| "eval_steps_per_second": 14.216, | |
| "eval_wer": 0.3983451536643026, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.25629887054735, | |
| "grad_norm": 0.3430880010128021, | |
| "learning_rate": 2.510813987657949e-05, | |
| "loss": 0.3233113479614258, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 5.299739357080799, | |
| "grad_norm": 0.023547176271677017, | |
| "learning_rate": 2.5064061122538938e-05, | |
| "loss": 0.5810712432861328, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 5.343179843614249, | |
| "grad_norm": 0.45001161098480225, | |
| "learning_rate": 2.5019982368498385e-05, | |
| "loss": 0.31497194290161135, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 5.386620330147697, | |
| "grad_norm": 0.07451729476451874, | |
| "learning_rate": 2.4975903614457833e-05, | |
| "loss": 0.3424281311035156, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 5.430060816681147, | |
| "grad_norm": 102.05135345458984, | |
| "learning_rate": 2.493182486041728e-05, | |
| "loss": 0.42556037902832033, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 5.473501303214596, | |
| "grad_norm": 1.4394115209579468, | |
| "learning_rate": 2.4887746106376728e-05, | |
| "loss": 0.39192684173583986, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 5.516941789748045, | |
| "grad_norm": 2.0899856090545654, | |
| "learning_rate": 2.4843667352336176e-05, | |
| "loss": 0.4351010513305664, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 5.560382276281494, | |
| "grad_norm": 0.12065482884645462, | |
| "learning_rate": 2.479958859829562e-05, | |
| "loss": 0.508093376159668, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 5.603822762814944, | |
| "grad_norm": 0.041007447987794876, | |
| "learning_rate": 2.475550984425507e-05, | |
| "loss": 0.4111709213256836, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 5.647263249348393, | |
| "grad_norm": 14.792854309082031, | |
| "learning_rate": 2.4711431090214515e-05, | |
| "loss": 0.34780517578125, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.647263249348393, | |
| "eval_cer": 0.1270520669664626, | |
| "eval_loss": 0.6646775007247925, | |
| "eval_runtime": 35.5266, | |
| "eval_samples_per_second": 28.092, | |
| "eval_steps_per_second": 14.046, | |
| "eval_wer": 0.3980496453900709, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.690703735881842, | |
| "grad_norm": 28.922000885009766, | |
| "learning_rate": 2.4667352336173966e-05, | |
| "loss": 0.4107795715332031, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 5.734144222415291, | |
| "grad_norm": 0.07848715782165527, | |
| "learning_rate": 2.462327358213341e-05, | |
| "loss": 0.5832571029663086, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 5.777584708948741, | |
| "grad_norm": 19.316383361816406, | |
| "learning_rate": 2.457919482809286e-05, | |
| "loss": 0.41104129791259764, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 5.821025195482189, | |
| "grad_norm": 0.20225679874420166, | |
| "learning_rate": 2.4535116074052305e-05, | |
| "loss": 0.4999349975585938, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 5.864465682015639, | |
| "grad_norm": 0.04317609593272209, | |
| "learning_rate": 2.4491037320011756e-05, | |
| "loss": 0.5584917449951172, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 5.907906168549088, | |
| "grad_norm": 0.3524606227874756, | |
| "learning_rate": 2.44469585659712e-05, | |
| "loss": 0.4921522521972656, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 5.951346655082537, | |
| "grad_norm": 29.436384201049805, | |
| "learning_rate": 2.440287981193065e-05, | |
| "loss": 0.5514765548706054, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 5.994787141615986, | |
| "grad_norm": 0.23278824985027313, | |
| "learning_rate": 2.4358801057890098e-05, | |
| "loss": 0.3556842422485352, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 6.038227628149436, | |
| "grad_norm": 0.08552414178848267, | |
| "learning_rate": 2.4314722303849546e-05, | |
| "loss": 0.33310401916503907, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 6.081668114682884, | |
| "grad_norm": 11.057211875915527, | |
| "learning_rate": 2.4270643549808993e-05, | |
| "loss": 0.36625064849853517, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.081668114682884, | |
| "eval_cer": 0.12483068754402124, | |
| "eval_loss": 0.6519187688827515, | |
| "eval_runtime": 35.4455, | |
| "eval_samples_per_second": 28.156, | |
| "eval_steps_per_second": 14.078, | |
| "eval_wer": 0.3945035460992908, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.125108601216334, | |
| "grad_norm": 0.06223779171705246, | |
| "learning_rate": 2.422656479576844e-05, | |
| "loss": 0.4004500198364258, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 6.168549087749783, | |
| "grad_norm": 0.009129839017987251, | |
| "learning_rate": 2.4182486041727888e-05, | |
| "loss": 0.26837165832519533, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 6.211989574283232, | |
| "grad_norm": 1.0068172216415405, | |
| "learning_rate": 2.4138407287687336e-05, | |
| "loss": 0.3842990112304687, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 6.255430060816681, | |
| "grad_norm": 0.03263875097036362, | |
| "learning_rate": 2.4094328533646783e-05, | |
| "loss": 0.46779460906982423, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 6.2988705473501305, | |
| "grad_norm": 0.025848915800452232, | |
| "learning_rate": 2.405024977960623e-05, | |
| "loss": 0.46671478271484373, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 6.342311033883579, | |
| "grad_norm": 0.032335590571165085, | |
| "learning_rate": 2.4006171025565678e-05, | |
| "loss": 0.2948387336730957, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 6.3857515204170285, | |
| "grad_norm": 0.07902107387781143, | |
| "learning_rate": 2.3962092271525126e-05, | |
| "loss": 0.2986873435974121, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 6.429192006950478, | |
| "grad_norm": 1.8951733112335205, | |
| "learning_rate": 2.3918013517484573e-05, | |
| "loss": 0.48029232025146484, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 6.4726324934839266, | |
| "grad_norm": 0.1298227608203888, | |
| "learning_rate": 2.387393476344402e-05, | |
| "loss": 0.45991172790527346, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 6.516072980017376, | |
| "grad_norm": 8.462530136108398, | |
| "learning_rate": 2.382985600940347e-05, | |
| "loss": 0.3584669876098633, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.516072980017376, | |
| "eval_cer": 0.12033374871322533, | |
| "eval_loss": 0.6832783818244934, | |
| "eval_runtime": 35.4555, | |
| "eval_samples_per_second": 28.148, | |
| "eval_steps_per_second": 14.074, | |
| "eval_wer": 0.3844562647754137, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.5595134665508255, | |
| "grad_norm": 0.03215891495347023, | |
| "learning_rate": 2.3785777255362916e-05, | |
| "loss": 0.3052578163146973, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 6.602953953084275, | |
| "grad_norm": 3.956105947494507, | |
| "learning_rate": 2.3741698501322363e-05, | |
| "loss": 0.3200105667114258, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 6.6463944396177235, | |
| "grad_norm": 10.573678016662598, | |
| "learning_rate": 2.369761974728181e-05, | |
| "loss": 0.3665552520751953, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 6.689834926151173, | |
| "grad_norm": 2.567551374435425, | |
| "learning_rate": 2.365354099324126e-05, | |
| "loss": 0.3944419479370117, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 6.733275412684622, | |
| "grad_norm": 2.1139237880706787, | |
| "learning_rate": 2.3609462239200703e-05, | |
| "loss": 0.36841018676757814, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 6.776715899218071, | |
| "grad_norm": 0.01942128874361515, | |
| "learning_rate": 2.3565383485160153e-05, | |
| "loss": 0.3560383987426758, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 6.82015638575152, | |
| "grad_norm": 1.474857211112976, | |
| "learning_rate": 2.35213047311196e-05, | |
| "loss": 0.46931259155273436, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 6.86359687228497, | |
| "grad_norm": 24.063940048217773, | |
| "learning_rate": 2.347722597707905e-05, | |
| "loss": 0.3131961250305176, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 6.907037358818418, | |
| "grad_norm": 0.024980274960398674, | |
| "learning_rate": 2.3433147223038496e-05, | |
| "loss": 0.3744655609130859, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 6.950477845351868, | |
| "grad_norm": 19.342248916625977, | |
| "learning_rate": 2.3389068468997944e-05, | |
| "loss": 0.38378406524658204, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.950477845351868, | |
| "eval_cer": 0.14720702172617436, | |
| "eval_loss": 0.6338760852813721, | |
| "eval_runtime": 35.924, | |
| "eval_samples_per_second": 27.781, | |
| "eval_steps_per_second": 13.89, | |
| "eval_wer": 0.40573286052009455, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.993918331885317, | |
| "grad_norm": 3.440767765045166, | |
| "learning_rate": 2.334498971495739e-05, | |
| "loss": 0.44374298095703124, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 7.037358818418766, | |
| "grad_norm": 78.23323822021484, | |
| "learning_rate": 2.330091096091684e-05, | |
| "loss": 0.4143082809448242, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 7.080799304952215, | |
| "grad_norm": 20.11145782470703, | |
| "learning_rate": 2.3256832206876286e-05, | |
| "loss": 0.2681180191040039, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 7.124239791485665, | |
| "grad_norm": 4.221235275268555, | |
| "learning_rate": 2.3212753452835734e-05, | |
| "loss": 0.33470783233642576, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 7.167680278019114, | |
| "grad_norm": 0.00418456643819809, | |
| "learning_rate": 2.316867469879518e-05, | |
| "loss": 0.29219053268432615, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 7.211120764552563, | |
| "grad_norm": 48.96384048461914, | |
| "learning_rate": 2.312459594475463e-05, | |
| "loss": 0.2650064277648926, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 7.254561251086012, | |
| "grad_norm": 0.4012812077999115, | |
| "learning_rate": 2.3080517190714076e-05, | |
| "loss": 0.2377411651611328, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 7.298001737619462, | |
| "grad_norm": 0.04035955294966698, | |
| "learning_rate": 2.3036438436673524e-05, | |
| "loss": 0.39625030517578125, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 7.34144222415291, | |
| "grad_norm": 0.015255268663167953, | |
| "learning_rate": 2.299235968263297e-05, | |
| "loss": 0.29354951858520506, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 7.38488271068636, | |
| "grad_norm": 0.1737648993730545, | |
| "learning_rate": 2.294828092859242e-05, | |
| "loss": 0.43962146759033205, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.38488271068636, | |
| "eval_cer": 0.11811236929078399, | |
| "eval_loss": 0.6835731863975525, | |
| "eval_runtime": 35.5953, | |
| "eval_samples_per_second": 28.037, | |
| "eval_steps_per_second": 14.019, | |
| "eval_wer": 0.37056737588652483, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.428323197219809, | |
| "grad_norm": 26.738134384155273, | |
| "learning_rate": 2.2904202174551866e-05, | |
| "loss": 0.32676326751708984, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 7.471763683753258, | |
| "grad_norm": 91.86631774902344, | |
| "learning_rate": 2.2860123420511314e-05, | |
| "loss": 0.27230093002319333, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 7.515204170286707, | |
| "grad_norm": 0.2025415152311325, | |
| "learning_rate": 2.281604466647076e-05, | |
| "loss": 0.34510005950927736, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 7.558644656820157, | |
| "grad_norm": 0.06521395593881607, | |
| "learning_rate": 2.2771965912430212e-05, | |
| "loss": 0.31739959716796873, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 7.602085143353605, | |
| "grad_norm": 0.023135656490921974, | |
| "learning_rate": 2.2727887158389656e-05, | |
| "loss": 0.4019832992553711, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 7.645525629887055, | |
| "grad_norm": 0.0029301783069968224, | |
| "learning_rate": 2.2683808404349107e-05, | |
| "loss": 0.3610734558105469, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 7.688966116420504, | |
| "grad_norm": 155.0016326904297, | |
| "learning_rate": 2.263972965030855e-05, | |
| "loss": 0.4037496566772461, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 7.732406602953953, | |
| "grad_norm": 25.407201766967773, | |
| "learning_rate": 2.2595650896268002e-05, | |
| "loss": 0.22004886627197265, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 7.775847089487402, | |
| "grad_norm": 0.04883955046534538, | |
| "learning_rate": 2.2551572142227446e-05, | |
| "loss": 0.4110527420043945, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 7.819287576020852, | |
| "grad_norm": 6.237477779388428, | |
| "learning_rate": 2.2507493388186897e-05, | |
| "loss": 0.21914356231689452, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.819287576020852, | |
| "eval_cer": 0.11708294955843311, | |
| "eval_loss": 0.6818587183952332, | |
| "eval_runtime": 35.3665, | |
| "eval_samples_per_second": 28.219, | |
| "eval_steps_per_second": 14.109, | |
| "eval_wer": 0.37706855791962174, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.8627280625543, | |
| "grad_norm": 2.864680528640747, | |
| "learning_rate": 2.246341463414634e-05, | |
| "loss": 0.3091525459289551, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 7.90616854908775, | |
| "grad_norm": 0.009744558483362198, | |
| "learning_rate": 2.241933588010579e-05, | |
| "loss": 0.18931781768798828, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 7.949609035621199, | |
| "grad_norm": 0.018469370901584625, | |
| "learning_rate": 2.2375257126065236e-05, | |
| "loss": 0.25914777755737306, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 7.9930495221546485, | |
| "grad_norm": 0.11463995277881622, | |
| "learning_rate": 2.2331178372024684e-05, | |
| "loss": 0.3924109649658203, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 8.036490008688098, | |
| "grad_norm": 0.18527474999427795, | |
| "learning_rate": 2.228709961798413e-05, | |
| "loss": 0.4139134979248047, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 8.079930495221546, | |
| "grad_norm": 0.012078936211764812, | |
| "learning_rate": 2.224302086394358e-05, | |
| "loss": 0.20489992141723634, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 8.123370981754995, | |
| "grad_norm": 0.026449766010046005, | |
| "learning_rate": 2.2198942109903026e-05, | |
| "loss": 0.23680988311767578, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 8.166811468288445, | |
| "grad_norm": 1.4742465019226074, | |
| "learning_rate": 2.2154863355862474e-05, | |
| "loss": 0.2400914192199707, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 8.210251954821894, | |
| "grad_norm": 0.0015448889462277293, | |
| "learning_rate": 2.211078460182192e-05, | |
| "loss": 0.31873985290527346, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 8.253692441355343, | |
| "grad_norm": 0.6546465158462524, | |
| "learning_rate": 2.206670584778137e-05, | |
| "loss": 0.16370586395263673, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.253692441355343, | |
| "eval_cer": 0.11431977027685973, | |
| "eval_loss": 0.722854495048523, | |
| "eval_runtime": 34.9616, | |
| "eval_samples_per_second": 28.546, | |
| "eval_steps_per_second": 14.273, | |
| "eval_wer": 0.3602245862884161, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.297132927888793, | |
| "grad_norm": 0.0220937579870224, | |
| "learning_rate": 2.2022627093740816e-05, | |
| "loss": 0.16179698944091797, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 8.340573414422241, | |
| "grad_norm": 0.8495884537696838, | |
| "learning_rate": 2.1978548339700264e-05, | |
| "loss": 0.2629365348815918, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 8.38401390095569, | |
| "grad_norm": 0.9506490230560303, | |
| "learning_rate": 2.1934469585659715e-05, | |
| "loss": 0.2445651626586914, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 8.42745438748914, | |
| "grad_norm": 0.26221564412117004, | |
| "learning_rate": 2.189039083161916e-05, | |
| "loss": 0.20401872634887697, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 8.470894874022589, | |
| "grad_norm": 0.00027192034758627415, | |
| "learning_rate": 2.184631207757861e-05, | |
| "loss": 0.2544666290283203, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 8.514335360556037, | |
| "grad_norm": 0.04336933791637421, | |
| "learning_rate": 2.1802233323538054e-05, | |
| "loss": 0.3723867797851563, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 8.557775847089488, | |
| "grad_norm": 0.14333416521549225, | |
| "learning_rate": 2.1758154569497505e-05, | |
| "loss": 0.25252397537231447, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 8.601216333622936, | |
| "grad_norm": 0.31019526720046997, | |
| "learning_rate": 2.171407581545695e-05, | |
| "loss": 0.23379629135131835, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 8.644656820156385, | |
| "grad_norm": 0.9922002553939819, | |
| "learning_rate": 2.16699970614164e-05, | |
| "loss": 0.3892123031616211, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 8.688097306689835, | |
| "grad_norm": 0.00887572392821312, | |
| "learning_rate": 2.1625918307375844e-05, | |
| "loss": 0.2666620254516602, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.688097306689835, | |
| "eval_cer": 0.11231511079807119, | |
| "eval_loss": 0.7393125891685486, | |
| "eval_runtime": 35.3264, | |
| "eval_samples_per_second": 28.251, | |
| "eval_steps_per_second": 14.125, | |
| "eval_wer": 0.35726950354609927, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.731537793223284, | |
| "grad_norm": 0.017117468640208244, | |
| "learning_rate": 2.1581839553335295e-05, | |
| "loss": 0.18703149795532226, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 8.774978279756734, | |
| "grad_norm": 0.20450972020626068, | |
| "learning_rate": 2.153776079929474e-05, | |
| "loss": 0.16164979934692383, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 8.818418766290183, | |
| "grad_norm": 0.00887273158878088, | |
| "learning_rate": 2.149368204525419e-05, | |
| "loss": 0.2993427085876465, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 8.861859252823631, | |
| "grad_norm": 0.00210910034365952, | |
| "learning_rate": 2.1449603291213634e-05, | |
| "loss": 0.2953006172180176, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 8.90529973935708, | |
| "grad_norm": 0.0051006837747991085, | |
| "learning_rate": 2.1405524537173085e-05, | |
| "loss": 0.24485448837280274, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 8.94874022589053, | |
| "grad_norm": 0.5796188712120056, | |
| "learning_rate": 2.136144578313253e-05, | |
| "loss": 0.3098959159851074, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 8.992180712423979, | |
| "grad_norm": 0.01159872580319643, | |
| "learning_rate": 2.131736702909198e-05, | |
| "loss": 0.27299707412719726, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 9.035621198957429, | |
| "grad_norm": 2.516123056411743, | |
| "learning_rate": 2.1273288275051424e-05, | |
| "loss": 0.25595357894897464, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 9.079061685490878, | |
| "grad_norm": 0.0016837273724377155, | |
| "learning_rate": 2.1229209521010872e-05, | |
| "loss": 0.2204635238647461, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 9.122502172024326, | |
| "grad_norm": 0.004055003169924021, | |
| "learning_rate": 2.118513076697032e-05, | |
| "loss": 0.29069057464599607, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.122502172024326, | |
| "eval_cer": 0.1140488703472937, | |
| "eval_loss": 0.7343300580978394, | |
| "eval_runtime": 35.4446, | |
| "eval_samples_per_second": 28.157, | |
| "eval_steps_per_second": 14.078, | |
| "eval_wer": 0.3472222222222222, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 9.165942658557777, | |
| "grad_norm": 10.814416885375977, | |
| "learning_rate": 2.1141052012929767e-05, | |
| "loss": 0.22457393646240234, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 9.209383145091225, | |
| "grad_norm": 0.531550407409668, | |
| "learning_rate": 2.1096973258889218e-05, | |
| "loss": 0.19709733963012696, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 9.252823631624674, | |
| "grad_norm": 0.02372005581855774, | |
| "learning_rate": 2.1052894504848662e-05, | |
| "loss": 0.21724346160888672, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 9.296264118158124, | |
| "grad_norm": 0.003351462772116065, | |
| "learning_rate": 2.1008815750808113e-05, | |
| "loss": 0.2724002838134766, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 9.339704604691573, | |
| "grad_norm": 0.2525140047073364, | |
| "learning_rate": 2.0964736996767557e-05, | |
| "loss": 0.23882347106933594, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 9.383145091225021, | |
| "grad_norm": 0.14738580584526062, | |
| "learning_rate": 2.0920658242727008e-05, | |
| "loss": 0.1564232349395752, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 9.426585577758472, | |
| "grad_norm": 0.10283453017473221, | |
| "learning_rate": 2.0876579488686452e-05, | |
| "loss": 0.14069479942321778, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 9.47002606429192, | |
| "grad_norm": 0.07120943069458008, | |
| "learning_rate": 2.0832500734645903e-05, | |
| "loss": 0.20460891723632812, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 9.513466550825369, | |
| "grad_norm": 0.229303777217865, | |
| "learning_rate": 2.0788421980605347e-05, | |
| "loss": 0.29092355728149416, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 9.556907037358819, | |
| "grad_norm": 0.011797781102359295, | |
| "learning_rate": 2.0744343226564798e-05, | |
| "loss": 0.14928483963012695, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 9.556907037358819, | |
| "eval_cer": 0.11589098986834263, | |
| "eval_loss": 0.6468539237976074, | |
| "eval_runtime": 35.2293, | |
| "eval_samples_per_second": 28.329, | |
| "eval_steps_per_second": 14.164, | |
| "eval_wer": 0.366725768321513, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 9.600347523892268, | |
| "grad_norm": 0.0010864798678085208, | |
| "learning_rate": 2.0700264472524242e-05, | |
| "loss": 0.22057802200317383, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 9.643788010425716, | |
| "grad_norm": 0.00047053879825398326, | |
| "learning_rate": 2.0656185718483693e-05, | |
| "loss": 0.1952187156677246, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 9.687228496959166, | |
| "grad_norm": 0.004543100483715534, | |
| "learning_rate": 2.0612106964443137e-05, | |
| "loss": 0.3174121856689453, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 9.730668983492615, | |
| "grad_norm": 0.0010513780871406198, | |
| "learning_rate": 2.0568028210402588e-05, | |
| "loss": 0.16007177352905275, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 9.774109470026064, | |
| "grad_norm": 0.0026681029703468084, | |
| "learning_rate": 2.0523949456362032e-05, | |
| "loss": 0.2065435218811035, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 9.817549956559514, | |
| "grad_norm": 0.2069607824087143, | |
| "learning_rate": 2.0479870702321483e-05, | |
| "loss": 0.22219644546508788, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 9.860990443092962, | |
| "grad_norm": 0.012031909078359604, | |
| "learning_rate": 2.0435791948280927e-05, | |
| "loss": 0.1956252098083496, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 9.904430929626411, | |
| "grad_norm": 0.0008321640198118985, | |
| "learning_rate": 2.0391713194240378e-05, | |
| "loss": 0.3007790565490723, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 9.947871416159861, | |
| "grad_norm": 0.00023682558094151318, | |
| "learning_rate": 2.0347634440199825e-05, | |
| "loss": 0.25140411376953126, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 9.99131190269331, | |
| "grad_norm": 0.18799935281276703, | |
| "learning_rate": 2.0303555686159273e-05, | |
| "loss": 0.20868509292602538, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.99131190269331, | |
| "eval_cer": 0.11296527062902964, | |
| "eval_loss": 0.871296226978302, | |
| "eval_runtime": 35.7614, | |
| "eval_samples_per_second": 27.907, | |
| "eval_steps_per_second": 13.954, | |
| "eval_wer": 0.35786052009456265, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 10.034752389226758, | |
| "grad_norm": 0.001729931216686964, | |
| "learning_rate": 2.025947693211872e-05, | |
| "loss": 0.2267488098144531, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 10.078192875760209, | |
| "grad_norm": 0.0033665213268250227, | |
| "learning_rate": 2.0215398178078168e-05, | |
| "loss": 0.11680364608764648, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 10.121633362293657, | |
| "grad_norm": 0.000843276153318584, | |
| "learning_rate": 2.0171319424037616e-05, | |
| "loss": 0.20855466842651368, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 10.165073848827106, | |
| "grad_norm": 0.0007557457429356873, | |
| "learning_rate": 2.0127240669997063e-05, | |
| "loss": 0.17802534103393555, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 10.208514335360556, | |
| "grad_norm": 0.08655949681997299, | |
| "learning_rate": 2.008316191595651e-05, | |
| "loss": 0.14240021705627443, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 10.251954821894005, | |
| "grad_norm": 55.311119079589844, | |
| "learning_rate": 2.0039083161915955e-05, | |
| "loss": 0.3166378211975098, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 10.295395308427455, | |
| "grad_norm": 0.04812853783369064, | |
| "learning_rate": 1.9995004407875406e-05, | |
| "loss": 0.1832990837097168, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 10.338835794960904, | |
| "grad_norm": 4.462372303009033, | |
| "learning_rate": 1.995092565383485e-05, | |
| "loss": 0.1998225212097168, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 10.382276281494352, | |
| "grad_norm": 0.030581099912524223, | |
| "learning_rate": 1.99068468997943e-05, | |
| "loss": 0.1649586296081543, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 10.425716768027803, | |
| "grad_norm": 0.0061181094497442245, | |
| "learning_rate": 1.9862768145753745e-05, | |
| "loss": 0.21640779495239257, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.425716768027803, | |
| "eval_cer": 0.11507829007964458, | |
| "eval_loss": 0.7006326913833618, | |
| "eval_runtime": 35.3681, | |
| "eval_samples_per_second": 28.218, | |
| "eval_steps_per_second": 14.109, | |
| "eval_wer": 0.3489952718676123, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.469157254561251, | |
| "grad_norm": 9.876059532165527, | |
| "learning_rate": 1.9818689391713196e-05, | |
| "loss": 0.24267179489135743, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 10.5125977410947, | |
| "grad_norm": 0.017044102773070335, | |
| "learning_rate": 1.977461063767264e-05, | |
| "loss": 0.17035614013671874, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 10.55603822762815, | |
| "grad_norm": 0.0013389646774157882, | |
| "learning_rate": 1.973053188363209e-05, | |
| "loss": 0.154972562789917, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 10.599478714161599, | |
| "grad_norm": 0.0071999249048531055, | |
| "learning_rate": 1.9686453129591535e-05, | |
| "loss": 0.08084283828735352, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 10.642919200695047, | |
| "grad_norm": 0.29191315174102783, | |
| "learning_rate": 1.9642374375550986e-05, | |
| "loss": 0.2428382682800293, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 10.686359687228498, | |
| "grad_norm": 2.9929769039154053, | |
| "learning_rate": 1.959829562151043e-05, | |
| "loss": 0.165596923828125, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 10.729800173761946, | |
| "grad_norm": 0.6568811535835266, | |
| "learning_rate": 1.955421686746988e-05, | |
| "loss": 0.24114521026611327, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 10.773240660295395, | |
| "grad_norm": 0.00521878432482481, | |
| "learning_rate": 1.9510138113429328e-05, | |
| "loss": 0.13222161293029785, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 10.816681146828845, | |
| "grad_norm": 0.004102786537259817, | |
| "learning_rate": 1.9466059359388776e-05, | |
| "loss": 0.2611697006225586, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 10.860121633362294, | |
| "grad_norm": 0.009258633479475975, | |
| "learning_rate": 1.9421980605348223e-05, | |
| "loss": 0.17743043899536132, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 10.860121633362294, | |
| "eval_cer": 0.11280273067129003, | |
| "eval_loss": 0.8380096554756165, | |
| "eval_runtime": 35.6349, | |
| "eval_samples_per_second": 28.006, | |
| "eval_steps_per_second": 14.003, | |
| "eval_wer": 0.3549054373522459, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 10.903562119895742, | |
| "grad_norm": 0.010119021870195866, | |
| "learning_rate": 1.937790185130767e-05, | |
| "loss": 0.2513529586791992, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 10.947002606429193, | |
| "grad_norm": 0.06954587996006012, | |
| "learning_rate": 1.9333823097267118e-05, | |
| "loss": 0.18938690185546875, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 10.990443092962641, | |
| "grad_norm": 0.012158134952187538, | |
| "learning_rate": 1.9289744343226566e-05, | |
| "loss": 0.1424751377105713, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 11.03388357949609, | |
| "grad_norm": 0.35711684823036194, | |
| "learning_rate": 1.9245665589186013e-05, | |
| "loss": 0.22175674438476561, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 11.07732406602954, | |
| "grad_norm": 0.029316997155547142, | |
| "learning_rate": 1.920158683514546e-05, | |
| "loss": 0.09745993614196777, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 11.120764552562989, | |
| "grad_norm": 0.0045172832906246185, | |
| "learning_rate": 1.915750808110491e-05, | |
| "loss": 0.14967589378356932, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 11.164205039096437, | |
| "grad_norm": 0.1485351026058197, | |
| "learning_rate": 1.9113429327064356e-05, | |
| "loss": 0.15214619636535645, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 11.207645525629887, | |
| "grad_norm": 0.013465415686368942, | |
| "learning_rate": 1.9069350573023803e-05, | |
| "loss": 0.20777603149414062, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 11.251086012163336, | |
| "grad_norm": 0.003324932884424925, | |
| "learning_rate": 1.902527181898325e-05, | |
| "loss": 0.14591985702514648, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 11.294526498696785, | |
| "grad_norm": 18.002288818359375, | |
| "learning_rate": 1.89811930649427e-05, | |
| "loss": 0.0729653549194336, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 11.294526498696785, | |
| "eval_cer": 0.10716801213631684, | |
| "eval_loss": 0.8233883380889893, | |
| "eval_runtime": 35.8606, | |
| "eval_samples_per_second": 27.83, | |
| "eval_steps_per_second": 13.915, | |
| "eval_wer": 0.3354018912529551, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 11.337966985230235, | |
| "grad_norm": 0.008703617379069328, | |
| "learning_rate": 1.8937114310902146e-05, | |
| "loss": 0.18068933486938477, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 11.381407471763684, | |
| "grad_norm": 0.016712911427021027, | |
| "learning_rate": 1.8893035556861593e-05, | |
| "loss": 0.18568845748901366, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 11.424847958297132, | |
| "grad_norm": 0.0025050437543541193, | |
| "learning_rate": 1.884895680282104e-05, | |
| "loss": 0.12276277542114258, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 11.468288444830582, | |
| "grad_norm": 0.0009163509821519256, | |
| "learning_rate": 1.880487804878049e-05, | |
| "loss": 0.14427170753479004, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 11.511728931364031, | |
| "grad_norm": 0.0007597589865326881, | |
| "learning_rate": 1.8760799294739933e-05, | |
| "loss": 0.21098020553588867, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 11.555169417897481, | |
| "grad_norm": 0.00016254196816589683, | |
| "learning_rate": 1.8716720540699384e-05, | |
| "loss": 0.18421314239501954, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 11.59860990443093, | |
| "grad_norm": 0.006345795933157206, | |
| "learning_rate": 1.867264178665883e-05, | |
| "loss": 0.20616317749023438, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 11.642050390964378, | |
| "grad_norm": 0.0005729036638513207, | |
| "learning_rate": 1.862856303261828e-05, | |
| "loss": 0.10284842491149902, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 11.685490877497829, | |
| "grad_norm": 0.014439227990806103, | |
| "learning_rate": 1.8584484278577726e-05, | |
| "loss": 0.17948501586914062, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 11.728931364031277, | |
| "grad_norm": 1.6784127950668335, | |
| "learning_rate": 1.8540405524537174e-05, | |
| "loss": 0.15696640014648439, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 11.728931364031277, | |
| "eval_cer": 0.1050549926857019, | |
| "eval_loss": 0.827880322933197, | |
| "eval_runtime": 35.4053, | |
| "eval_samples_per_second": 28.188, | |
| "eval_steps_per_second": 14.094, | |
| "eval_wer": 0.3271276595744681, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 11.772371850564726, | |
| "grad_norm": 0.0005249602254480124, | |
| "learning_rate": 1.849632677049662e-05, | |
| "loss": 0.1481422519683838, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 11.815812337098176, | |
| "grad_norm": 0.04822874069213867, | |
| "learning_rate": 1.845224801645607e-05, | |
| "loss": 0.17740755081176757, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 11.859252823631625, | |
| "grad_norm": 0.0025418957229703665, | |
| "learning_rate": 1.8408169262415516e-05, | |
| "loss": 0.12424736022949219, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 11.902693310165073, | |
| "grad_norm": 0.004390745423734188, | |
| "learning_rate": 1.8364090508374964e-05, | |
| "loss": 0.13344883918762207, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 11.946133796698524, | |
| "grad_norm": 32.29993438720703, | |
| "learning_rate": 1.832001175433441e-05, | |
| "loss": 0.08959797859191894, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 11.989574283231972, | |
| "grad_norm": 0.01902751810848713, | |
| "learning_rate": 1.827593300029386e-05, | |
| "loss": 0.15901991844177246, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 12.03301476976542, | |
| "grad_norm": 0.0059561156667768955, | |
| "learning_rate": 1.8231854246253306e-05, | |
| "loss": 0.17461122512817384, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 12.076455256298871, | |
| "grad_norm": 0.018380964174866676, | |
| "learning_rate": 1.8187775492212754e-05, | |
| "loss": 0.07262963771820069, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 12.11989574283232, | |
| "grad_norm": 0.0007720252615399659, | |
| "learning_rate": 1.81436967381722e-05, | |
| "loss": 0.12178866386413574, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 12.163336229365768, | |
| "grad_norm": 0.005173459183424711, | |
| "learning_rate": 1.809961798413165e-05, | |
| "loss": 0.18882158279418945, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 12.163336229365768, | |
| "eval_cer": 0.109335211572845, | |
| "eval_loss": 0.7686098217964172, | |
| "eval_runtime": 35.2553, | |
| "eval_samples_per_second": 28.308, | |
| "eval_steps_per_second": 14.154, | |
| "eval_wer": 0.3380614657210402, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 12.206776715899219, | |
| "grad_norm": 0.005660334601998329, | |
| "learning_rate": 1.8055539230091096e-05, | |
| "loss": 0.10836532592773437, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 12.250217202432667, | |
| "grad_norm": 0.24879610538482666, | |
| "learning_rate": 1.8011460476050544e-05, | |
| "loss": 0.06245335102081299, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 12.293657688966116, | |
| "grad_norm": 0.0002563217713031918, | |
| "learning_rate": 1.796738172200999e-05, | |
| "loss": 0.09659749031066894, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 12.337098175499566, | |
| "grad_norm": 0.0001977673382498324, | |
| "learning_rate": 1.792330296796944e-05, | |
| "loss": 0.08525155067443847, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 12.380538662033015, | |
| "grad_norm": 0.00024911269429139793, | |
| "learning_rate": 1.7879224213928886e-05, | |
| "loss": 0.1183913516998291, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 12.423979148566463, | |
| "grad_norm": 0.001824671751819551, | |
| "learning_rate": 1.7835145459888337e-05, | |
| "loss": 0.08873219490051269, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 12.467419635099914, | |
| "grad_norm": 0.004962866194546223, | |
| "learning_rate": 1.779106670584778e-05, | |
| "loss": 0.11354425430297851, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 12.510860121633362, | |
| "grad_norm": 0.0757075771689415, | |
| "learning_rate": 1.7746987951807232e-05, | |
| "loss": 0.11156253814697266, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 12.55430060816681, | |
| "grad_norm": 0.02478897199034691, | |
| "learning_rate": 1.7702909197766676e-05, | |
| "loss": 0.12282137870788574, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 12.597741094700261, | |
| "grad_norm": 2.5461020469665527, | |
| "learning_rate": 1.7658830443726127e-05, | |
| "loss": 0.11555877685546875, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 12.597741094700261, | |
| "eval_cer": 0.10830579184049412, | |
| "eval_loss": 0.92600417137146, | |
| "eval_runtime": 35.2867, | |
| "eval_samples_per_second": 28.283, | |
| "eval_steps_per_second": 14.141, | |
| "eval_wer": 0.3309692671394799, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 12.64118158123371, | |
| "grad_norm": 0.5628868341445923, | |
| "learning_rate": 1.761475168968557e-05, | |
| "loss": 0.2381545639038086, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 12.684622067767158, | |
| "grad_norm": 0.01276449766010046, | |
| "learning_rate": 1.757067293564502e-05, | |
| "loss": 0.10359532356262208, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 12.728062554300609, | |
| "grad_norm": 0.009611076675355434, | |
| "learning_rate": 1.7526594181604466e-05, | |
| "loss": 0.10290337562561035, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 12.771503040834057, | |
| "grad_norm": 0.00047707941848784685, | |
| "learning_rate": 1.7482515427563914e-05, | |
| "loss": 0.20995697021484375, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 12.814943527367507, | |
| "grad_norm": 10.169084548950195, | |
| "learning_rate": 1.743843667352336e-05, | |
| "loss": 0.15165854454040528, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 12.858384013900956, | |
| "grad_norm": 0.0020368106197565794, | |
| "learning_rate": 1.739435791948281e-05, | |
| "loss": 0.22781238555908204, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 12.901824500434405, | |
| "grad_norm": 0.04858289286494255, | |
| "learning_rate": 1.7350279165442256e-05, | |
| "loss": 0.13032222747802735, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 12.945264986967853, | |
| "grad_norm": 0.0008267110679298639, | |
| "learning_rate": 1.7306200411401704e-05, | |
| "loss": 0.06278028964996338, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 12.988705473501303, | |
| "grad_norm": 0.14715807139873505, | |
| "learning_rate": 1.726212165736115e-05, | |
| "loss": 0.16469184875488282, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 13.032145960034752, | |
| "grad_norm": 0.18887297809123993, | |
| "learning_rate": 1.72180429033206e-05, | |
| "loss": 0.248513126373291, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 13.032145960034752, | |
| "eval_cer": 0.11063553123476189, | |
| "eval_loss": 0.8484429717063904, | |
| "eval_runtime": 35.4635, | |
| "eval_samples_per_second": 28.142, | |
| "eval_steps_per_second": 14.071, | |
| "eval_wer": 0.33747044917257685, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 13.075586446568202, | |
| "grad_norm": 0.0044303713366389275, | |
| "learning_rate": 1.7173964149280047e-05, | |
| "loss": 0.13554862022399902, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 13.119026933101651, | |
| "grad_norm": 0.006357671692967415, | |
| "learning_rate": 1.7129885395239494e-05, | |
| "loss": 0.1657179069519043, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 13.1624674196351, | |
| "grad_norm": 0.004660587292164564, | |
| "learning_rate": 1.7085806641198945e-05, | |
| "loss": 0.07184979438781738, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 13.20590790616855, | |
| "grad_norm": 0.001002687495201826, | |
| "learning_rate": 1.704172788715839e-05, | |
| "loss": 0.11178950309753417, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 13.249348392701998, | |
| "grad_norm": 0.0017005419358611107, | |
| "learning_rate": 1.699764913311784e-05, | |
| "loss": 0.14817577362060547, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 13.292788879235447, | |
| "grad_norm": 30.164806365966797, | |
| "learning_rate": 1.6953570379077284e-05, | |
| "loss": 0.11133524894714356, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 13.336229365768897, | |
| "grad_norm": 0.20776331424713135, | |
| "learning_rate": 1.6909491625036735e-05, | |
| "loss": 0.08040478706359863, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 13.379669852302346, | |
| "grad_norm": 0.0001020112176775001, | |
| "learning_rate": 1.686541287099618e-05, | |
| "loss": 0.15835739135742188, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 13.423110338835794, | |
| "grad_norm": 0.020164845511317253, | |
| "learning_rate": 1.682133411695563e-05, | |
| "loss": 0.14341225624084472, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 13.466550825369245, | |
| "grad_norm": 0.0017340014455839992, | |
| "learning_rate": 1.6777255362915074e-05, | |
| "loss": 0.1316046142578125, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 13.466550825369245, | |
| "eval_cer": 0.10104567372812483, | |
| "eval_loss": 0.9770230650901794, | |
| "eval_runtime": 35.2978, | |
| "eval_samples_per_second": 28.274, | |
| "eval_steps_per_second": 14.137, | |
| "eval_wer": 0.32062647754137114, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 13.509991311902693, | |
| "grad_norm": 0.10325725376605988, | |
| "learning_rate": 1.6733176608874525e-05, | |
| "loss": 0.14392637252807616, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 13.553431798436142, | |
| "grad_norm": 7.639220714569092, | |
| "learning_rate": 1.668909785483397e-05, | |
| "loss": 0.11816396713256835, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 13.596872284969592, | |
| "grad_norm": 0.011842885985970497, | |
| "learning_rate": 1.664501910079342e-05, | |
| "loss": 0.06573171615600586, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 13.64031277150304, | |
| "grad_norm": 0.36505550146102905, | |
| "learning_rate": 1.6600940346752864e-05, | |
| "loss": 0.12598639488220215, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 13.68375325803649, | |
| "grad_norm": 0.01986199989914894, | |
| "learning_rate": 1.6556861592712315e-05, | |
| "loss": 0.08807419776916504, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 13.72719374456994, | |
| "grad_norm": 0.0006646508118137717, | |
| "learning_rate": 1.651278283867176e-05, | |
| "loss": 0.07460322380065917, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 13.770634231103388, | |
| "grad_norm": 0.017491919919848442, | |
| "learning_rate": 1.646870408463121e-05, | |
| "loss": 0.08792648315429688, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 13.814074717636837, | |
| "grad_norm": 64.46247863769531, | |
| "learning_rate": 1.6424625330590654e-05, | |
| "loss": 0.19781913757324218, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 13.857515204170287, | |
| "grad_norm": 0.004558779299259186, | |
| "learning_rate": 1.6380546576550102e-05, | |
| "loss": 0.10111617088317872, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 13.900955690703736, | |
| "grad_norm": 0.00020643201423808932, | |
| "learning_rate": 1.633646782250955e-05, | |
| "loss": 0.08666461944580078, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 13.900955690703736, | |
| "eval_cer": 0.10776399198136208, | |
| "eval_loss": 0.8977736234664917, | |
| "eval_runtime": 35.6835, | |
| "eval_samples_per_second": 27.968, | |
| "eval_steps_per_second": 13.984, | |
| "eval_wer": 0.3271276595744681, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 13.944396177237184, | |
| "grad_norm": 0.031363021582365036, | |
| "learning_rate": 1.6292389068468997e-05, | |
| "loss": 0.09195023536682129, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 13.987836663770635, | |
| "grad_norm": 0.024453002959489822, | |
| "learning_rate": 1.6248310314428448e-05, | |
| "loss": 0.05720340728759766, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 14.031277150304083, | |
| "grad_norm": 0.020940568298101425, | |
| "learning_rate": 1.6204231560387892e-05, | |
| "loss": 0.11965296745300293, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 14.074717636837532, | |
| "grad_norm": 0.020178375765681267, | |
| "learning_rate": 1.6160152806347343e-05, | |
| "loss": 0.11014815330505372, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 14.118158123370982, | |
| "grad_norm": 1.0401362180709839, | |
| "learning_rate": 1.6116074052306787e-05, | |
| "loss": 0.06974054336547851, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 14.16159860990443, | |
| "grad_norm": 0.007594361901283264, | |
| "learning_rate": 1.6071995298266238e-05, | |
| "loss": 0.041026763916015625, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 14.20503909643788, | |
| "grad_norm": 0.0018089961959049106, | |
| "learning_rate": 1.6027916544225682e-05, | |
| "loss": 0.05419292449951172, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 14.24847958297133, | |
| "grad_norm": 8.15002727508545, | |
| "learning_rate": 1.5983837790185133e-05, | |
| "loss": 0.0943959903717041, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 14.291920069504778, | |
| "grad_norm": 0.0004822172923013568, | |
| "learning_rate": 1.5939759036144577e-05, | |
| "loss": 0.09241563796997071, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 14.335360556038228, | |
| "grad_norm": 0.0005213666008785367, | |
| "learning_rate": 1.5895680282104028e-05, | |
| "loss": 0.08321575164794921, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 14.335360556038228, | |
| "eval_cer": 0.10608441241805278, | |
| "eval_loss": 0.936793327331543, | |
| "eval_runtime": 35.3702, | |
| "eval_samples_per_second": 28.216, | |
| "eval_steps_per_second": 14.108, | |
| "eval_wer": 0.32476359338061467, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 14.378801042571677, | |
| "grad_norm": 7.210012699943036e-05, | |
| "learning_rate": 1.5851601528063472e-05, | |
| "loss": 0.09619697570800781, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 14.422241529105126, | |
| "grad_norm": 4.460615158081055, | |
| "learning_rate": 1.5807522774022923e-05, | |
| "loss": 0.06220272541046143, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 14.465682015638576, | |
| "grad_norm": 3.5526578426361084, | |
| "learning_rate": 1.5763444019982367e-05, | |
| "loss": 0.07247277259826661, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 14.509122502172024, | |
| "grad_norm": 0.0009735809871926904, | |
| "learning_rate": 1.5719365265941818e-05, | |
| "loss": 0.14418716430664064, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 14.552562988705473, | |
| "grad_norm": 0.002880257787182927, | |
| "learning_rate": 1.5675286511901262e-05, | |
| "loss": 0.05156928539276123, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 14.596003475238923, | |
| "grad_norm": 0.009934864938259125, | |
| "learning_rate": 1.5631207757860713e-05, | |
| "loss": 0.1062159538269043, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 14.639443961772372, | |
| "grad_norm": 0.13457264006137848, | |
| "learning_rate": 1.5587129003820157e-05, | |
| "loss": 0.05868762016296387, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 14.68288444830582, | |
| "grad_norm": 0.00943897757679224, | |
| "learning_rate": 1.5543050249779608e-05, | |
| "loss": 0.07642593383789062, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 14.72632493483927, | |
| "grad_norm": 0.026743775233626366, | |
| "learning_rate": 1.5498971495739052e-05, | |
| "loss": 0.10913041114807129, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 14.76976542137272, | |
| "grad_norm": 0.003263711929321289, | |
| "learning_rate": 1.5454892741698503e-05, | |
| "loss": 0.12181022644042969, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 14.76976542137272, | |
| "eval_cer": 0.10294197323508696, | |
| "eval_loss": 0.8898913264274597, | |
| "eval_runtime": 35.138, | |
| "eval_samples_per_second": 28.402, | |
| "eval_steps_per_second": 14.201, | |
| "eval_wer": 0.3188534278959811, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 14.813205907906168, | |
| "grad_norm": 0.020024575293064117, | |
| "learning_rate": 1.541081398765795e-05, | |
| "loss": 0.06474356651306153, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 14.856646394439618, | |
| "grad_norm": 0.02727115899324417, | |
| "learning_rate": 1.5366735233617398e-05, | |
| "loss": 0.064862699508667, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 14.900086880973067, | |
| "grad_norm": 0.06588542461395264, | |
| "learning_rate": 1.5322656479576846e-05, | |
| "loss": 0.11551046371459961, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 14.943527367506515, | |
| "grad_norm": 1.188116431236267, | |
| "learning_rate": 1.5278577725536293e-05, | |
| "loss": 0.0937428092956543, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 14.986967854039966, | |
| "grad_norm": 0.003894130466505885, | |
| "learning_rate": 1.5234498971495739e-05, | |
| "loss": 0.11846747398376464, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 15.030408340573414, | |
| "grad_norm": 0.0014571856008842587, | |
| "learning_rate": 1.5190420217455185e-05, | |
| "loss": 0.05842185020446777, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 15.073848827106863, | |
| "grad_norm": 0.0016659823013469577, | |
| "learning_rate": 1.5146341463414634e-05, | |
| "loss": 0.055425772666931154, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 15.117289313640313, | |
| "grad_norm": 0.0008578883716836572, | |
| "learning_rate": 1.510226270937408e-05, | |
| "loss": 0.10561844825744629, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 15.160729800173762, | |
| "grad_norm": 0.028916161507368088, | |
| "learning_rate": 1.505818395533353e-05, | |
| "loss": 0.15631651878356934, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 15.20417028670721, | |
| "grad_norm": 0.01692270301282406, | |
| "learning_rate": 1.5014105201292976e-05, | |
| "loss": 0.04396585464477539, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 15.20417028670721, | |
| "eval_cer": 0.10250853334778133, | |
| "eval_loss": 0.9507099390029907, | |
| "eval_runtime": 35.4616, | |
| "eval_samples_per_second": 28.143, | |
| "eval_steps_per_second": 14.072, | |
| "eval_wer": 0.3200354609929078, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 15.24761077324066, | |
| "grad_norm": 0.00015645832172594965, | |
| "learning_rate": 1.4970026447252426e-05, | |
| "loss": 0.10505289077758789, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 15.29105125977411, | |
| "grad_norm": 6.809161277487874e-05, | |
| "learning_rate": 1.4925947693211873e-05, | |
| "loss": 0.06061763286590576, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 15.334491746307558, | |
| "grad_norm": 0.002175210742279887, | |
| "learning_rate": 1.488186893917132e-05, | |
| "loss": 0.08643261909484863, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 15.377932232841008, | |
| "grad_norm": 0.00033852062188088894, | |
| "learning_rate": 1.4837790185130768e-05, | |
| "loss": 0.058766045570373536, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 15.421372719374457, | |
| "grad_norm": 0.032032400369644165, | |
| "learning_rate": 1.4793711431090216e-05, | |
| "loss": 0.06575697422027588, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 15.464813205907905, | |
| "grad_norm": 0.007524843327701092, | |
| "learning_rate": 1.4749632677049663e-05, | |
| "loss": 0.039991099834442136, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 15.508253692441356, | |
| "grad_norm": 0.38591468334198, | |
| "learning_rate": 1.470555392300911e-05, | |
| "loss": 0.06621292591094971, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 15.551694178974804, | |
| "grad_norm": 0.005559583194553852, | |
| "learning_rate": 1.4661475168968558e-05, | |
| "loss": 0.05986703395843506, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 15.595134665508255, | |
| "grad_norm": 0.0015642516082152724, | |
| "learning_rate": 1.4617396414928004e-05, | |
| "loss": 0.05914860725402832, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 15.638575152041703, | |
| "grad_norm": 0.025494471192359924, | |
| "learning_rate": 1.4573317660887452e-05, | |
| "loss": 0.049571285247802736, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 15.638575152041703, | |
| "eval_cer": 0.10175001354499648, | |
| "eval_loss": 0.9704659581184387, | |
| "eval_runtime": 35.2476, | |
| "eval_samples_per_second": 28.314, | |
| "eval_steps_per_second": 14.157, | |
| "eval_wer": 0.31501182033096925, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 15.682015638575152, | |
| "grad_norm": 0.0005379091016948223, | |
| "learning_rate": 1.45292389068469e-05, | |
| "loss": 0.04489382266998291, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 15.725456125108602, | |
| "grad_norm": 0.0002697557501960546, | |
| "learning_rate": 1.4485160152806347e-05, | |
| "loss": 0.03827667951583862, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 15.76889661164205, | |
| "grad_norm": 0.04231059551239014, | |
| "learning_rate": 1.4441081398765794e-05, | |
| "loss": 0.06413057327270508, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 15.8123370981755, | |
| "grad_norm": 0.0001920364738907665, | |
| "learning_rate": 1.4397002644725242e-05, | |
| "loss": 0.06431771278381347, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 15.85577758470895, | |
| "grad_norm": 0.0009730961173772812, | |
| "learning_rate": 1.435292389068469e-05, | |
| "loss": 0.14641772270202635, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 15.899218071242398, | |
| "grad_norm": 0.04817694053053856, | |
| "learning_rate": 1.4308845136644137e-05, | |
| "loss": 0.09290631294250488, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 15.942658557775847, | |
| "grad_norm": 0.0002339025668334216, | |
| "learning_rate": 1.4264766382603586e-05, | |
| "loss": 0.04536252975463867, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 15.986099044309297, | |
| "grad_norm": 5.864691734313965, | |
| "learning_rate": 1.4220687628563033e-05, | |
| "loss": 0.07572299003601074, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 16.029539530842744, | |
| "grad_norm": 0.00020901852985844016, | |
| "learning_rate": 1.4176608874522481e-05, | |
| "loss": 0.04608057975769043, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 16.072980017376196, | |
| "grad_norm": 0.07316890358924866, | |
| "learning_rate": 1.4132530120481928e-05, | |
| "loss": 0.11023859977722168, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 16.072980017376196, | |
| "eval_cer": 0.11616188979790865, | |
| "eval_loss": 1.0090523958206177, | |
| "eval_runtime": 35.2425, | |
| "eval_samples_per_second": 28.318, | |
| "eval_steps_per_second": 14.159, | |
| "eval_wer": 0.33037825059101655, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 16.116420503909644, | |
| "grad_norm": 0.0020939745008945465, | |
| "learning_rate": 1.4088451366441376e-05, | |
| "loss": 0.05038735389709473, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 16.159860990443093, | |
| "grad_norm": 0.0004580508393701166, | |
| "learning_rate": 1.4044372612400824e-05, | |
| "loss": 0.0617540168762207, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 16.20330147697654, | |
| "grad_norm": 0.0010127995628863573, | |
| "learning_rate": 1.4000293858360271e-05, | |
| "loss": 0.0612303876876831, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 16.24674196350999, | |
| "grad_norm": 0.015361390076577663, | |
| "learning_rate": 1.3956215104319719e-05, | |
| "loss": 0.04825174331665039, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 16.290182450043442, | |
| "grad_norm": 0.0008976504323072731, | |
| "learning_rate": 1.3912136350279166e-05, | |
| "loss": 0.05854806423187256, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 16.33362293657689, | |
| "grad_norm": 0.013188125565648079, | |
| "learning_rate": 1.3868057596238614e-05, | |
| "loss": 0.04929457664489746, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 16.37706342311034, | |
| "grad_norm": 0.013670213520526886, | |
| "learning_rate": 1.3823978842198061e-05, | |
| "loss": 0.038565528392791745, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 16.420503909643788, | |
| "grad_norm": 0.08130084723234177, | |
| "learning_rate": 1.3779900088157509e-05, | |
| "loss": 0.04009881019592285, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 16.463944396177236, | |
| "grad_norm": 2.4593734741210938, | |
| "learning_rate": 1.3735821334116956e-05, | |
| "loss": 0.043494491577148436, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 16.507384882710685, | |
| "grad_norm": 0.0002031345502473414, | |
| "learning_rate": 1.3691742580076404e-05, | |
| "loss": 0.027528271675109864, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 16.507384882710685, | |
| "eval_cer": 0.10440483285474346, | |
| "eval_loss": 0.9773461818695068, | |
| "eval_runtime": 35.5819, | |
| "eval_samples_per_second": 28.048, | |
| "eval_steps_per_second": 14.024, | |
| "eval_wer": 0.3212174940898345, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 16.550825369244137, | |
| "grad_norm": 0.00017495028441771865, | |
| "learning_rate": 1.3647663826035851e-05, | |
| "loss": 0.06181173324584961, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 16.594265855777586, | |
| "grad_norm": 0.00031455489806830883, | |
| "learning_rate": 1.3603585071995299e-05, | |
| "loss": 0.046858110427856446, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 16.637706342311034, | |
| "grad_norm": 0.03978965803980827, | |
| "learning_rate": 1.3559506317954746e-05, | |
| "loss": 0.05867977142333984, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 16.681146828844483, | |
| "grad_norm": 0.00650749821215868, | |
| "learning_rate": 1.3515427563914194e-05, | |
| "loss": 0.06390885829925537, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 16.72458731537793, | |
| "grad_norm": 0.002027066657319665, | |
| "learning_rate": 1.3471348809873641e-05, | |
| "loss": 0.0747562313079834, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 16.76802780191138, | |
| "grad_norm": 0.00012768770102411509, | |
| "learning_rate": 1.3427270055833089e-05, | |
| "loss": 0.0417702579498291, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 16.811468288444832, | |
| "grad_norm": 4.3758605897892267e-05, | |
| "learning_rate": 1.3383191301792536e-05, | |
| "loss": 0.043452243804931644, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 16.85490877497828, | |
| "grad_norm": 0.009404808282852173, | |
| "learning_rate": 1.3339112547751984e-05, | |
| "loss": 0.07918959617614746, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 16.89834926151173, | |
| "grad_norm": 0.003255483927205205, | |
| "learning_rate": 1.3295033793711431e-05, | |
| "loss": 0.031140968799591065, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 16.941789748045178, | |
| "grad_norm": 0.046869829297065735, | |
| "learning_rate": 1.3250955039670879e-05, | |
| "loss": 0.053838644027709964, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 16.941789748045178, | |
| "eval_cer": 0.10142493362951725, | |
| "eval_loss": 1.045753836631775, | |
| "eval_runtime": 35.7261, | |
| "eval_samples_per_second": 27.935, | |
| "eval_steps_per_second": 13.967, | |
| "eval_wer": 0.3141252955082742, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 16.985230234578626, | |
| "grad_norm": 0.0014443215914070606, | |
| "learning_rate": 1.3206876285630326e-05, | |
| "loss": 0.055178966522216794, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 17.028670721112075, | |
| "grad_norm": 0.0004687681212089956, | |
| "learning_rate": 1.3162797531589774e-05, | |
| "loss": 0.09418526649475098, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 17.072111207645527, | |
| "grad_norm": 0.0004573004553094506, | |
| "learning_rate": 1.3118718777549221e-05, | |
| "loss": 0.026365480422973632, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 17.115551694178976, | |
| "grad_norm": 0.0036469711922109127, | |
| "learning_rate": 1.3074640023508669e-05, | |
| "loss": 0.058814377784729005, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 17.158992180712424, | |
| "grad_norm": 0.00524592399597168, | |
| "learning_rate": 1.3030561269468116e-05, | |
| "loss": 0.04088939189910889, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 17.202432667245873, | |
| "grad_norm": 0.00013877540186513215, | |
| "learning_rate": 1.2986482515427564e-05, | |
| "loss": 0.06733872890472412, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 17.24587315377932, | |
| "grad_norm": 0.04638398066163063, | |
| "learning_rate": 1.2942403761387011e-05, | |
| "loss": 0.02715529441833496, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 17.28931364031277, | |
| "grad_norm": 0.0002255926956422627, | |
| "learning_rate": 1.2898325007346459e-05, | |
| "loss": 0.024372515678405763, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 17.332754126846222, | |
| "grad_norm": 0.0001305036712437868, | |
| "learning_rate": 1.2854246253305906e-05, | |
| "loss": 0.05264826774597168, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 17.37619461337967, | |
| "grad_norm": 0.012165222316980362, | |
| "learning_rate": 1.2810167499265354e-05, | |
| "loss": 0.022559099197387696, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 17.37619461337967, | |
| "eval_cer": 0.10581351248848675, | |
| "eval_loss": 0.9943767786026001, | |
| "eval_runtime": 35.1598, | |
| "eval_samples_per_second": 28.385, | |
| "eval_steps_per_second": 14.192, | |
| "eval_wer": 0.3271276595744681, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 17.41963509991312, | |
| "grad_norm": 0.2545449733734131, | |
| "learning_rate": 1.2766088745224801e-05, | |
| "loss": 0.02598097801208496, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 17.463075586446568, | |
| "grad_norm": 0.5349053144454956, | |
| "learning_rate": 1.2722009991184249e-05, | |
| "loss": 0.02631650447845459, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 17.506516072980016, | |
| "grad_norm": 0.001936123939231038, | |
| "learning_rate": 1.2677931237143697e-05, | |
| "loss": 0.021945018768310547, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 17.54995655951347, | |
| "grad_norm": 0.000843520334456116, | |
| "learning_rate": 1.2633852483103146e-05, | |
| "loss": 0.09685382843017579, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 17.593397046046917, | |
| "grad_norm": 0.0006347526214085519, | |
| "learning_rate": 1.2589773729062593e-05, | |
| "loss": 0.07540733814239502, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 17.636837532580365, | |
| "grad_norm": 0.00029396990430541337, | |
| "learning_rate": 1.254569497502204e-05, | |
| "loss": 0.04331284999847412, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 17.680278019113814, | |
| "grad_norm": 0.0012669226853176951, | |
| "learning_rate": 1.2501616220981488e-05, | |
| "loss": 0.05464168548583984, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 17.723718505647263, | |
| "grad_norm": 8.315537706948817e-05, | |
| "learning_rate": 1.2457537466940936e-05, | |
| "loss": 0.043418560028076175, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 17.76715899218071, | |
| "grad_norm": 0.014166179113090038, | |
| "learning_rate": 1.2413458712900383e-05, | |
| "loss": 0.057585406303405764, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 17.810599478714163, | |
| "grad_norm": 0.0003503711777739227, | |
| "learning_rate": 1.236937995885983e-05, | |
| "loss": 0.06322105884552003, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 17.810599478714163, | |
| "eval_cer": 0.09947445413664192, | |
| "eval_loss": 0.9832805395126343, | |
| "eval_runtime": 35.3191, | |
| "eval_samples_per_second": 28.257, | |
| "eval_steps_per_second": 14.128, | |
| "eval_wer": 0.3076241134751773, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 17.854039965247612, | |
| "grad_norm": 0.00030440345290116966, | |
| "learning_rate": 1.2325301204819278e-05, | |
| "loss": 0.060886926651000976, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 17.89748045178106, | |
| "grad_norm": 0.007375710643827915, | |
| "learning_rate": 1.2281222450778726e-05, | |
| "loss": 0.04395482540130615, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 17.94092093831451, | |
| "grad_norm": 0.0019175054039806128, | |
| "learning_rate": 1.2237143696738172e-05, | |
| "loss": 0.023046765327453613, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 17.984361424847958, | |
| "grad_norm": 0.0014469270827248693, | |
| "learning_rate": 1.219306494269762e-05, | |
| "loss": 0.041912388801574704, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 18.027801911381406, | |
| "grad_norm": 0.008292295038700104, | |
| "learning_rate": 1.2148986188657067e-05, | |
| "loss": 0.05653272151947022, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 18.071242397914858, | |
| "grad_norm": 0.0011951219057664275, | |
| "learning_rate": 1.2104907434616514e-05, | |
| "loss": 0.03046605587005615, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 18.114682884448307, | |
| "grad_norm": 0.004597791470587254, | |
| "learning_rate": 1.2060828680575962e-05, | |
| "loss": 0.034540703296661375, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 18.158123370981755, | |
| "grad_norm": 0.0003544765349943191, | |
| "learning_rate": 1.201674992653541e-05, | |
| "loss": 0.015487746000289918, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 18.201563857515204, | |
| "grad_norm": 0.001666396390646696, | |
| "learning_rate": 1.1972671172494857e-05, | |
| "loss": 0.028139712810516356, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 18.245004344048652, | |
| "grad_norm": 0.4877508282661438, | |
| "learning_rate": 1.1928592418454304e-05, | |
| "loss": 0.045163874626159665, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 18.245004344048652, | |
| "eval_cer": 0.10554261255892074, | |
| "eval_loss": 1.0116287469863892, | |
| "eval_runtime": 35.5341, | |
| "eval_samples_per_second": 28.086, | |
| "eval_steps_per_second": 14.043, | |
| "eval_wer": 0.3182624113475177, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 18.2884448305821, | |
| "grad_norm": 4.470763451536186e-05, | |
| "learning_rate": 1.1884513664413752e-05, | |
| "loss": 0.023325955867767333, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 18.331885317115553, | |
| "grad_norm": 0.000781964510679245, | |
| "learning_rate": 1.18404349103732e-05, | |
| "loss": 0.025803213119506837, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 18.375325803649, | |
| "grad_norm": 19.389554977416992, | |
| "learning_rate": 1.1796356156332649e-05, | |
| "loss": 0.030954115390777588, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 18.41876629018245, | |
| "grad_norm": 7.068664126563817e-05, | |
| "learning_rate": 1.1752277402292096e-05, | |
| "loss": 0.04330010414123535, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 18.4622067767159, | |
| "grad_norm": 0.00017082026170101017, | |
| "learning_rate": 1.1708198648251544e-05, | |
| "loss": 0.04214978694915771, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 18.505647263249347, | |
| "grad_norm": 0.0019248217577114701, | |
| "learning_rate": 1.1664119894210991e-05, | |
| "loss": 0.021218812465667723, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 18.549087749782796, | |
| "grad_norm": 0.2257125824689865, | |
| "learning_rate": 1.1620041140170439e-05, | |
| "loss": 0.04343417644500733, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 18.592528236316248, | |
| "grad_norm": 0.004617325030267239, | |
| "learning_rate": 1.1575962386129886e-05, | |
| "loss": 0.031105964183807372, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 18.635968722849697, | |
| "grad_norm": 6.712381582474336e-05, | |
| "learning_rate": 1.1531883632089334e-05, | |
| "loss": 0.022620809078216553, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 18.679409209383145, | |
| "grad_norm": 11.555673599243164, | |
| "learning_rate": 1.1487804878048781e-05, | |
| "loss": 0.051412558555603026, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 18.679409209383145, | |
| "eval_cer": 0.09855339437611746, | |
| "eval_loss": 1.0771058797836304, | |
| "eval_runtime": 35.2368, | |
| "eval_samples_per_second": 28.323, | |
| "eval_steps_per_second": 14.161, | |
| "eval_wer": 0.3108747044917258, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 18.722849695916594, | |
| "grad_norm": 0.001295646419748664, | |
| "learning_rate": 1.1443726124008229e-05, | |
| "loss": 0.0214068603515625, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 18.766290182450042, | |
| "grad_norm": 0.022851450368762016, | |
| "learning_rate": 1.1399647369967676e-05, | |
| "loss": 0.03087963581085205, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 18.80973066898349, | |
| "grad_norm": 0.0012702015228569508, | |
| "learning_rate": 1.1355568615927124e-05, | |
| "loss": 0.046400198936462404, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 18.853171155516943, | |
| "grad_norm": 0.0037327518220990896, | |
| "learning_rate": 1.1311489861886571e-05, | |
| "loss": 0.024634184837341307, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 18.89661164205039, | |
| "grad_norm": 6.548186502186581e-05, | |
| "learning_rate": 1.1267411107846019e-05, | |
| "loss": 0.03668407678604126, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 18.94005212858384, | |
| "grad_norm": 3.877016544342041, | |
| "learning_rate": 1.1223332353805466e-05, | |
| "loss": 0.02262542963027954, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 18.98349261511729, | |
| "grad_norm": 0.0001716541883070022, | |
| "learning_rate": 1.1179253599764914e-05, | |
| "loss": 0.05151228427886963, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 19.026933101650737, | |
| "grad_norm": 0.0001898752962006256, | |
| "learning_rate": 1.1135174845724361e-05, | |
| "loss": 0.0242765212059021, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 19.07037358818419, | |
| "grad_norm": 0.10014080256223679, | |
| "learning_rate": 1.1091096091683809e-05, | |
| "loss": 0.037711410522460936, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 19.113814074717638, | |
| "grad_norm": 0.00022042197815608233, | |
| "learning_rate": 1.1047017337643256e-05, | |
| "loss": 0.025053555965423583, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 19.113814074717638, | |
| "eval_cer": 0.09801159451698542, | |
| "eval_loss": 1.0469719171524048, | |
| "eval_runtime": 35.8341, | |
| "eval_samples_per_second": 27.851, | |
| "eval_steps_per_second": 13.925, | |
| "eval_wer": 0.30112293144208035, | |
| "step": 22000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 34530, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.810244226353391e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |