| { |
| "best_metric": 0.1786624384166714, |
| "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-22k/widv/widv_2dips_validate/60epoch_default/outputs/checkpoint-58320", |
| "epoch": 60.0, |
| "eval_steps": 500, |
| "global_step": 58320, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 658.7919311523438, |
| "learning_rate": 3.275034293552812e-05, |
| "loss": 4165.6836, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_cer": 0.12357620217499614, |
| "eval_loss": 257.47174072265625, |
| "eval_runtime": 52.781, |
| "eval_samples_per_second": 32.72, |
| "eval_steps_per_second": 4.092, |
| "eval_wer": 0.5061441757743926, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 529.923583984375, |
| "learning_rate": 6.608367626886145e-05, |
| "loss": 711.8623, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_cer": 0.07524009139794183, |
| "eval_loss": 178.0095672607422, |
| "eval_runtime": 52.2311, |
| "eval_samples_per_second": 33.065, |
| "eval_steps_per_second": 4.135, |
| "eval_wer": 0.30018687354889856, |
| "step": 1944 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 597.9530639648438, |
| "learning_rate": 9.941700960219479e-05, |
| "loss": 536.9188, |
| "step": 2916 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_cer": 0.07335887436219012, |
| "eval_loss": 177.63063049316406, |
| "eval_runtime": 51.8439, |
| "eval_samples_per_second": 33.312, |
| "eval_steps_per_second": 4.166, |
| "eval_wer": 0.3027351492156974, |
| "step": 2916 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 74.35365295410156, |
| "learning_rate": 0.00013275034293552812, |
| "loss": 499.5686, |
| "step": 3888 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_cer": 0.06541309464497397, |
| "eval_loss": 160.8972625732422, |
| "eval_runtime": 52.9987, |
| "eval_samples_per_second": 32.586, |
| "eval_steps_per_second": 4.076, |
| "eval_wer": 0.2519961492723257, |
| "step": 3888 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 652.4288940429688, |
| "learning_rate": 0.00016608367626886144, |
| "loss": 489.7358, |
| "step": 4860 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_cer": 0.0774906798151425, |
| "eval_loss": 174.74952697753906, |
| "eval_runtime": 51.7437, |
| "eval_samples_per_second": 33.376, |
| "eval_steps_per_second": 4.174, |
| "eval_wer": 0.28999377088170336, |
| "step": 4860 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 275.6253356933594, |
| "learning_rate": 0.00019941700960219482, |
| "loss": 508.3668, |
| "step": 5832 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_cer": 0.07830673286718093, |
| "eval_loss": 193.48646545410156, |
| "eval_runtime": 80.1664, |
| "eval_samples_per_second": 21.543, |
| "eval_steps_per_second": 2.694, |
| "eval_wer": 0.29441078203748794, |
| "step": 5832 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 890.0252685546875, |
| "learning_rate": 0.00023275034293552814, |
| "loss": 537.328, |
| "step": 6804 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_cer": 0.08461181644819352, |
| "eval_loss": 230.1122589111328, |
| "eval_runtime": 52.6861, |
| "eval_samples_per_second": 32.779, |
| "eval_steps_per_second": 4.1, |
| "eval_wer": 0.31037997621609376, |
| "step": 6804 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 165.17703247070312, |
| "learning_rate": 0.00026608367626886146, |
| "loss": 553.6925, |
| "step": 7776 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_cer": 0.10000515401927604, |
| "eval_loss": 256.9770812988281, |
| "eval_runtime": 52.1076, |
| "eval_samples_per_second": 33.143, |
| "eval_steps_per_second": 4.145, |
| "eval_wer": 0.3464522339883346, |
| "step": 7776 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 932.0779418945312, |
| "learning_rate": 0.0002994170096021948, |
| "loss": 581.7494, |
| "step": 8748 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_cer": 0.10558867490164413, |
| "eval_loss": 254.68865966796875, |
| "eval_runtime": 52.1145, |
| "eval_samples_per_second": 33.139, |
| "eval_steps_per_second": 4.145, |
| "eval_wer": 0.3680276346338977, |
| "step": 8748 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 767.27197265625, |
| "learning_rate": 0.00033275034293552816, |
| "loss": 623.7695, |
| "step": 9720 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_cer": 0.1115415671654612, |
| "eval_loss": 268.8912658691406, |
| "eval_runtime": 52.2444, |
| "eval_samples_per_second": 33.056, |
| "eval_steps_per_second": 4.134, |
| "eval_wer": 0.38099552636049605, |
| "step": 9720 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 251.00375366210938, |
| "learning_rate": 0.00036608367626886145, |
| "loss": 652.2123, |
| "step": 10692 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_cer": 0.11990825845688663, |
| "eval_loss": 315.2755432128906, |
| "eval_runtime": 51.9211, |
| "eval_samples_per_second": 33.262, |
| "eval_steps_per_second": 4.16, |
| "eval_wer": 0.40659153972478623, |
| "step": 10692 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 208.37503051757812, |
| "learning_rate": 0.0003993827160493827, |
| "loss": 702.8481, |
| "step": 11664 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_cer": 0.12610167162025185, |
| "eval_loss": 314.96868896484375, |
| "eval_runtime": 53.0922, |
| "eval_samples_per_second": 32.528, |
| "eval_steps_per_second": 4.068, |
| "eval_wer": 0.4256186647035506, |
| "step": 11664 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 410.18218994140625, |
| "learning_rate": 0.000432681755829904, |
| "loss": 743.4121, |
| "step": 12636 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_cer": 0.16132939337193122, |
| "eval_loss": 311.7789611816406, |
| "eval_runtime": 52.4611, |
| "eval_samples_per_second": 32.92, |
| "eval_steps_per_second": 4.117, |
| "eval_wer": 0.5520697661249221, |
| "step": 12636 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 260.27325439453125, |
| "learning_rate": 0.0004660150891632373, |
| "loss": 799.5601, |
| "step": 13608 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_cer": 0.13851426804336248, |
| "eval_loss": 359.827880859375, |
| "eval_runtime": 52.426, |
| "eval_samples_per_second": 32.942, |
| "eval_steps_per_second": 4.12, |
| "eval_wer": 0.45455575060875475, |
| "step": 13608 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 272.1807556152344, |
| "learning_rate": 0.0004993484224965707, |
| "loss": 816.097, |
| "step": 14580 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_cer": 0.18363770680502345, |
| "eval_loss": 376.90533447265625, |
| "eval_runtime": 51.4749, |
| "eval_samples_per_second": 33.55, |
| "eval_steps_per_second": 4.196, |
| "eval_wer": 0.572059573022255, |
| "step": 14580 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 301.32891845703125, |
| "learning_rate": 0.0004891060813900321, |
| "loss": 815.892, |
| "step": 15552 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_cer": 0.1669558644149329, |
| "eval_loss": 378.6718444824219, |
| "eval_runtime": 51.8917, |
| "eval_samples_per_second": 33.281, |
| "eval_steps_per_second": 4.163, |
| "eval_wer": 0.534571606546237, |
| "step": 15552 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 509.3703918457031, |
| "learning_rate": 0.00047799497027892093, |
| "loss": 803.2213, |
| "step": 16524 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_cer": 0.14506846255604997, |
| "eval_loss": 358.10638427734375, |
| "eval_runtime": 51.7272, |
| "eval_samples_per_second": 33.387, |
| "eval_steps_per_second": 4.176, |
| "eval_wer": 0.4638427997055326, |
| "step": 16524 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 278.8409423828125, |
| "learning_rate": 0.0004668952903520805, |
| "loss": 783.7681, |
| "step": 17496 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_cer": 0.14475063136736133, |
| "eval_loss": 332.414794921875, |
| "eval_runtime": 51.8709, |
| "eval_samples_per_second": 33.294, |
| "eval_steps_per_second": 4.164, |
| "eval_wer": 0.4774902316099439, |
| "step": 17496 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 225.4127960205078, |
| "learning_rate": 0.0004557841792409694, |
| "loss": 730.573, |
| "step": 18468 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_cer": 0.13914134038861306, |
| "eval_loss": 312.7887268066406, |
| "eval_runtime": 53.4549, |
| "eval_samples_per_second": 32.308, |
| "eval_steps_per_second": 4.041, |
| "eval_wer": 0.45478226400135907, |
| "step": 18468 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 144.8686981201172, |
| "learning_rate": 0.00044467306812985823, |
| "loss": 722.3447, |
| "step": 19440 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_cer": 0.1279485285274967, |
| "eval_loss": 316.9328918457031, |
| "eval_runtime": 51.7766, |
| "eval_samples_per_second": 33.355, |
| "eval_steps_per_second": 4.172, |
| "eval_wer": 0.42692111671102556, |
| "step": 19440 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 164.9013214111328, |
| "learning_rate": 0.00043357338820301783, |
| "loss": 683.925, |
| "step": 20412 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_cer": 0.12733863624649955, |
| "eval_loss": 290.0935974121094, |
| "eval_runtime": 51.8702, |
| "eval_samples_per_second": 33.295, |
| "eval_steps_per_second": 4.164, |
| "eval_wer": 0.414462880117787, |
| "step": 20412 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 61.14198684692383, |
| "learning_rate": 0.00042247370827617743, |
| "loss": 650.4165, |
| "step": 21384 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_cer": 0.11940144656141014, |
| "eval_loss": 288.4570007324219, |
| "eval_runtime": 51.3929, |
| "eval_samples_per_second": 33.604, |
| "eval_steps_per_second": 4.203, |
| "eval_wer": 0.39889008437623874, |
| "step": 21384 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 462.71295166015625, |
| "learning_rate": 0.0004113625971650663, |
| "loss": 631.0532, |
| "step": 22356 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_cer": 0.12083598192657241, |
| "eval_loss": 271.81787109375, |
| "eval_runtime": 51.8764, |
| "eval_samples_per_second": 33.291, |
| "eval_steps_per_second": 4.164, |
| "eval_wer": 0.4042697774505918, |
| "step": 22356 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 70.46085357666016, |
| "learning_rate": 0.0004002514860539552, |
| "loss": 617.0345, |
| "step": 23328 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_cer": 0.11391241603243596, |
| "eval_loss": 240.84332275390625, |
| "eval_runtime": 52.4166, |
| "eval_samples_per_second": 32.948, |
| "eval_steps_per_second": 4.121, |
| "eval_wer": 0.38796081318307946, |
| "step": 23328 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 116.7966079711914, |
| "learning_rate": 0.0003891403749428441, |
| "loss": 579.2521, |
| "step": 24300 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_cer": 0.11367189513288779, |
| "eval_loss": 261.6251525878906, |
| "eval_runtime": 51.9507, |
| "eval_samples_per_second": 33.243, |
| "eval_steps_per_second": 4.158, |
| "eval_wer": 0.3953791267908715, |
| "step": 24300 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 43.10677719116211, |
| "learning_rate": 0.0003780406950160037, |
| "loss": 571.9061, |
| "step": 25272 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_cer": 0.10684281959214527, |
| "eval_loss": 248.2078094482422, |
| "eval_runtime": 52.057, |
| "eval_samples_per_second": 33.175, |
| "eval_steps_per_second": 4.149, |
| "eval_wer": 0.36508296053004136, |
| "step": 25272 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 309.9073181152344, |
| "learning_rate": 0.0003669295839048926, |
| "loss": 535.8284, |
| "step": 26244 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_cer": 0.10169739034823991, |
| "eval_loss": 210.3215789794922, |
| "eval_runtime": 51.8198, |
| "eval_samples_per_second": 33.327, |
| "eval_steps_per_second": 4.168, |
| "eval_wer": 0.35488985786284616, |
| "step": 26244 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 109.1835708618164, |
| "learning_rate": 0.00035581847279378143, |
| "loss": 528.5526, |
| "step": 27216 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_cer": 0.10048619581837236, |
| "eval_loss": 223.02474975585938, |
| "eval_runtime": 51.9491, |
| "eval_samples_per_second": 33.244, |
| "eval_steps_per_second": 4.158, |
| "eval_wer": 0.3553995129962059, |
| "step": 27216 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 356.6168518066406, |
| "learning_rate": 0.0003447073616826703, |
| "loss": 495.6612, |
| "step": 28188 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_cer": 0.10109608809936949, |
| "eval_loss": 214.10452270507812, |
| "eval_runtime": 52.5972, |
| "eval_samples_per_second": 32.834, |
| "eval_steps_per_second": 4.107, |
| "eval_wer": 0.34214847952885213, |
| "step": 28188 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 93.94068908691406, |
| "learning_rate": 0.00033359625057155924, |
| "loss": 489.6183, |
| "step": 29160 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_cer": 0.09630285017265965, |
| "eval_loss": 223.9063720703125, |
| "eval_runtime": 52.76, |
| "eval_samples_per_second": 32.733, |
| "eval_steps_per_second": 4.094, |
| "eval_wer": 0.33014326972082225, |
| "step": 29160 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 254.2984619140625, |
| "learning_rate": 0.0003224851394604481, |
| "loss": 465.3683, |
| "step": 30132 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_cer": 0.09759994502379439, |
| "eval_loss": 204.1656951904297, |
| "eval_runtime": 51.6234, |
| "eval_samples_per_second": 33.454, |
| "eval_steps_per_second": 4.184, |
| "eval_wer": 0.3407893991732261, |
| "step": 30132 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 90.14813995361328, |
| "learning_rate": 0.0003113854595336077, |
| "loss": 447.7585, |
| "step": 31104 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_cer": 0.0901867472984349, |
| "eval_loss": 204.1836395263672, |
| "eval_runtime": 52.1105, |
| "eval_samples_per_second": 33.141, |
| "eval_steps_per_second": 4.145, |
| "eval_wer": 0.31474035902372727, |
| "step": 31104 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 284.7950134277344, |
| "learning_rate": 0.0003002743484224966, |
| "loss": 430.5009, |
| "step": 32076 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_cer": 0.09058188877626402, |
| "eval_loss": 199.83998107910156, |
| "eval_runtime": 51.7349, |
| "eval_samples_per_second": 33.382, |
| "eval_steps_per_second": 4.175, |
| "eval_wer": 0.3149102440681805, |
| "step": 32076 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 120.24885559082031, |
| "learning_rate": 0.00028916323731138544, |
| "loss": 423.8555, |
| "step": 33048 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_cer": 0.08400192416719639, |
| "eval_loss": 188.04661560058594, |
| "eval_runtime": 52.0605, |
| "eval_samples_per_second": 33.173, |
| "eval_steps_per_second": 4.149, |
| "eval_wer": 0.29390112690412823, |
| "step": 33048 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 108.36172485351562, |
| "learning_rate": 0.0002780521262002744, |
| "loss": 409.8573, |
| "step": 34020 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_cer": 0.08067758173415568, |
| "eval_loss": 204.4093017578125, |
| "eval_runtime": 51.291, |
| "eval_samples_per_second": 33.671, |
| "eval_steps_per_second": 4.211, |
| "eval_wer": 0.2826887139702135, |
| "step": 34020 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 993.7814331054688, |
| "learning_rate": 0.00026694101508916324, |
| "loss": 385.368, |
| "step": 34992 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_cer": 0.08328036146855189, |
| "eval_loss": 187.58653259277344, |
| "eval_runtime": 51.8038, |
| "eval_samples_per_second": 33.337, |
| "eval_steps_per_second": 4.17, |
| "eval_wer": 0.29106970949657396, |
| "step": 34992 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 117.57772827148438, |
| "learning_rate": 0.00025584133516232284, |
| "loss": 372.0869, |
| "step": 35964 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_cer": 0.07700104798391946, |
| "eval_loss": 181.96978759765625, |
| "eval_runtime": 52.5811, |
| "eval_samples_per_second": 32.844, |
| "eval_steps_per_second": 4.108, |
| "eval_wer": 0.27300526643637807, |
| "step": 35964 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 16.713836669921875, |
| "learning_rate": 0.0002447302240512117, |
| "loss": 360.2271, |
| "step": 36936 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_cer": 0.07677770714862474, |
| "eval_loss": 181.98971557617188, |
| "eval_runtime": 52.5466, |
| "eval_samples_per_second": 32.866, |
| "eval_steps_per_second": 4.111, |
| "eval_wer": 0.2687015119768956, |
| "step": 36936 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 172.10801696777344, |
| "learning_rate": 0.0002336191129401006, |
| "loss": 340.9098, |
| "step": 37908 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_cer": 0.07745631968663563, |
| "eval_loss": 188.7457733154297, |
| "eval_runtime": 52.2724, |
| "eval_samples_per_second": 33.038, |
| "eval_steps_per_second": 4.132, |
| "eval_wer": 0.26502066934707513, |
| "step": 37908 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 556.4381713867188, |
| "learning_rate": 0.00022250800182898947, |
| "loss": 329.051, |
| "step": 38880 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_cer": 0.07525727146219527, |
| "eval_loss": 165.11180114746094, |
| "eval_runtime": 51.7142, |
| "eval_samples_per_second": 33.395, |
| "eval_steps_per_second": 4.177, |
| "eval_wer": 0.26320856220624045, |
| "step": 38880 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 9.856818199157715, |
| "learning_rate": 0.00021139689071787837, |
| "loss": 311.5036, |
| "step": 39852 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_cer": 0.07304104317350146, |
| "eval_loss": 184.70675659179688, |
| "eval_runtime": 51.9748, |
| "eval_samples_per_second": 33.228, |
| "eval_steps_per_second": 4.156, |
| "eval_wer": 0.26309530550993826, |
| "step": 39852 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 177.51455688476562, |
| "learning_rate": 0.00020028577960676727, |
| "loss": 300.7056, |
| "step": 40824 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_cer": 0.07146906729431168, |
| "eval_loss": 169.64820861816406, |
| "eval_runtime": 70.852, |
| "eval_samples_per_second": 24.375, |
| "eval_steps_per_second": 3.049, |
| "eval_wer": 0.25029729882779317, |
| "step": 40824 |
| }, |
| { |
| "epoch": 43.0, |
| "grad_norm": 356.5669860839844, |
| "learning_rate": 0.00018918609967992684, |
| "loss": 293.3254, |
| "step": 41796 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_cer": 0.0705413438246259, |
| "eval_loss": 156.13795471191406, |
| "eval_runtime": 52.1899, |
| "eval_samples_per_second": 33.091, |
| "eval_steps_per_second": 4.139, |
| "eval_wer": 0.24469109236083583, |
| "step": 41796 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 59.47640609741211, |
| "learning_rate": 0.00017807498856881575, |
| "loss": 272.1289, |
| "step": 42768 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_cer": 0.0682821653752985, |
| "eval_loss": 174.929931640625, |
| "eval_runtime": 52.4672, |
| "eval_samples_per_second": 32.916, |
| "eval_steps_per_second": 4.117, |
| "eval_wer": 0.2404439662495045, |
| "step": 42768 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 62.990211486816406, |
| "learning_rate": 0.00016696387745770462, |
| "loss": 266.0369, |
| "step": 43740 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_cer": 0.06732008177710584, |
| "eval_loss": 156.83799743652344, |
| "eval_runtime": 51.9519, |
| "eval_samples_per_second": 33.242, |
| "eval_steps_per_second": 4.158, |
| "eval_wer": 0.23681975196783508, |
| "step": 43740 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 29.509681701660156, |
| "learning_rate": 0.0001558641975308642, |
| "loss": 251.5002, |
| "step": 44712 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_cer": 0.06648684866081399, |
| "eval_loss": 170.41712951660156, |
| "eval_runtime": 51.371, |
| "eval_samples_per_second": 33.618, |
| "eval_steps_per_second": 4.205, |
| "eval_wer": 0.23512090152330256, |
| "step": 44712 |
| }, |
| { |
| "epoch": 47.0, |
| "grad_norm": 654.94677734375, |
| "learning_rate": 0.0001447530864197531, |
| "loss": 235.648, |
| "step": 45684 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_cer": 0.06262133420378992, |
| "eval_loss": 159.82781982421875, |
| "eval_runtime": 51.498, |
| "eval_samples_per_second": 33.535, |
| "eval_steps_per_second": 4.194, |
| "eval_wer": 0.22271929327821507, |
| "step": 45684 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 47.048065185546875, |
| "learning_rate": 0.00013364197530864197, |
| "loss": 229.8135, |
| "step": 46656 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_cer": 0.06253543388252272, |
| "eval_loss": 163.2967071533203, |
| "eval_runtime": 55.2126, |
| "eval_samples_per_second": 31.279, |
| "eval_steps_per_second": 3.912, |
| "eval_wer": 0.21983124752250977, |
| "step": 46656 |
| }, |
| { |
| "epoch": 49.0, |
| "grad_norm": 149.44444274902344, |
| "learning_rate": 0.00012253086419753087, |
| "loss": 220.4877, |
| "step": 47628 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_cer": 0.0611610287422475, |
| "eval_loss": 167.35101318359375, |
| "eval_runtime": 51.8387, |
| "eval_samples_per_second": 33.315, |
| "eval_steps_per_second": 4.167, |
| "eval_wer": 0.21382864261849482, |
| "step": 47628 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 6.606735706329346, |
| "learning_rate": 0.00011143118427069045, |
| "loss": 210.467, |
| "step": 48600 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_cer": 0.061470269898809424, |
| "eval_loss": 163.87408447265625, |
| "eval_runtime": 52.7221, |
| "eval_samples_per_second": 32.757, |
| "eval_steps_per_second": 4.097, |
| "eval_wer": 0.21377201427034373, |
| "step": 48600 |
| }, |
| { |
| "epoch": 51.0, |
| "grad_norm": 181.8941192626953, |
| "learning_rate": 0.00010032007315957933, |
| "loss": 208.0692, |
| "step": 49572 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_cer": 0.05868709948975209, |
| "eval_loss": 159.9131622314453, |
| "eval_runtime": 52.7472, |
| "eval_samples_per_second": 32.741, |
| "eval_steps_per_second": 4.095, |
| "eval_wer": 0.20482473526247238, |
| "step": 49572 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 153.64923095703125, |
| "learning_rate": 8.920896204846822e-05, |
| "loss": 188.9099, |
| "step": 50544 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_cer": 0.05963200302369131, |
| "eval_loss": 160.80023193359375, |
| "eval_runtime": 52.5438, |
| "eval_samples_per_second": 32.868, |
| "eval_steps_per_second": 4.111, |
| "eval_wer": 0.20958151650716347, |
| "step": 50544 |
| }, |
| { |
| "epoch": 53.0, |
| "grad_norm": 438.6473388671875, |
| "learning_rate": 7.809785093735711e-05, |
| "loss": 183.2089, |
| "step": 51516 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_cer": 0.05639356091191781, |
| "eval_loss": 154.5007781982422, |
| "eval_runtime": 53.724, |
| "eval_samples_per_second": 32.146, |
| "eval_steps_per_second": 4.021, |
| "eval_wer": 0.1979160767880401, |
| "step": 51516 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 41.24275207519531, |
| "learning_rate": 6.698673982624601e-05, |
| "loss": 174.8595, |
| "step": 52488 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_cer": 0.055955469273455086, |
| "eval_loss": 157.23744201660156, |
| "eval_runtime": 54.2259, |
| "eval_samples_per_second": 31.848, |
| "eval_steps_per_second": 3.983, |
| "eval_wer": 0.1963871113879608, |
| "step": 52488 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 1312.384033203125, |
| "learning_rate": 5.587562871513489e-05, |
| "loss": 167.0076, |
| "step": 53460 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_cer": 0.05501915577164258, |
| "eval_loss": 157.28213500976562, |
| "eval_runtime": 53.1558, |
| "eval_samples_per_second": 32.489, |
| "eval_steps_per_second": 4.064, |
| "eval_wer": 0.19180021518772297, |
| "step": 53460 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 87.78731536865234, |
| "learning_rate": 4.476451760402378e-05, |
| "loss": 162.867, |
| "step": 54432 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_cer": 0.05392822169154913, |
| "eval_loss": 158.17630004882812, |
| "eval_runtime": 53.3929, |
| "eval_samples_per_second": 32.345, |
| "eval_steps_per_second": 4.045, |
| "eval_wer": 0.185118070105895, |
| "step": 54432 |
| }, |
| { |
| "epoch": 57.0, |
| "grad_norm": 1.752543330192566, |
| "learning_rate": 3.366483767718336e-05, |
| "loss": 154.225, |
| "step": 55404 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_cer": 0.053189478928651196, |
| "eval_loss": 157.72227478027344, |
| "eval_runtime": 52.67, |
| "eval_samples_per_second": 32.789, |
| "eval_steps_per_second": 4.101, |
| "eval_wer": 0.18290956452800272, |
| "step": 55404 |
| }, |
| { |
| "epoch": 58.0, |
| "grad_norm": 115.63265228271484, |
| "learning_rate": 2.2565157750342938e-05, |
| "loss": 149.2202, |
| "step": 56376 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_cer": 0.05277715738656862, |
| "eval_loss": 159.25308227539062, |
| "eval_runtime": 52.3955, |
| "eval_samples_per_second": 32.961, |
| "eval_steps_per_second": 4.122, |
| "eval_wer": 0.18183362591313212, |
| "step": 56376 |
| }, |
| { |
| "epoch": 59.0, |
| "grad_norm": 131.82754516601562, |
| "learning_rate": 1.1454046639231825e-05, |
| "loss": 144.8521, |
| "step": 57348 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_cer": 0.052193035201951656, |
| "eval_loss": 158.10299682617188, |
| "eval_runtime": 52.2821, |
| "eval_samples_per_second": 33.032, |
| "eval_steps_per_second": 4.131, |
| "eval_wer": 0.1796251203352398, |
| "step": 57348 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 105.3376693725586, |
| "learning_rate": 3.4293552812071326e-07, |
| "loss": 145.8063, |
| "step": 58320 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_cer": 0.05190956414176989, |
| "eval_loss": 156.37322998046875, |
| "eval_runtime": 52.1099, |
| "eval_samples_per_second": 33.141, |
| "eval_steps_per_second": 4.145, |
| "eval_wer": 0.1786624384166714, |
| "step": 58320 |
| }, |
| { |
| "epoch": 60.0, |
| "step": 58320, |
| "total_flos": 4.979407221481362e+19, |
| "train_loss": 513.291294528035, |
| "train_runtime": 37892.4227, |
| "train_samples_per_second": 24.607, |
| "train_steps_per_second": 1.539 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 58320, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 60, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.979407221481362e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|