{
  "best_metric": 0.45059974747474746,
  "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-22k/outputs/v2/checkpoint-2160",
  "epoch": 60.0,
  "eval_steps": 500,
  "global_step": 64800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 72.47711944580078,
      "learning_rate": 3.280864197530864e-05,
      "loss": 3988.287,
      "step": 1080
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.17220767672775775,
      "eval_loss": 303.8312072753906,
      "eval_runtime": 23.5046,
      "eval_samples_per_second": 37.865,
      "eval_steps_per_second": 4.765,
      "eval_wer": 0.5741792929292929,
      "step": 1080
    },
    {
      "epoch": 2.0,
      "grad_norm": 781.6192626953125,
      "learning_rate": 6.614197530864198e-05,
      "loss": 674.1274,
      "step": 2160
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.14150570567603615,
      "eval_loss": 270.2951965332031,
      "eval_runtime": 22.9653,
      "eval_samples_per_second": 38.754,
      "eval_steps_per_second": 4.877,
      "eval_wer": 0.45059974747474746,
      "step": 2160
    },
    {
      "epoch": 3.0,
      "grad_norm": 70.77464294433594,
      "learning_rate": 9.94753086419753e-05,
      "loss": 517.9397,
      "step": 3240
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.1630440152151361,
      "eval_loss": 339.81317138671875,
      "eval_runtime": 23.0091,
      "eval_samples_per_second": 38.68,
      "eval_steps_per_second": 4.868,
      "eval_wer": 0.4930555555555556,
      "step": 3240
    },
    {
      "epoch": 4.0,
      "grad_norm": 499.06243896484375,
      "learning_rate": 0.00013280864197530865,
      "loss": 478.4345,
      "step": 4320
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.16909548979894284,
      "eval_loss": 292.6837463378906,
      "eval_runtime": 22.9042,
      "eval_samples_per_second": 38.857,
      "eval_steps_per_second": 4.89,
      "eval_wer": 0.4895833333333333,
      "step": 4320
    },
    {
      "epoch": 5.0,
      "grad_norm": 550.7621459960938,
      "learning_rate": 0.00016614197530864197,
      "loss": 476.3436,
      "step": 5400
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.17993874425727413,
      "eval_loss": 331.53070068359375,
      "eval_runtime": 22.969,
      "eval_samples_per_second": 38.748,
      "eval_steps_per_second": 4.876,
      "eval_wer": 0.509469696969697,
      "step": 5400
    },
    {
      "epoch": 6.0,
      "grad_norm": 407.94061279296875,
      "learning_rate": 0.0001994753086419753,
      "loss": 507.3865,
      "step": 6480
    },
    {
      "epoch": 6.0,
      "eval_cer": 0.20058785753099836,
      "eval_loss": 359.2405090332031,
      "eval_runtime": 23.949,
      "eval_samples_per_second": 37.162,
      "eval_steps_per_second": 4.677,
      "eval_wer": 0.5620265151515151,
      "step": 6480
    },
    {
      "epoch": 7.0,
      "grad_norm": 248.67550659179688,
      "learning_rate": 0.00023280864197530864,
      "loss": 511.676,
      "step": 7560
    },
    {
      "epoch": 7.0,
      "eval_cer": 0.2163710912414168,
      "eval_loss": 406.4503479003906,
      "eval_runtime": 25.894,
      "eval_samples_per_second": 34.371,
      "eval_steps_per_second": 4.325,
      "eval_wer": 0.5880681818181818,
      "step": 7560
    },
    {
      "epoch": 8.0,
      "grad_norm": 398.6786193847656,
      "learning_rate": 0.00026614197530864197,
      "loss": 546.8947,
      "step": 8640
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.2145185990218841,
      "eval_loss": 367.28216552734375,
      "eval_runtime": 24.1336,
      "eval_samples_per_second": 36.878,
      "eval_steps_per_second": 4.641,
      "eval_wer": 0.5834911616161617,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "grad_norm": 268.1892395019531,
      "learning_rate": 0.0002994753086419753,
      "loss": 578.7039,
      "step": 9720
    },
    {
      "epoch": 9.0,
      "eval_cer": 0.25860791384676185,
      "eval_loss": 441.93701171875,
      "eval_runtime": 24.1196,
      "eval_samples_per_second": 36.899,
      "eval_steps_per_second": 4.644,
      "eval_wer": 0.6559343434343434,
      "step": 9720
    },
    {
      "epoch": 10.0,
      "grad_norm": 191.7284393310547,
      "learning_rate": 0.0003328086419753086,
      "loss": 610.5788,
      "step": 10800
    },
    {
      "epoch": 10.0,
      "eval_cer": 0.2608803043027219,
      "eval_loss": 454.6888427734375,
      "eval_runtime": 24.3544,
      "eval_samples_per_second": 36.544,
      "eval_steps_per_second": 4.599,
      "eval_wer": 0.6856060606060606,
      "step": 10800
    },
    {
      "epoch": 11.0,
      "grad_norm": 265.8825988769531,
      "learning_rate": 0.00036614197530864196,
      "loss": 658.402,
      "step": 11880
    },
    {
      "epoch": 11.0,
      "eval_cer": 0.30240082991651435,
      "eval_loss": 472.7101745605469,
      "eval_runtime": 24.2238,
      "eval_samples_per_second": 36.741,
      "eval_steps_per_second": 4.624,
      "eval_wer": 0.7457386363636364,
      "step": 11880
    },
    {
      "epoch": 12.0,
      "grad_norm": 260.43603515625,
      "learning_rate": 0.00039944444444444446,
      "loss": 692.8005,
      "step": 12960
    },
    {
      "epoch": 12.0,
      "eval_cer": 0.2891369856246604,
      "eval_loss": 474.2547607421875,
      "eval_runtime": 33.8391,
      "eval_samples_per_second": 26.301,
      "eval_steps_per_second": 3.31,
      "eval_wer": 0.6993371212121212,
      "step": 12960
    },
    {
      "epoch": 13.0,
      "grad_norm": 305.93817138671875,
      "learning_rate": 0.00043277777777777775,
      "loss": 726.6378,
      "step": 14040
    },
    {
      "epoch": 13.0,
      "eval_cer": 0.2942251642543101,
      "eval_loss": 476.57232666015625,
      "eval_runtime": 25.299,
      "eval_samples_per_second": 35.179,
      "eval_steps_per_second": 4.427,
      "eval_wer": 0.7154356060606061,
      "step": 14040
    },
    {
      "epoch": 14.0,
      "grad_norm": 124.7826919555664,
      "learning_rate": 0.00046608024691358025,
      "loss": 775.3879,
      "step": 15120
    },
    {
      "epoch": 14.0,
      "eval_cer": 0.2997579410166477,
      "eval_loss": 469.1170959472656,
      "eval_runtime": 24.1494,
      "eval_samples_per_second": 36.854,
      "eval_steps_per_second": 4.638,
      "eval_wer": 0.7359532828282829,
      "step": 15120
    },
    {
      "epoch": 15.0,
      "grad_norm": 18.523788452148438,
      "learning_rate": 0.0004994135802469135,
      "loss": 803.9686,
      "step": 16200
    },
    {
      "epoch": 15.0,
      "eval_cer": 0.30311712690806697,
      "eval_loss": 503.41363525390625,
      "eval_runtime": 24.0216,
      "eval_samples_per_second": 37.05,
      "eval_steps_per_second": 4.662,
      "eval_wer": 0.7706755050505051,
      "step": 16200
    },
    {
      "epoch": 16.0,
      "grad_norm": 951.2265014648438,
      "learning_rate": 0.0004891049382716049,
      "loss": 818.0579,
      "step": 17280
    },
    {
      "epoch": 16.0,
      "eval_cer": 0.31324408437484563,
      "eval_loss": 544.9780883789062,
      "eval_runtime": 24.4651,
      "eval_samples_per_second": 36.378,
      "eval_steps_per_second": 4.578,
      "eval_wer": 0.7586805555555556,
      "step": 17280
    },
    {
      "epoch": 17.0,
      "grad_norm": 137.27139282226562,
      "learning_rate": 0.0004779938271604938,
      "loss": 808.2149,
      "step": 18360
    },
    {
      "epoch": 17.0,
      "eval_cer": 0.30148693375487823,
      "eval_loss": 493.0830078125,
      "eval_runtime": 23.7982,
      "eval_samples_per_second": 37.398,
      "eval_steps_per_second": 4.706,
      "eval_wer": 0.7395833333333334,
      "step": 18360
    },
    {
      "epoch": 18.0,
      "grad_norm": 212.45159912109375,
      "learning_rate": 0.00046689300411522635,
      "loss": 767.5317,
      "step": 19440
    },
    {
      "epoch": 18.0,
      "eval_cer": 0.3046238205799536,
      "eval_loss": 527.8341064453125,
      "eval_runtime": 25.0889,
      "eval_samples_per_second": 35.474,
      "eval_steps_per_second": 4.464,
      "eval_wer": 0.7296401515151515,
      "step": 19440
    },
    {
      "epoch": 19.0,
      "grad_norm": 351.9056701660156,
      "learning_rate": 0.0004557818930041152,
      "loss": 739.8194,
      "step": 20520
    },
    {
      "epoch": 19.0,
      "eval_cer": 0.30847700439658154,
      "eval_loss": 500.3179016113281,
      "eval_runtime": 24.165,
      "eval_samples_per_second": 36.83,
      "eval_steps_per_second": 4.635,
      "eval_wer": 0.7558396464646465,
      "step": 20520
    },
    {
      "epoch": 20.0,
      "grad_norm": 279.2209167480469,
      "learning_rate": 0.00044467078189300416,
      "loss": 716.691,
      "step": 21600
    },
    {
      "epoch": 20.0,
      "eval_cer": 0.2984241466185842,
      "eval_loss": 545.5074462890625,
      "eval_runtime": 23.6519,
      "eval_samples_per_second": 37.629,
      "eval_steps_per_second": 4.735,
      "eval_wer": 0.7234848484848485,
      "step": 21600
    },
    {
      "epoch": 21.0,
      "grad_norm": 1185.030029296875,
      "learning_rate": 0.00043356995884773664,
      "loss": 682.661,
      "step": 22680
    },
    {
      "epoch": 21.0,
      "eval_cer": 0.29002618189003604,
      "eval_loss": 516.1239013671875,
      "eval_runtime": 23.2998,
      "eval_samples_per_second": 38.198,
      "eval_steps_per_second": 4.807,
      "eval_wer": 0.751104797979798,
      "step": 22680
    },
    {
      "epoch": 22.0,
      "grad_norm": 240.5199737548828,
      "learning_rate": 0.00042245884773662554,
      "loss": 657.0491,
      "step": 23760
    },
    {
      "epoch": 22.0,
      "eval_cer": 0.2775774341747765,
      "eval_loss": 549.7003784179688,
      "eval_runtime": 24.7473,
      "eval_samples_per_second": 35.964,
      "eval_steps_per_second": 4.526,
      "eval_wer": 0.6968118686868687,
      "step": 23760
    },
    {
      "epoch": 23.0,
      "grad_norm": 62.15650939941406,
      "learning_rate": 0.0004113477366255144,
      "loss": 629.1355,
      "step": 24840
    },
    {
      "epoch": 23.0,
      "eval_cer": 0.28081312058489355,
      "eval_loss": 500.3792724609375,
      "eval_runtime": 23.2668,
      "eval_samples_per_second": 38.252,
      "eval_steps_per_second": 4.814,
      "eval_wer": 0.6974431818181818,
      "step": 24840
    },
    {
      "epoch": 24.0,
      "grad_norm": 132.52377319335938,
      "learning_rate": 0.00040023662551440334,
      "loss": 607.2812,
      "step": 25920
    },
    {
      "epoch": 24.0,
      "eval_cer": 0.26996986612656226,
      "eval_loss": 528.1495971679688,
      "eval_runtime": 24.3686,
      "eval_samples_per_second": 36.522,
      "eval_steps_per_second": 4.596,
      "eval_wer": 0.695864898989899,
      "step": 25920
    },
    {
      "epoch": 25.0,
      "grad_norm": 199.2706756591797,
      "learning_rate": 0.0003891255144032922,
      "loss": 595.4605,
      "step": 27000
    },
    {
      "epoch": 25.0,
      "eval_cer": 0.2833819097959789,
      "eval_loss": 495.3538513183594,
      "eval_runtime": 23.1173,
      "eval_samples_per_second": 38.499,
      "eval_steps_per_second": 4.845,
      "eval_wer": 0.7015467171717171,
      "step": 27000
    },
    {
      "epoch": 26.0,
      "grad_norm": 902.6602172851562,
      "learning_rate": 0.0003780246913580247,
      "loss": 555.9978,
      "step": 28080
    },
    {
      "epoch": 26.0,
      "eval_cer": 0.2781702316850269,
      "eval_loss": 500.2841491699219,
      "eval_runtime": 23.4702,
      "eval_samples_per_second": 37.92,
      "eval_steps_per_second": 4.772,
      "eval_wer": 0.7070707070707071,
      "step": 28080
    },
    {
      "epoch": 27.0,
      "grad_norm": 215.98854064941406,
      "learning_rate": 0.0003669135802469136,
      "loss": 544.9409,
      "step": 29160
    },
    {
      "epoch": 27.0,
      "eval_cer": 0.2840488069950106,
      "eval_loss": 476.8066711425781,
      "eval_runtime": 23.3103,
      "eval_samples_per_second": 38.181,
      "eval_steps_per_second": 4.805,
      "eval_wer": 0.7075441919191919,
      "step": 29160
    },
    {
      "epoch": 28.0,
      "grad_norm": 149.82615661621094,
      "learning_rate": 0.0003558024691358025,
      "loss": 517.4491,
      "step": 30240
    },
    {
      "epoch": 28.0,
      "eval_cer": 0.2703403645704688,
      "eval_loss": 513.64892578125,
      "eval_runtime": 23.5863,
      "eval_samples_per_second": 37.734,
      "eval_steps_per_second": 4.749,
      "eval_wer": 0.6824494949494949,
      "step": 30240
    },
    {
      "epoch": 29.0,
      "grad_norm": 158.89735412597656,
      "learning_rate": 0.000344701646090535,
      "loss": 502.3091,
      "step": 31320
    },
    {
      "epoch": 29.0,
      "eval_cer": 0.26236229807834804,
      "eval_loss": 450.82098388671875,
      "eval_runtime": 25.6603,
      "eval_samples_per_second": 34.684,
      "eval_steps_per_second": 4.365,
      "eval_wer": 0.6879734848484849,
      "step": 31320
    },
    {
      "epoch": 30.0,
      "grad_norm": 51.25532913208008,
      "learning_rate": 0.00033359053497942386,
      "loss": 477.324,
      "step": 32400
    },
    {
      "epoch": 30.0,
      "eval_cer": 0.2615719013980141,
      "eval_loss": 469.6161804199219,
      "eval_runtime": 23.2095,
      "eval_samples_per_second": 38.346,
      "eval_steps_per_second": 4.826,
      "eval_wer": 0.65625,
      "step": 32400
    },
    {
      "epoch": 31.0,
      "grad_norm": 70.39710235595703,
      "learning_rate": 0.00032247942386831276,
      "loss": 461.2854,
      "step": 33480
    },
    {
      "epoch": 31.0,
      "eval_cer": 0.24838215679494147,
      "eval_loss": 480.2809753417969,
      "eval_runtime": 25.0487,
      "eval_samples_per_second": 35.531,
      "eval_steps_per_second": 4.471,
      "eval_wer": 0.6639835858585859,
      "step": 33480
    },
    {
      "epoch": 32.0,
      "grad_norm": 117.32414245605469,
      "learning_rate": 0.00031136831275720167,
      "loss": 452.682,
      "step": 34560
    },
    {
      "epoch": 32.0,
      "eval_cer": 0.26517808625203776,
      "eval_loss": 477.9761962890625,
      "eval_runtime": 23.3634,
      "eval_samples_per_second": 38.094,
      "eval_steps_per_second": 4.794,
      "eval_wer": 0.6638257575757576,
      "step": 34560
    },
    {
      "epoch": 33.0,
      "grad_norm": 3.261843204498291,
      "learning_rate": 0.00030025720164609057,
      "loss": 424.353,
      "step": 35640
    },
    {
      "epoch": 33.0,
      "eval_cer": 0.25198834164896505,
      "eval_loss": 444.65106201171875,
      "eval_runtime": 23.2973,
      "eval_samples_per_second": 38.202,
      "eval_steps_per_second": 4.807,
      "eval_wer": 0.6532512626262627,
      "step": 35640
    },
    {
      "epoch": 34.0,
      "grad_norm": 66.93054962158203,
      "learning_rate": 0.00028915637860082305,
      "loss": 417.6179,
      "step": 36720
    },
    {
      "epoch": 34.0,
      "eval_cer": 0.25255643926295507,
      "eval_loss": 412.5328674316406,
      "eval_runtime": 23.4814,
      "eval_samples_per_second": 37.902,
      "eval_steps_per_second": 4.77,
      "eval_wer": 0.6504103535353535,
      "step": 36720
    },
    {
      "epoch": 35.0,
      "grad_norm": 53.988014221191406,
      "learning_rate": 0.00027804526748971195,
      "loss": 389.705,
      "step": 37800
    },
    {
      "epoch": 35.0,
      "eval_cer": 0.26332559403250505,
      "eval_loss": 485.3770446777344,
      "eval_runtime": 24.4441,
      "eval_samples_per_second": 36.41,
      "eval_steps_per_second": 4.582,
      "eval_wer": 0.6744002525252525,
      "step": 37800
    },
    {
      "epoch": 36.0,
      "grad_norm": 106.55242156982422,
      "learning_rate": 0.00026694444444444443,
      "loss": 375.7767,
      "step": 38880
    },
    {
      "epoch": 36.0,
      "eval_cer": 0.2663883811687991,
      "eval_loss": 467.3829345703125,
      "eval_runtime": 25.0973,
      "eval_samples_per_second": 35.462,
      "eval_steps_per_second": 4.463,
      "eval_wer": 0.6474116161616161,
      "step": 38880
    },
    {
      "epoch": 37.0,
      "grad_norm": 42.98028564453125,
      "learning_rate": 0.00025583333333333334,
      "loss": 361.8829,
      "step": 39960
    },
    {
      "epoch": 37.0,
      "eval_cer": 0.2516672429975794,
      "eval_loss": 469.9674377441406,
      "eval_runtime": 23.3905,
      "eval_samples_per_second": 38.05,
      "eval_steps_per_second": 4.788,
      "eval_wer": 0.631155303030303,
      "step": 39960
    },
    {
      "epoch": 38.0,
      "grad_norm": 509.82037353515625,
      "learning_rate": 0.00024472222222222224,
      "loss": 352.311,
      "step": 41040
    },
    {
      "epoch": 38.0,
      "eval_cer": 0.2544830311712691,
      "eval_loss": 457.0285339355469,
      "eval_runtime": 23.3717,
      "eval_samples_per_second": 38.08,
      "eval_steps_per_second": 4.792,
      "eval_wer": 0.6494633838383839,
      "step": 41040
    },
    {
      "epoch": 39.0,
      "grad_norm": 173.2177276611328,
      "learning_rate": 0.00023363168724279835,
      "loss": 340.1846,
      "step": 42120
    },
    {
      "epoch": 39.0,
      "eval_cer": 0.24615916613150224,
      "eval_loss": 463.1924743652344,
      "eval_runtime": 23.316,
      "eval_samples_per_second": 38.171,
      "eval_steps_per_second": 4.804,
      "eval_wer": 0.634469696969697,
      "step": 42120
    },
    {
      "epoch": 40.0,
      "grad_norm": 435.5245361328125,
      "learning_rate": 0.00022252057613168726,
      "loss": 323.3272,
      "step": 43200
    },
    {
      "epoch": 40.0,
      "eval_cer": 0.23939139455614286,
      "eval_loss": 421.072509765625,
      "eval_runtime": 23.2659,
      "eval_samples_per_second": 38.253,
      "eval_steps_per_second": 4.814,
      "eval_wer": 0.6171085858585859,
      "step": 43200
    },
    {
      "epoch": 41.0,
      "grad_norm": 286.759765625,
      "learning_rate": 0.00021141975308641977,
      "loss": 312.6201,
      "step": 44280
    },
    {
      "epoch": 41.0,
      "eval_cer": 0.23835399891320455,
      "eval_loss": 443.36468505859375,
      "eval_runtime": 23.0069,
      "eval_samples_per_second": 38.684,
      "eval_steps_per_second": 4.868,
      "eval_wer": 0.6201073232323232,
      "step": 44280
    },
    {
      "epoch": 42.0,
      "grad_norm": 305.650390625,
      "learning_rate": 0.00020030864197530864,
      "loss": 301.6251,
      "step": 45360
    },
    {
      "epoch": 42.0,
      "eval_cer": 0.23499481302178532,
      "eval_loss": 429.3775634765625,
      "eval_runtime": 25.784,
      "eval_samples_per_second": 34.517,
      "eval_steps_per_second": 4.344,
      "eval_wer": 0.610479797979798,
      "step": 45360
    },
    {
      "epoch": 43.0,
      "grad_norm": 1.2366968393325806,
      "learning_rate": 0.00018919753086419752,
      "loss": 284.7902,
      "step": 46440
    },
    {
      "epoch": 43.0,
      "eval_cer": 0.23210492515931433,
      "eval_loss": 466.15533447265625,
      "eval_runtime": 22.7914,
      "eval_samples_per_second": 39.05,
      "eval_steps_per_second": 4.914,
      "eval_wer": 0.602114898989899,
      "step": 46440
    },
    {
      "epoch": 44.0,
      "grad_norm": 194.98329162597656,
      "learning_rate": 0.00017809670781893002,
      "loss": 279.8459,
      "step": 47520
    },
    {
      "epoch": 44.0,
      "eval_cer": 0.23185792619670997,
      "eval_loss": 487.2147521972656,
      "eval_runtime": 23.4301,
      "eval_samples_per_second": 37.985,
      "eval_steps_per_second": 4.78,
      "eval_wer": 0.6161616161616161,
      "step": 47520
    },
    {
      "epoch": 45.0,
      "grad_norm": 80.29993438720703,
      "learning_rate": 0.00016698559670781893,
      "loss": 260.5616,
      "step": 48600
    },
    {
      "epoch": 45.0,
      "eval_cer": 0.2306476312799486,
      "eval_loss": 445.4757080078125,
      "eval_runtime": 23.0166,
      "eval_samples_per_second": 38.668,
      "eval_steps_per_second": 4.866,
      "eval_wer": 0.6022727272727273,
      "step": 48600
    },
    {
      "epoch": 46.0,
      "grad_norm": 64.79682159423828,
      "learning_rate": 0.00015587448559670783,
      "loss": 254.3347,
      "step": 49680
    },
    {
      "epoch": 46.0,
      "eval_cer": 0.2392184952823198,
      "eval_loss": 439.6965026855469,
      "eval_runtime": 22.7648,
      "eval_samples_per_second": 39.095,
      "eval_steps_per_second": 4.92,
      "eval_wer": 0.6054292929292929,
      "step": 49680
    },
    {
      "epoch": 47.0,
      "grad_norm": 124.6217269897461,
      "learning_rate": 0.0001447633744855967,
      "loss": 244.043,
      "step": 50760
    },
    {
      "epoch": 47.0,
      "eval_cer": 0.23166032702662648,
      "eval_loss": 459.5867919921875,
      "eval_runtime": 22.7729,
      "eval_samples_per_second": 39.081,
      "eval_steps_per_second": 4.918,
      "eval_wer": 0.5885416666666666,
      "step": 50760
    },
    {
      "epoch": 48.0,
      "grad_norm": 3.2691023349761963,
      "learning_rate": 0.0001336522633744856,
      "loss": 227.4755,
      "step": 51840
    },
    {
      "epoch": 48.0,
      "eval_cer": 0.2307711307612508,
      "eval_loss": 492.8036804199219,
      "eval_runtime": 24.8308,
      "eval_samples_per_second": 35.843,
      "eval_steps_per_second": 4.511,
      "eval_wer": 0.6002209595959596,
      "step": 51840
    },
    {
      "epoch": 49.0,
      "grad_norm": 4.506575107574463,
      "learning_rate": 0.00012254115226337448,
      "loss": 216.7,
      "step": 52920
    },
    {
      "epoch": 49.0,
      "eval_cer": 0.2282764412389468,
      "eval_loss": 452.6693115234375,
      "eval_runtime": 23.3003,
      "eval_samples_per_second": 38.197,
      "eval_steps_per_second": 4.807,
      "eval_wer": 0.5934343434343434,
      "step": 52920
    },
    {
      "epoch": 50.0,
      "grad_norm": 296.6214294433594,
      "learning_rate": 0.00011143004115226338,
      "loss": 211.8976,
      "step": 54000
    },
    {
      "epoch": 50.0,
      "eval_cer": 0.22879513906041596,
      "eval_loss": 482.38861083984375,
      "eval_runtime": 23.149,
      "eval_samples_per_second": 38.447,
      "eval_steps_per_second": 4.838,
      "eval_wer": 0.5946969696969697,
      "step": 54000
    },
    {
      "epoch": 51.0,
      "grad_norm": 4.26457405090332,
      "learning_rate": 0.00010031893004115227,
      "loss": 202.0287,
      "step": 55080
    },
    {
      "epoch": 51.0,
      "eval_cer": 0.2352665118806501,
      "eval_loss": 475.8258361816406,
      "eval_runtime": 22.9599,
      "eval_samples_per_second": 38.763,
      "eval_steps_per_second": 4.878,
      "eval_wer": 0.6052714646464646,
      "step": 55080
    },
    {
      "epoch": 52.0,
      "grad_norm": 203.13275146484375,
      "learning_rate": 8.920781893004115e-05,
      "loss": 186.2731,
      "step": 56160
    },
    {
      "epoch": 52.0,
      "eval_cer": 0.23109222941263646,
      "eval_loss": 465.3925476074219,
      "eval_runtime": 24.1264,
      "eval_samples_per_second": 36.889,
      "eval_steps_per_second": 4.642,
      "eval_wer": 0.5907512626262627,
      "step": 56160
    },
    {
      "epoch": 53.0,
      "grad_norm": 295.8645935058594,
      "learning_rate": 7.809670781893003e-05,
      "loss": 187.1888,
      "step": 57240
    },
    {
      "epoch": 53.0,
      "eval_cer": 0.2247443560737045,
      "eval_loss": 459.6522216796875,
      "eval_runtime": 23.8259,
      "eval_samples_per_second": 37.354,
      "eval_steps_per_second": 4.701,
      "eval_wer": 0.5890151515151515,
      "step": 57240
    },
    {
      "epoch": 54.0,
      "grad_norm": 2880.46630859375,
      "learning_rate": 6.700617283950618e-05,
      "loss": 179.0453,
      "step": 58320
    },
    {
      "epoch": 54.0,
      "eval_cer": 0.22434915773353753,
      "eval_loss": 473.7304382324219,
      "eval_runtime": 23.3348,
      "eval_samples_per_second": 38.14,
      "eval_steps_per_second": 4.8,
      "eval_wer": 0.5789141414141414,
      "step": 58320
    },
    {
      "epoch": 55.0,
      "grad_norm": 42.78361129760742,
      "learning_rate": 5.5895061728395066e-05,
      "loss": 165.2614,
      "step": 59400
    },
    {
      "epoch": 55.0,
      "eval_cer": 0.22375636022328707,
      "eval_loss": 453.96917724609375,
      "eval_runtime": 26.3542,
      "eval_samples_per_second": 33.771,
      "eval_steps_per_second": 4.25,
      "eval_wer": 0.5787563131313131,
      "step": 59400
    },
    {
      "epoch": 56.0,
      "grad_norm": 187.145263671875,
      "learning_rate": 4.478395061728395e-05,
      "loss": 160.4416,
      "step": 60480
    },
    {
      "epoch": 56.0,
      "eval_cer": 0.22118757101220174,
      "eval_loss": 474.8050537109375,
      "eval_runtime": 23.2203,
      "eval_samples_per_second": 38.329,
      "eval_steps_per_second": 4.823,
      "eval_wer": 0.5732323232323232,
      "step": 60480
    },
    {
      "epoch": 57.0,
      "grad_norm": 83.02323150634766,
      "learning_rate": 3.367283950617284e-05,
      "loss": 153.8781,
      "step": 61560
    },
    {
      "epoch": 57.0,
      "eval_cer": 0.22015017536926346,
      "eval_loss": 478.45806884765625,
      "eval_runtime": 23.1723,
      "eval_samples_per_second": 38.408,
      "eval_steps_per_second": 4.833,
      "eval_wer": 0.5729166666666666,
      "step": 61560
    },
    {
      "epoch": 58.0,
      "grad_norm": 0.9642492532730103,
      "learning_rate": 2.2582304526748973e-05,
      "loss": 151.1706,
      "step": 62640
    },
    {
      "epoch": 58.0,
      "eval_cer": 0.21960677765153386,
      "eval_loss": 467.0158386230469,
      "eval_runtime": 23.0956,
      "eval_samples_per_second": 38.535,
      "eval_steps_per_second": 4.849,
      "eval_wer": 0.5688131313131313,
      "step": 62640
    },
    {
      "epoch": 59.0,
      "grad_norm": 721.9008178710938,
      "learning_rate": 1.147119341563786e-05,
      "loss": 147.0876,
      "step": 63720
    },
    {
      "epoch": 59.0,
      "eval_cer": 0.21600059279751024,
      "eval_loss": 474.2251892089844,
      "eval_runtime": 22.9681,
      "eval_samples_per_second": 38.749,
      "eval_steps_per_second": 4.876,
      "eval_wer": 0.5602904040404041,
      "step": 63720
    },
    {
      "epoch": 60.0,
      "grad_norm": 8.912408828735352,
      "learning_rate": 3.60082304526749e-07,
      "loss": 143.0797,
      "step": 64800
    },
    {
      "epoch": 60.0,
      "eval_cer": 0.2167909894778442,
      "eval_loss": 469.5599060058594,
      "eval_runtime": 23.6755,
      "eval_samples_per_second": 37.592,
      "eval_steps_per_second": 4.731,
      "eval_wer": 0.5640782828282829,
      "step": 64800
    },
    {
      "epoch": 60.0,
      "step": 64800,
      "total_flos": 5.518391525676315e+19,
      "train_loss": 505.59943335262346,
      "train_runtime": 39665.0417,
      "train_samples_per_second": 26.119,
      "train_steps_per_second": 1.634
    }
  ],
  "logging_steps": 500,
  "max_steps": 64800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.518391525676315e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}