{
"best_global_step": 1000,
"best_metric": 57.38794435857806,
"best_model_checkpoint": "./whisper-small-sdn-2025/asr_training_runs/cycle_3/checkpoint-1000",
"epoch": 8.460236886632826,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04230118443316413,
"grad_norm": 22.13687515258789,
"learning_rate": 9.960000000000001e-06,
"loss": 2.9507,
"step": 25
},
{
"epoch": 0.08460236886632826,
"grad_norm": 22.51654815673828,
"learning_rate": 9.91e-06,
"loss": 1.7271,
"step": 50
},
{
"epoch": 0.12690355329949238,
"grad_norm": 23.82666015625,
"learning_rate": 9.86e-06,
"loss": 1.5723,
"step": 75
},
{
"epoch": 0.1692047377326565,
"grad_norm": 20.517061233520508,
"learning_rate": 9.810000000000001e-06,
"loss": 1.4073,
"step": 100
},
{
"epoch": 0.21150592216582065,
"grad_norm": 20.304147720336914,
"learning_rate": 9.760000000000001e-06,
"loss": 1.3958,
"step": 125
},
{
"epoch": 0.25380710659898476,
"grad_norm": 18.278545379638672,
"learning_rate": 9.71e-06,
"loss": 1.2917,
"step": 150
},
{
"epoch": 0.2961082910321489,
"grad_norm": 12.573765754699707,
"learning_rate": 9.66e-06,
"loss": 1.0512,
"step": 175
},
{
"epoch": 0.338409475465313,
"grad_norm": 19.825847625732422,
"learning_rate": 9.610000000000001e-06,
"loss": 1.0473,
"step": 200
},
{
"epoch": 0.38071065989847713,
"grad_norm": 20.143924713134766,
"learning_rate": 9.56e-06,
"loss": 0.943,
"step": 225
},
{
"epoch": 0.4230118443316413,
"grad_norm": 23.05994415283203,
"learning_rate": 9.51e-06,
"loss": 0.9658,
"step": 250
},
{
"epoch": 0.4653130287648054,
"grad_norm": 19.993017196655273,
"learning_rate": 9.460000000000001e-06,
"loss": 1.0455,
"step": 275
},
{
"epoch": 0.5076142131979695,
"grad_norm": 21.776580810546875,
"learning_rate": 9.41e-06,
"loss": 0.9146,
"step": 300
},
{
"epoch": 0.5499153976311336,
"grad_norm": 23.989397048950195,
"learning_rate": 9.360000000000002e-06,
"loss": 0.8029,
"step": 325
},
{
"epoch": 0.5922165820642978,
"grad_norm": 12.74024772644043,
"learning_rate": 9.31e-06,
"loss": 0.9284,
"step": 350
},
{
"epoch": 0.6345177664974619,
"grad_norm": 15.003218650817871,
"learning_rate": 9.260000000000001e-06,
"loss": 0.8649,
"step": 375
},
{
"epoch": 0.676818950930626,
"grad_norm": 18.662107467651367,
"learning_rate": 9.210000000000002e-06,
"loss": 0.8373,
"step": 400
},
{
"epoch": 0.7191201353637902,
"grad_norm": 17.71957015991211,
"learning_rate": 9.16e-06,
"loss": 0.8147,
"step": 425
},
{
"epoch": 0.7614213197969543,
"grad_norm": 13.40419864654541,
"learning_rate": 9.110000000000001e-06,
"loss": 0.918,
"step": 450
},
{
"epoch": 0.8037225042301185,
"grad_norm": 21.269617080688477,
"learning_rate": 9.060000000000001e-06,
"loss": 0.8491,
"step": 475
},
{
"epoch": 0.8460236886632826,
"grad_norm": 22.920703887939453,
"learning_rate": 9.01e-06,
"loss": 0.7514,
"step": 500
},
{
"epoch": 0.8883248730964467,
"grad_norm": 14.403526306152344,
"learning_rate": 8.96e-06,
"loss": 0.7853,
"step": 525
},
{
"epoch": 0.9306260575296108,
"grad_norm": 19.039899826049805,
"learning_rate": 8.910000000000001e-06,
"loss": 0.8512,
"step": 550
},
{
"epoch": 0.9729272419627749,
"grad_norm": 14.437999725341797,
"learning_rate": 8.860000000000002e-06,
"loss": 0.7155,
"step": 575
},
{
"epoch": 1.015228426395939,
"grad_norm": 11.453521728515625,
"learning_rate": 8.81e-06,
"loss": 0.7486,
"step": 600
},
{
"epoch": 1.0575296108291032,
"grad_norm": 14.921658515930176,
"learning_rate": 8.76e-06,
"loss": 0.4925,
"step": 625
},
{
"epoch": 1.0998307952622675,
"grad_norm": 13.061800003051758,
"learning_rate": 8.710000000000001e-06,
"loss": 0.5556,
"step": 650
},
{
"epoch": 1.1421319796954315,
"grad_norm": 19.10222816467285,
"learning_rate": 8.66e-06,
"loss": 0.6538,
"step": 675
},
{
"epoch": 1.1844331641285957,
"grad_norm": 13.2350435256958,
"learning_rate": 8.61e-06,
"loss": 0.5601,
"step": 700
},
{
"epoch": 1.2267343485617597,
"grad_norm": 11.798190116882324,
"learning_rate": 8.560000000000001e-06,
"loss": 0.5892,
"step": 725
},
{
"epoch": 1.2690355329949239,
"grad_norm": 9.492058753967285,
"learning_rate": 8.51e-06,
"loss": 0.5997,
"step": 750
},
{
"epoch": 1.3113367174280879,
"grad_norm": 9.729185104370117,
"learning_rate": 8.46e-06,
"loss": 0.5505,
"step": 775
},
{
"epoch": 1.353637901861252,
"grad_norm": 15.062725067138672,
"learning_rate": 8.41e-06,
"loss": 0.5853,
"step": 800
},
{
"epoch": 1.3959390862944163,
"grad_norm": 15.928922653198242,
"learning_rate": 8.36e-06,
"loss": 0.5996,
"step": 825
},
{
"epoch": 1.4382402707275803,
"grad_norm": 10.66003131866455,
"learning_rate": 8.31e-06,
"loss": 0.5575,
"step": 850
},
{
"epoch": 1.4805414551607445,
"grad_norm": 10.623987197875977,
"learning_rate": 8.26e-06,
"loss": 0.6829,
"step": 875
},
{
"epoch": 1.5228426395939088,
"grad_norm": 12.859821319580078,
"learning_rate": 8.210000000000001e-06,
"loss": 0.5767,
"step": 900
},
{
"epoch": 1.5651438240270727,
"grad_norm": 10.011828422546387,
"learning_rate": 8.16e-06,
"loss": 0.5729,
"step": 925
},
{
"epoch": 1.6074450084602367,
"grad_norm": 9.297901153564453,
"learning_rate": 8.110000000000002e-06,
"loss": 0.4935,
"step": 950
},
{
"epoch": 1.649746192893401,
"grad_norm": 12.029204368591309,
"learning_rate": 8.06e-06,
"loss": 0.6019,
"step": 975
},
{
"epoch": 1.6920473773265652,
"grad_norm": 15.14245891571045,
"learning_rate": 8.010000000000001e-06,
"loss": 0.5357,
"step": 1000
},
{
"epoch": 1.6920473773265652,
"eval_loss": 0.7350032329559326,
"eval_runtime": 267.1085,
"eval_samples_per_second": 1.965,
"eval_steps_per_second": 0.247,
"eval_wer": 57.38794435857806,
"step": 1000
},
{
"epoch": 1.7343485617597292,
"grad_norm": 14.227155685424805,
"learning_rate": 7.960000000000002e-06,
"loss": 0.59,
"step": 1025
},
{
"epoch": 1.7766497461928934,
"grad_norm": 18.895673751831055,
"learning_rate": 7.91e-06,
"loss": 0.5216,
"step": 1050
},
{
"epoch": 1.8189509306260576,
"grad_norm": 9.782660484313965,
"learning_rate": 7.860000000000001e-06,
"loss": 0.5936,
"step": 1075
},
{
"epoch": 1.8612521150592216,
"grad_norm": 12.076578140258789,
"learning_rate": 7.810000000000001e-06,
"loss": 0.5369,
"step": 1100
},
{
"epoch": 1.9035532994923858,
"grad_norm": 15.570003509521484,
"learning_rate": 7.76e-06,
"loss": 0.4853,
"step": 1125
},
{
"epoch": 1.94585448392555,
"grad_norm": 12.804927825927734,
"learning_rate": 7.71e-06,
"loss": 0.5245,
"step": 1150
},
{
"epoch": 1.988155668358714,
"grad_norm": 12.23946475982666,
"learning_rate": 7.660000000000001e-06,
"loss": 0.6461,
"step": 1175
},
{
"epoch": 2.030456852791878,
"grad_norm": 9.39681625366211,
"learning_rate": 7.610000000000001e-06,
"loss": 0.448,
"step": 1200
},
{
"epoch": 2.0727580372250425,
"grad_norm": 17.55742073059082,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.2919,
"step": 1225
},
{
"epoch": 2.1150592216582065,
"grad_norm": 17.609060287475586,
"learning_rate": 7.510000000000001e-06,
"loss": 0.34,
"step": 1250
},
{
"epoch": 2.1573604060913705,
"grad_norm": 7.253561973571777,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.3617,
"step": 1275
},
{
"epoch": 2.199661590524535,
"grad_norm": 9.8511381149292,
"learning_rate": 7.41e-06,
"loss": 0.3066,
"step": 1300
},
{
"epoch": 2.241962774957699,
"grad_norm": 6.805290222167969,
"learning_rate": 7.360000000000001e-06,
"loss": 0.3451,
"step": 1325
},
{
"epoch": 2.284263959390863,
"grad_norm": 12.156089782714844,
"learning_rate": 7.31e-06,
"loss": 0.3457,
"step": 1350
},
{
"epoch": 2.326565143824027,
"grad_norm": 7.368825912475586,
"learning_rate": 7.260000000000001e-06,
"loss": 0.3957,
"step": 1375
},
{
"epoch": 2.3688663282571913,
"grad_norm": 10.008464813232422,
"learning_rate": 7.2100000000000004e-06,
"loss": 0.4248,
"step": 1400
},
{
"epoch": 2.4111675126903553,
"grad_norm": 8.685702323913574,
"learning_rate": 7.16e-06,
"loss": 0.3268,
"step": 1425
},
{
"epoch": 2.4534686971235193,
"grad_norm": 16.325767517089844,
"learning_rate": 7.1100000000000005e-06,
"loss": 0.3777,
"step": 1450
},
{
"epoch": 2.495769881556684,
"grad_norm": 7.86500883102417,
"learning_rate": 7.06e-06,
"loss": 0.3798,
"step": 1475
},
{
"epoch": 2.5380710659898478,
"grad_norm": 7.600804328918457,
"learning_rate": 7.01e-06,
"loss": 0.3439,
"step": 1500
},
{
"epoch": 2.5803722504230118,
"grad_norm": 14.633502006530762,
"learning_rate": 6.96e-06,
"loss": 0.485,
"step": 1525
},
{
"epoch": 2.6226734348561758,
"grad_norm": 7.198423862457275,
"learning_rate": 6.91e-06,
"loss": 0.3575,
"step": 1550
},
{
"epoch": 2.66497461928934,
"grad_norm": 16.913612365722656,
"learning_rate": 6.860000000000001e-06,
"loss": 0.3753,
"step": 1575
},
{
"epoch": 2.707275803722504,
"grad_norm": 11.978515625,
"learning_rate": 6.810000000000001e-06,
"loss": 0.3621,
"step": 1600
},
{
"epoch": 2.749576988155668,
"grad_norm": 8.528421401977539,
"learning_rate": 6.760000000000001e-06,
"loss": 0.4298,
"step": 1625
},
{
"epoch": 2.7918781725888326,
"grad_norm": 13.898110389709473,
"learning_rate": 6.710000000000001e-06,
"loss": 0.3661,
"step": 1650
},
{
"epoch": 2.8341793570219966,
"grad_norm": 10.405202865600586,
"learning_rate": 6.660000000000001e-06,
"loss": 0.3456,
"step": 1675
},
{
"epoch": 2.8764805414551606,
"grad_norm": 9.385127067565918,
"learning_rate": 6.610000000000001e-06,
"loss": 0.3622,
"step": 1700
},
{
"epoch": 2.9187817258883246,
"grad_norm": 13.76083755493164,
"learning_rate": 6.560000000000001e-06,
"loss": 0.3024,
"step": 1725
},
{
"epoch": 2.961082910321489,
"grad_norm": 8.907617568969727,
"learning_rate": 6.51e-06,
"loss": 0.2823,
"step": 1750
},
{
"epoch": 3.003384094754653,
"grad_norm": 5.701330661773682,
"learning_rate": 6.460000000000001e-06,
"loss": 0.3449,
"step": 1775
},
{
"epoch": 3.045685279187817,
"grad_norm": 12.139259338378906,
"learning_rate": 6.4100000000000005e-06,
"loss": 0.2832,
"step": 1800
},
{
"epoch": 3.0879864636209815,
"grad_norm": 7.493695259094238,
"learning_rate": 6.360000000000001e-06,
"loss": 0.246,
"step": 1825
},
{
"epoch": 3.1302876480541455,
"grad_norm": 9.878129959106445,
"learning_rate": 6.3100000000000006e-06,
"loss": 0.1979,
"step": 1850
},
{
"epoch": 3.1725888324873095,
"grad_norm": 11.870550155639648,
"learning_rate": 6.26e-06,
"loss": 0.2227,
"step": 1875
},
{
"epoch": 3.214890016920474,
"grad_norm": 9.061797142028809,
"learning_rate": 6.210000000000001e-06,
"loss": 0.1731,
"step": 1900
},
{
"epoch": 3.257191201353638,
"grad_norm": 10.5235013961792,
"learning_rate": 6.16e-06,
"loss": 0.2162,
"step": 1925
},
{
"epoch": 3.299492385786802,
"grad_norm": 11.65762996673584,
"learning_rate": 6.110000000000001e-06,
"loss": 0.2505,
"step": 1950
},
{
"epoch": 3.3417935702199664,
"grad_norm": 7.416925430297852,
"learning_rate": 6.0600000000000004e-06,
"loss": 0.2204,
"step": 1975
},
{
"epoch": 3.3840947546531304,
"grad_norm": 8.655340194702148,
"learning_rate": 6.01e-06,
"loss": 0.2169,
"step": 2000
},
{
"epoch": 3.3840947546531304,
"eval_loss": 0.732398271560669,
"eval_runtime": 271.7409,
"eval_samples_per_second": 1.932,
"eval_steps_per_second": 0.243,
"eval_wer": 59.58268933539412,
"step": 2000
},
{
"epoch": 3.4263959390862944,
"grad_norm": 8.720595359802246,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.2475,
"step": 2025
},
{
"epoch": 3.4686971235194584,
"grad_norm": 10.252331733703613,
"learning_rate": 5.91e-06,
"loss": 0.229,
"step": 2050
},
{
"epoch": 3.510998307952623,
"grad_norm": 7.533905982971191,
"learning_rate": 5.86e-06,
"loss": 0.1953,
"step": 2075
},
{
"epoch": 3.553299492385787,
"grad_norm": 5.134809970855713,
"learning_rate": 5.81e-06,
"loss": 0.2581,
"step": 2100
},
{
"epoch": 3.595600676818951,
"grad_norm": 8.298554420471191,
"learning_rate": 5.76e-06,
"loss": 0.242,
"step": 2125
},
{
"epoch": 3.6379018612521152,
"grad_norm": 13.200987815856934,
"learning_rate": 5.71e-06,
"loss": 0.2015,
"step": 2150
},
{
"epoch": 3.6802030456852792,
"grad_norm": 10.58892822265625,
"learning_rate": 5.66e-06,
"loss": 0.2273,
"step": 2175
},
{
"epoch": 3.7225042301184432,
"grad_norm": 8.217840194702148,
"learning_rate": 5.610000000000001e-06,
"loss": 0.1863,
"step": 2200
},
{
"epoch": 3.764805414551607,
"grad_norm": 9.060100555419922,
"learning_rate": 5.560000000000001e-06,
"loss": 0.2072,
"step": 2225
},
{
"epoch": 3.8071065989847717,
"grad_norm": 7.986787796020508,
"learning_rate": 5.510000000000001e-06,
"loss": 0.2047,
"step": 2250
},
{
"epoch": 3.8494077834179357,
"grad_norm": 4.51170539855957,
"learning_rate": 5.460000000000001e-06,
"loss": 0.2277,
"step": 2275
},
{
"epoch": 3.8917089678511,
"grad_norm": 5.082483768463135,
"learning_rate": 5.410000000000001e-06,
"loss": 0.1969,
"step": 2300
},
{
"epoch": 3.934010152284264,
"grad_norm": 9.10048770904541,
"learning_rate": 5.36e-06,
"loss": 0.2005,
"step": 2325
},
{
"epoch": 3.976311336717428,
"grad_norm": 13.18527603149414,
"learning_rate": 5.310000000000001e-06,
"loss": 0.1891,
"step": 2350
},
{
"epoch": 4.018612521150592,
"grad_norm": 3.0822834968566895,
"learning_rate": 5.2600000000000005e-06,
"loss": 0.2137,
"step": 2375
},
{
"epoch": 4.060913705583756,
"grad_norm": 4.320342063903809,
"learning_rate": 5.210000000000001e-06,
"loss": 0.1121,
"step": 2400
},
{
"epoch": 4.10321489001692,
"grad_norm": 6.486440658569336,
"learning_rate": 5.1600000000000006e-06,
"loss": 0.1079,
"step": 2425
},
{
"epoch": 4.145516074450085,
"grad_norm": 6.29085636138916,
"learning_rate": 5.11e-06,
"loss": 0.1307,
"step": 2450
},
{
"epoch": 4.187817258883249,
"grad_norm": 7.327591896057129,
"learning_rate": 5.060000000000001e-06,
"loss": 0.1085,
"step": 2475
},
{
"epoch": 4.230118443316413,
"grad_norm": 10.163105964660645,
"learning_rate": 5.01e-06,
"loss": 0.1479,
"step": 2500
},
{
"epoch": 4.272419627749577,
"grad_norm": 5.800470352172852,
"learning_rate": 4.960000000000001e-06,
"loss": 0.164,
"step": 2525
},
{
"epoch": 4.314720812182741,
"grad_norm": 5.962683200836182,
"learning_rate": 4.9100000000000004e-06,
"loss": 0.1091,
"step": 2550
},
{
"epoch": 4.357021996615905,
"grad_norm": 1.9525275230407715,
"learning_rate": 4.86e-06,
"loss": 0.1158,
"step": 2575
},
{
"epoch": 4.39932318104907,
"grad_norm": 3.1240804195404053,
"learning_rate": 4.8100000000000005e-06,
"loss": 0.1322,
"step": 2600
},
{
"epoch": 4.441624365482234,
"grad_norm": 7.414096832275391,
"learning_rate": 4.76e-06,
"loss": 0.1061,
"step": 2625
},
{
"epoch": 4.483925549915398,
"grad_norm": 3.900723695755005,
"learning_rate": 4.71e-06,
"loss": 0.1218,
"step": 2650
},
{
"epoch": 4.526226734348562,
"grad_norm": 4.022332668304443,
"learning_rate": 4.66e-06,
"loss": 0.129,
"step": 2675
},
{
"epoch": 4.568527918781726,
"grad_norm": 6.424135208129883,
"learning_rate": 4.610000000000001e-06,
"loss": 0.1413,
"step": 2700
},
{
"epoch": 4.61082910321489,
"grad_norm": 6.727682590484619,
"learning_rate": 4.56e-06,
"loss": 0.1536,
"step": 2725
},
{
"epoch": 4.653130287648054,
"grad_norm": 3.917060613632202,
"learning_rate": 4.510000000000001e-06,
"loss": 0.1514,
"step": 2750
},
{
"epoch": 4.695431472081218,
"grad_norm": 5.873462677001953,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.1116,
"step": 2775
},
{
"epoch": 4.737732656514383,
"grad_norm": 6.992873191833496,
"learning_rate": 4.41e-06,
"loss": 0.1474,
"step": 2800
},
{
"epoch": 4.780033840947547,
"grad_norm": 3.4381256103515625,
"learning_rate": 4.360000000000001e-06,
"loss": 0.1335,
"step": 2825
},
{
"epoch": 4.822335025380711,
"grad_norm": 12.713067054748535,
"learning_rate": 4.31e-06,
"loss": 0.1728,
"step": 2850
},
{
"epoch": 4.864636209813875,
"grad_norm": 9.53136920928955,
"learning_rate": 4.26e-06,
"loss": 0.1547,
"step": 2875
},
{
"epoch": 4.906937394247039,
"grad_norm": 4.442587375640869,
"learning_rate": 4.21e-06,
"loss": 0.1765,
"step": 2900
},
{
"epoch": 4.949238578680203,
"grad_norm": 6.532963275909424,
"learning_rate": 4.16e-06,
"loss": 0.1153,
"step": 2925
},
{
"epoch": 4.991539763113368,
"grad_norm": 5.798689842224121,
"learning_rate": 4.1100000000000005e-06,
"loss": 0.1529,
"step": 2950
},
{
"epoch": 5.0338409475465316,
"grad_norm": 13.38257122039795,
"learning_rate": 4.060000000000001e-06,
"loss": 0.0865,
"step": 2975
},
{
"epoch": 5.0761421319796955,
"grad_norm": 4.9945220947265625,
"learning_rate": 4.0100000000000006e-06,
"loss": 0.0917,
"step": 3000
},
{
"epoch": 5.0761421319796955,
"eval_loss": 0.7850525975227356,
"eval_runtime": 265.9428,
"eval_samples_per_second": 1.974,
"eval_steps_per_second": 0.248,
"eval_wer": 67.32612055641421,
"step": 3000
},
{
"epoch": 5.1184433164128595,
"grad_norm": 6.642792701721191,
"learning_rate": 3.96e-06,
"loss": 0.0646,
"step": 3025
},
{
"epoch": 5.1607445008460235,
"grad_norm": 4.741107940673828,
"learning_rate": 3.910000000000001e-06,
"loss": 0.0566,
"step": 3050
},
{
"epoch": 5.2030456852791875,
"grad_norm": 1.2372127771377563,
"learning_rate": 3.86e-06,
"loss": 0.0681,
"step": 3075
},
{
"epoch": 5.2453468697123515,
"grad_norm": 6.571516513824463,
"learning_rate": 3.8100000000000004e-06,
"loss": 0.1036,
"step": 3100
},
{
"epoch": 5.287648054145516,
"grad_norm": 2.1475963592529297,
"learning_rate": 3.7600000000000004e-06,
"loss": 0.0645,
"step": 3125
},
{
"epoch": 5.32994923857868,
"grad_norm": 1.6281226873397827,
"learning_rate": 3.7100000000000005e-06,
"loss": 0.0592,
"step": 3150
},
{
"epoch": 5.372250423011844,
"grad_norm": 1.886271357536316,
"learning_rate": 3.66e-06,
"loss": 0.0762,
"step": 3175
},
{
"epoch": 5.414551607445008,
"grad_norm": 5.298330783843994,
"learning_rate": 3.61e-06,
"loss": 0.0731,
"step": 3200
},
{
"epoch": 5.456852791878172,
"grad_norm": 2.7591044902801514,
"learning_rate": 3.5600000000000002e-06,
"loss": 0.0642,
"step": 3225
},
{
"epoch": 5.499153976311336,
"grad_norm": 4.665078163146973,
"learning_rate": 3.5100000000000003e-06,
"loss": 0.0785,
"step": 3250
},
{
"epoch": 5.541455160744501,
"grad_norm": 1.6366431713104248,
"learning_rate": 3.46e-06,
"loss": 0.0953,
"step": 3275
},
{
"epoch": 5.583756345177665,
"grad_norm": 6.781162738800049,
"learning_rate": 3.4100000000000004e-06,
"loss": 0.0529,
"step": 3300
},
{
"epoch": 5.626057529610829,
"grad_norm": 7.282459735870361,
"learning_rate": 3.3600000000000004e-06,
"loss": 0.0741,
"step": 3325
},
{
"epoch": 5.668358714043993,
"grad_norm": 2.2602906227111816,
"learning_rate": 3.3100000000000005e-06,
"loss": 0.0962,
"step": 3350
},
{
"epoch": 5.710659898477157,
"grad_norm": 7.06923246383667,
"learning_rate": 3.2600000000000006e-06,
"loss": 0.088,
"step": 3375
},
{
"epoch": 5.752961082910321,
"grad_norm": 1.4087145328521729,
"learning_rate": 3.21e-06,
"loss": 0.0935,
"step": 3400
},
{
"epoch": 5.795262267343485,
"grad_norm": 5.96427059173584,
"learning_rate": 3.1600000000000002e-06,
"loss": 0.0465,
"step": 3425
},
{
"epoch": 5.837563451776649,
"grad_norm": 7.267573833465576,
"learning_rate": 3.1100000000000003e-06,
"loss": 0.0977,
"step": 3450
},
{
"epoch": 5.879864636209814,
"grad_norm": 4.4437384605407715,
"learning_rate": 3.0600000000000003e-06,
"loss": 0.0644,
"step": 3475
},
{
"epoch": 5.922165820642978,
"grad_norm": 7.447219371795654,
"learning_rate": 3.01e-06,
"loss": 0.0699,
"step": 3500
},
{
"epoch": 5.964467005076142,
"grad_norm": 3.2096688747406006,
"learning_rate": 2.96e-06,
"loss": 0.0878,
"step": 3525
},
{
"epoch": 6.006768189509306,
"grad_norm": 7.191064357757568,
"learning_rate": 2.91e-06,
"loss": 0.0815,
"step": 3550
},
{
"epoch": 6.04906937394247,
"grad_norm": 6.0771989822387695,
"learning_rate": 2.86e-06,
"loss": 0.0471,
"step": 3575
},
{
"epoch": 6.091370558375634,
"grad_norm": 11.324472427368164,
"learning_rate": 2.8100000000000006e-06,
"loss": 0.0509,
"step": 3600
},
{
"epoch": 6.133671742808799,
"grad_norm": 4.35711145401001,
"learning_rate": 2.7600000000000003e-06,
"loss": 0.0525,
"step": 3625
},
{
"epoch": 6.175972927241963,
"grad_norm": 5.396825790405273,
"learning_rate": 2.7100000000000003e-06,
"loss": 0.0508,
"step": 3650
},
{
"epoch": 6.218274111675127,
"grad_norm": 0.9582346081733704,
"learning_rate": 2.6600000000000004e-06,
"loss": 0.0391,
"step": 3675
},
{
"epoch": 6.260575296108291,
"grad_norm": 2.5615832805633545,
"learning_rate": 2.6100000000000004e-06,
"loss": 0.0557,
"step": 3700
},
{
"epoch": 6.302876480541455,
"grad_norm": 7.233354568481445,
"learning_rate": 2.56e-06,
"loss": 0.0255,
"step": 3725
},
{
"epoch": 6.345177664974619,
"grad_norm": 1.0129377841949463,
"learning_rate": 2.51e-06,
"loss": 0.0313,
"step": 3750
},
{
"epoch": 6.387478849407783,
"grad_norm": 3.359922170639038,
"learning_rate": 2.46e-06,
"loss": 0.037,
"step": 3775
},
{
"epoch": 6.429780033840948,
"grad_norm": 4.721961498260498,
"learning_rate": 2.4100000000000002e-06,
"loss": 0.0455,
"step": 3800
},
{
"epoch": 6.472081218274112,
"grad_norm": 5.72802734375,
"learning_rate": 2.3600000000000003e-06,
"loss": 0.0403,
"step": 3825
},
{
"epoch": 6.514382402707276,
"grad_norm": 2.1109373569488525,
"learning_rate": 2.3100000000000003e-06,
"loss": 0.0572,
"step": 3850
},
{
"epoch": 6.55668358714044,
"grad_norm": 11.605259895324707,
"learning_rate": 2.2600000000000004e-06,
"loss": 0.04,
"step": 3875
},
{
"epoch": 6.598984771573604,
"grad_norm": 1.7446305751800537,
"learning_rate": 2.21e-06,
"loss": 0.0432,
"step": 3900
},
{
"epoch": 6.641285956006768,
"grad_norm": 1.7428556680679321,
"learning_rate": 2.16e-06,
"loss": 0.0438,
"step": 3925
},
{
"epoch": 6.683587140439933,
"grad_norm": 2.7987313270568848,
"learning_rate": 2.11e-06,
"loss": 0.0379,
"step": 3950
},
{
"epoch": 6.725888324873097,
"grad_norm": 0.8822634220123291,
"learning_rate": 2.06e-06,
"loss": 0.0262,
"step": 3975
},
{
"epoch": 6.768189509306261,
"grad_norm": 1.6173559427261353,
"learning_rate": 2.0100000000000002e-06,
"loss": 0.0399,
"step": 4000
},
{
"epoch": 6.768189509306261,
"eval_loss": 0.8369417190551758,
"eval_runtime": 268.5703,
"eval_samples_per_second": 1.955,
"eval_steps_per_second": 0.246,
"eval_wer": 69.36630602782071,
"step": 4000
},
{
"epoch": 6.810490693739425,
"grad_norm": 7.732959747314453,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.0537,
"step": 4025
},
{
"epoch": 6.852791878172589,
"grad_norm": 2.3094513416290283,
"learning_rate": 1.9100000000000003e-06,
"loss": 0.0409,
"step": 4050
},
{
"epoch": 6.895093062605753,
"grad_norm": 7.310888767242432,
"learning_rate": 1.8600000000000002e-06,
"loss": 0.0326,
"step": 4075
},
{
"epoch": 6.937394247038917,
"grad_norm": 5.490383625030518,
"learning_rate": 1.81e-06,
"loss": 0.0494,
"step": 4100
},
{
"epoch": 6.979695431472082,
"grad_norm": 3.2482218742370605,
"learning_rate": 1.76e-06,
"loss": 0.0552,
"step": 4125
},
{
"epoch": 7.021996615905246,
"grad_norm": 3.474656343460083,
"learning_rate": 1.7100000000000004e-06,
"loss": 0.036,
"step": 4150
},
{
"epoch": 7.06429780033841,
"grad_norm": 5.680706024169922,
"learning_rate": 1.6600000000000002e-06,
"loss": 0.0281,
"step": 4175
},
{
"epoch": 7.106598984771574,
"grad_norm": 2.636260747909546,
"learning_rate": 1.6100000000000003e-06,
"loss": 0.0191,
"step": 4200
},
{
"epoch": 7.148900169204738,
"grad_norm": 0.4349760413169861,
"learning_rate": 1.56e-06,
"loss": 0.0229,
"step": 4225
},
{
"epoch": 7.191201353637902,
"grad_norm": 1.194022297859192,
"learning_rate": 1.5100000000000002e-06,
"loss": 0.024,
"step": 4250
},
{
"epoch": 7.233502538071066,
"grad_norm": 3.210624933242798,
"learning_rate": 1.46e-06,
"loss": 0.0198,
"step": 4275
},
{
"epoch": 7.2758037225042305,
"grad_norm": 6.436493873596191,
"learning_rate": 1.41e-06,
"loss": 0.0322,
"step": 4300
},
{
"epoch": 7.3181049069373945,
"grad_norm": 2.0612590312957764,
"learning_rate": 1.3600000000000001e-06,
"loss": 0.028,
"step": 4325
},
{
"epoch": 7.3604060913705585,
"grad_norm": 0.4659048318862915,
"learning_rate": 1.3100000000000002e-06,
"loss": 0.0275,
"step": 4350
},
{
"epoch": 7.4027072758037225,
"grad_norm": 0.9324387907981873,
"learning_rate": 1.26e-06,
"loss": 0.0198,
"step": 4375
},
{
"epoch": 7.4450084602368864,
"grad_norm": 0.6756917834281921,
"learning_rate": 1.21e-06,
"loss": 0.0135,
"step": 4400
},
{
"epoch": 7.4873096446700504,
"grad_norm": 0.6344797015190125,
"learning_rate": 1.1600000000000001e-06,
"loss": 0.02,
"step": 4425
},
{
"epoch": 7.529610829103214,
"grad_norm": 2.675487756729126,
"learning_rate": 1.1100000000000002e-06,
"loss": 0.0203,
"step": 4450
},
{
"epoch": 7.571912013536379,
"grad_norm": 2.6844794750213623,
"learning_rate": 1.06e-06,
"loss": 0.0318,
"step": 4475
},
{
"epoch": 7.614213197969543,
"grad_norm": 0.493827760219574,
"learning_rate": 1.01e-06,
"loss": 0.0253,
"step": 4500
},
{
"epoch": 7.656514382402707,
"grad_norm": 1.7579808235168457,
"learning_rate": 9.600000000000001e-07,
"loss": 0.0159,
"step": 4525
},
{
"epoch": 7.698815566835871,
"grad_norm": 0.6291612982749939,
"learning_rate": 9.100000000000001e-07,
"loss": 0.0284,
"step": 4550
},
{
"epoch": 7.741116751269035,
"grad_norm": 1.4849034547805786,
"learning_rate": 8.6e-07,
"loss": 0.0173,
"step": 4575
},
{
"epoch": 7.783417935702199,
"grad_norm": 0.6458856463432312,
"learning_rate": 8.100000000000001e-07,
"loss": 0.0156,
"step": 4600
},
{
"epoch": 7.825719120135364,
"grad_norm": 0.2947687804698944,
"learning_rate": 7.6e-07,
"loss": 0.0253,
"step": 4625
},
{
"epoch": 7.868020304568528,
"grad_norm": 1.277651309967041,
"learning_rate": 7.1e-07,
"loss": 0.0167,
"step": 4650
},
{
"epoch": 7.910321489001692,
"grad_norm": 3.256167411804199,
"learning_rate": 6.6e-07,
"loss": 0.0291,
"step": 4675
},
{
"epoch": 7.952622673434856,
"grad_norm": 0.46765822172164917,
"learning_rate": 6.100000000000001e-07,
"loss": 0.0499,
"step": 4700
},
{
"epoch": 7.99492385786802,
"grad_norm": 0.4301537871360779,
"learning_rate": 5.6e-07,
"loss": 0.0468,
"step": 4725
},
{
"epoch": 8.037225042301184,
"grad_norm": 2.860602378845215,
"learning_rate": 5.1e-07,
"loss": 0.0147,
"step": 4750
},
{
"epoch": 8.079526226734348,
"grad_norm": 0.27871173620224,
"learning_rate": 4.6000000000000004e-07,
"loss": 0.0158,
"step": 4775
},
{
"epoch": 8.121827411167512,
"grad_norm": 0.5240181684494019,
"learning_rate": 4.1000000000000004e-07,
"loss": 0.0251,
"step": 4800
},
{
"epoch": 8.164128595600676,
"grad_norm": 0.24851427972316742,
"learning_rate": 3.6e-07,
"loss": 0.0121,
"step": 4825
},
{
"epoch": 8.20642978003384,
"grad_norm": 0.38774704933166504,
"learning_rate": 3.1000000000000005e-07,
"loss": 0.0183,
"step": 4850
},
{
"epoch": 8.248730964467006,
"grad_norm": 0.8031614422798157,
"learning_rate": 2.6e-07,
"loss": 0.0096,
"step": 4875
},
{
"epoch": 8.29103214890017,
"grad_norm": 1.7913848161697388,
"learning_rate": 2.1000000000000003e-07,
"loss": 0.0212,
"step": 4900
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.41187214851379395,
"learning_rate": 1.6e-07,
"loss": 0.0125,
"step": 4925
},
{
"epoch": 8.375634517766498,
"grad_norm": 0.9369856715202332,
"learning_rate": 1.1e-07,
"loss": 0.0103,
"step": 4950
},
{
"epoch": 8.417935702199662,
"grad_norm": 0.24833422899246216,
"learning_rate": 6.000000000000001e-08,
"loss": 0.0299,
"step": 4975
},
{
"epoch": 8.460236886632826,
"grad_norm": 0.25818368792533875,
"learning_rate": 1e-08,
"loss": 0.0164,
"step": 5000
},
{
"epoch": 8.460236886632826,
"eval_loss": 0.8701984286308289,
"eval_runtime": 276.906,
"eval_samples_per_second": 1.896,
"eval_steps_per_second": 0.238,
"eval_wer": 81.15919629057187,
"step": 5000
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.15318725967872e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}