{
"best_global_step": 5000,
"best_metric": 70.01703577512777,
"best_model_checkpoint": "./whisper-small-sdn-2025/asr_training_runs/lightning_cycle_2/checkpoint-5000",
"epoch": 11.737089201877934,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05868544600938967,
"grad_norm": 30.71144676208496,
"learning_rate": 9.960000000000001e-06,
"loss": 4.0849,
"step": 25
},
{
"epoch": 0.11737089201877934,
"grad_norm": 22.479434967041016,
"learning_rate": 9.912000000000001e-06,
"loss": 2.4516,
"step": 50
},
{
"epoch": 0.176056338028169,
"grad_norm": 30.043546676635742,
"learning_rate": 9.862e-06,
"loss": 2.2164,
"step": 75
},
{
"epoch": 0.2347417840375587,
"grad_norm": 27.947431564331055,
"learning_rate": 9.812e-06,
"loss": 1.908,
"step": 100
},
{
"epoch": 0.2934272300469484,
"grad_norm": 33.64918899536133,
"learning_rate": 9.762e-06,
"loss": 1.7268,
"step": 125
},
{
"epoch": 0.352112676056338,
"grad_norm": 27.36302947998047,
"learning_rate": 9.712e-06,
"loss": 1.4279,
"step": 150
},
{
"epoch": 0.4107981220657277,
"grad_norm": 32.08675003051758,
"learning_rate": 9.662e-06,
"loss": 1.5036,
"step": 175
},
{
"epoch": 0.4694835680751174,
"grad_norm": 21.85032081604004,
"learning_rate": 9.612000000000002e-06,
"loss": 1.3513,
"step": 200
},
{
"epoch": 0.528169014084507,
"grad_norm": 18.02567481994629,
"learning_rate": 9.562000000000001e-06,
"loss": 1.2851,
"step": 225
},
{
"epoch": 0.5868544600938967,
"grad_norm": 22.885683059692383,
"learning_rate": 9.512000000000001e-06,
"loss": 1.4273,
"step": 250
},
{
"epoch": 0.6455399061032864,
"grad_norm": 24.009239196777344,
"learning_rate": 9.462000000000002e-06,
"loss": 1.2741,
"step": 275
},
{
"epoch": 0.704225352112676,
"grad_norm": 19.683744430541992,
"learning_rate": 9.412e-06,
"loss": 1.164,
"step": 300
},
{
"epoch": 0.7629107981220657,
"grad_norm": 27.29810333251953,
"learning_rate": 9.362000000000001e-06,
"loss": 1.2817,
"step": 325
},
{
"epoch": 0.8215962441314554,
"grad_norm": 25.677413940429688,
"learning_rate": 9.312000000000002e-06,
"loss": 1.1265,
"step": 350
},
{
"epoch": 0.8802816901408451,
"grad_norm": 22.555444717407227,
"learning_rate": 9.262e-06,
"loss": 1.2558,
"step": 375
},
{
"epoch": 0.9389671361502347,
"grad_norm": 27.330963134765625,
"learning_rate": 9.212000000000001e-06,
"loss": 1.0733,
"step": 400
},
{
"epoch": 0.9976525821596244,
"grad_norm": 25.466411590576172,
"learning_rate": 9.162000000000001e-06,
"loss": 1.0392,
"step": 425
},
{
"epoch": 1.056338028169014,
"grad_norm": 20.50482940673828,
"learning_rate": 9.112e-06,
"loss": 0.8302,
"step": 450
},
{
"epoch": 1.1150234741784038,
"grad_norm": 18.399213790893555,
"learning_rate": 9.062e-06,
"loss": 0.6969,
"step": 475
},
{
"epoch": 1.1737089201877935,
"grad_norm": 22.934707641601562,
"learning_rate": 9.012000000000001e-06,
"loss": 0.8019,
"step": 500
},
{
"epoch": 1.232394366197183,
"grad_norm": 17.93931007385254,
"learning_rate": 8.962e-06,
"loss": 0.8902,
"step": 525
},
{
"epoch": 1.2910798122065728,
"grad_norm": 13.139846801757812,
"learning_rate": 8.912e-06,
"loss": 0.7966,
"step": 550
},
{
"epoch": 1.3497652582159625,
"grad_norm": 23.364347457885742,
"learning_rate": 8.862000000000001e-06,
"loss": 0.8172,
"step": 575
},
{
"epoch": 1.408450704225352,
"grad_norm": 22.44937515258789,
"learning_rate": 8.812000000000001e-06,
"loss": 0.9067,
"step": 600
},
{
"epoch": 1.4671361502347418,
"grad_norm": 13.097396850585938,
"learning_rate": 8.762e-06,
"loss": 0.7597,
"step": 625
},
{
"epoch": 1.5258215962441315,
"grad_norm": 17.809097290039062,
"learning_rate": 8.712e-06,
"loss": 0.8297,
"step": 650
},
{
"epoch": 1.584507042253521,
"grad_norm": 21.2346248626709,
"learning_rate": 8.662000000000001e-06,
"loss": 0.762,
"step": 675
},
{
"epoch": 1.6431924882629108,
"grad_norm": 20.58393669128418,
"learning_rate": 8.612e-06,
"loss": 0.9516,
"step": 700
},
{
"epoch": 1.7018779342723005,
"grad_norm": 15.944975852966309,
"learning_rate": 8.562e-06,
"loss": 0.7829,
"step": 725
},
{
"epoch": 1.76056338028169,
"grad_norm": 18.670852661132812,
"learning_rate": 8.512e-06,
"loss": 0.8098,
"step": 750
},
{
"epoch": 1.8192488262910798,
"grad_norm": 18.15910530090332,
"learning_rate": 8.462e-06,
"loss": 0.8253,
"step": 775
},
{
"epoch": 1.8779342723004695,
"grad_norm": 17.510900497436523,
"learning_rate": 8.412e-06,
"loss": 0.7525,
"step": 800
},
{
"epoch": 1.936619718309859,
"grad_norm": 11.208598136901855,
"learning_rate": 8.362e-06,
"loss": 0.7598,
"step": 825
},
{
"epoch": 1.995305164319249,
"grad_norm": 14.952198028564453,
"learning_rate": 8.312000000000001e-06,
"loss": 0.8244,
"step": 850
},
{
"epoch": 2.0539906103286385,
"grad_norm": 14.107500076293945,
"learning_rate": 8.262000000000002e-06,
"loss": 0.5531,
"step": 875
},
{
"epoch": 2.112676056338028,
"grad_norm": 12.615099906921387,
"learning_rate": 8.212e-06,
"loss": 0.5232,
"step": 900
},
{
"epoch": 2.171361502347418,
"grad_norm": 8.425407409667969,
"learning_rate": 8.162e-06,
"loss": 0.4375,
"step": 925
},
{
"epoch": 2.2300469483568075,
"grad_norm": 10.094340324401855,
"learning_rate": 8.112000000000001e-06,
"loss": 0.5122,
"step": 950
},
{
"epoch": 2.288732394366197,
"grad_norm": 12.052427291870117,
"learning_rate": 8.062000000000002e-06,
"loss": 0.4252,
"step": 975
},
{
"epoch": 2.347417840375587,
"grad_norm": 12.115269660949707,
"learning_rate": 8.012e-06,
"loss": 0.5578,
"step": 1000
},
{
"epoch": 2.347417840375587,
"eval_loss": 1.0599621534347534,
"eval_runtime": 271.4248,
"eval_samples_per_second": 1.396,
"eval_steps_per_second": 0.177,
"eval_wer": 86.44439036261863,
"step": 1000
},
{
"epoch": 2.4061032863849765,
"grad_norm": 14.625807762145996,
"learning_rate": 7.962000000000001e-06,
"loss": 0.5382,
"step": 1025
},
{
"epoch": 2.464788732394366,
"grad_norm": 9.787211418151855,
"learning_rate": 7.912000000000001e-06,
"loss": 0.5301,
"step": 1050
},
{
"epoch": 2.523474178403756,
"grad_norm": 13.635499954223633,
"learning_rate": 7.862e-06,
"loss": 0.5697,
"step": 1075
},
{
"epoch": 2.5821596244131455,
"grad_norm": 12.006632804870605,
"learning_rate": 7.812e-06,
"loss": 0.4453,
"step": 1100
},
{
"epoch": 2.640845070422535,
"grad_norm": 16.6585636138916,
"learning_rate": 7.762000000000001e-06,
"loss": 0.4854,
"step": 1125
},
{
"epoch": 2.699530516431925,
"grad_norm": 15.70045280456543,
"learning_rate": 7.712e-06,
"loss": 0.5265,
"step": 1150
},
{
"epoch": 2.7582159624413145,
"grad_norm": 15.379609107971191,
"learning_rate": 7.662e-06,
"loss": 0.4894,
"step": 1175
},
{
"epoch": 2.816901408450704,
"grad_norm": 15.11367130279541,
"learning_rate": 7.612e-06,
"loss": 0.5602,
"step": 1200
},
{
"epoch": 2.875586854460094,
"grad_norm": 17.753530502319336,
"learning_rate": 7.562000000000001e-06,
"loss": 0.4709,
"step": 1225
},
{
"epoch": 2.9342723004694835,
"grad_norm": 12.208802223205566,
"learning_rate": 7.512e-06,
"loss": 0.5455,
"step": 1250
},
{
"epoch": 2.992957746478873,
"grad_norm": 9.159188270568848,
"learning_rate": 7.462000000000001e-06,
"loss": 0.5367,
"step": 1275
},
{
"epoch": 3.051643192488263,
"grad_norm": 6.392484664916992,
"learning_rate": 7.412e-06,
"loss": 0.3184,
"step": 1300
},
{
"epoch": 3.1103286384976525,
"grad_norm": 8.96312427520752,
"learning_rate": 7.362e-06,
"loss": 0.3772,
"step": 1325
},
{
"epoch": 3.169014084507042,
"grad_norm": 8.565118789672852,
"learning_rate": 7.3120000000000005e-06,
"loss": 0.3321,
"step": 1350
},
{
"epoch": 3.227699530516432,
"grad_norm": 8.298563957214355,
"learning_rate": 7.262e-06,
"loss": 0.3029,
"step": 1375
},
{
"epoch": 3.2863849765258215,
"grad_norm": 13.057472229003906,
"learning_rate": 7.212e-06,
"loss": 0.2809,
"step": 1400
},
{
"epoch": 3.345070422535211,
"grad_norm": 15.98492431640625,
"learning_rate": 7.162e-06,
"loss": 0.2694,
"step": 1425
},
{
"epoch": 3.403755868544601,
"grad_norm": 13.155710220336914,
"learning_rate": 7.1120000000000015e-06,
"loss": 0.3075,
"step": 1450
},
{
"epoch": 3.4624413145539905,
"grad_norm": 8.523398399353027,
"learning_rate": 7.062000000000001e-06,
"loss": 0.299,
"step": 1475
},
{
"epoch": 3.52112676056338,
"grad_norm": 6.524288177490234,
"learning_rate": 7.012000000000001e-06,
"loss": 0.3276,
"step": 1500
},
{
"epoch": 3.57981220657277,
"grad_norm": 8.676782608032227,
"learning_rate": 6.962000000000001e-06,
"loss": 0.3431,
"step": 1525
},
{
"epoch": 3.6384976525821595,
"grad_norm": 7.747554779052734,
"learning_rate": 6.912000000000001e-06,
"loss": 0.3369,
"step": 1550
},
{
"epoch": 3.697183098591549,
"grad_norm": 9.531723022460938,
"learning_rate": 6.8620000000000005e-06,
"loss": 0.3354,
"step": 1575
},
{
"epoch": 3.755868544600939,
"grad_norm": 13.466939926147461,
"learning_rate": 6.812000000000001e-06,
"loss": 0.3224,
"step": 1600
},
{
"epoch": 3.8145539906103285,
"grad_norm": 9.547689437866211,
"learning_rate": 6.762000000000001e-06,
"loss": 0.2504,
"step": 1625
},
{
"epoch": 3.873239436619718,
"grad_norm": 17.218488693237305,
"learning_rate": 6.712000000000001e-06,
"loss": 0.3484,
"step": 1650
},
{
"epoch": 3.931924882629108,
"grad_norm": 10.556262969970703,
"learning_rate": 6.662000000000001e-06,
"loss": 0.298,
"step": 1675
},
{
"epoch": 3.9906103286384975,
"grad_norm": 5.671838760375977,
"learning_rate": 6.612e-06,
"loss": 0.3222,
"step": 1700
},
{
"epoch": 4.049295774647887,
"grad_norm": 14.99199104309082,
"learning_rate": 6.562000000000001e-06,
"loss": 0.2189,
"step": 1725
},
{
"epoch": 4.107981220657277,
"grad_norm": 13.279620170593262,
"learning_rate": 6.5120000000000005e-06,
"loss": 0.1823,
"step": 1750
},
{
"epoch": 4.166666666666667,
"grad_norm": 8.277289390563965,
"learning_rate": 6.462e-06,
"loss": 0.1381,
"step": 1775
},
{
"epoch": 4.225352112676056,
"grad_norm": 4.825685977935791,
"learning_rate": 6.412000000000001e-06,
"loss": 0.2302,
"step": 1800
},
{
"epoch": 4.284037558685446,
"grad_norm": 7.209333896636963,
"learning_rate": 6.362e-06,
"loss": 0.1838,
"step": 1825
},
{
"epoch": 4.342723004694836,
"grad_norm": 12.577840805053711,
"learning_rate": 6.312000000000001e-06,
"loss": 0.1596,
"step": 1850
},
{
"epoch": 4.401408450704225,
"grad_norm": 6.849479675292969,
"learning_rate": 6.262e-06,
"loss": 0.2349,
"step": 1875
},
{
"epoch": 4.460093896713615,
"grad_norm": 6.93465518951416,
"learning_rate": 6.212e-06,
"loss": 0.1934,
"step": 1900
},
{
"epoch": 4.518779342723005,
"grad_norm": 8.305404663085938,
"learning_rate": 6.1620000000000005e-06,
"loss": 0.1755,
"step": 1925
},
{
"epoch": 4.577464788732394,
"grad_norm": 8.874540328979492,
"learning_rate": 6.112e-06,
"loss": 0.1726,
"step": 1950
},
{
"epoch": 4.636150234741784,
"grad_norm": 5.533932685852051,
"learning_rate": 6.062e-06,
"loss": 0.1891,
"step": 1975
},
{
"epoch": 4.694835680751174,
"grad_norm": 13.135041236877441,
"learning_rate": 6.012e-06,
"loss": 0.1592,
"step": 2000
},
{
"epoch": 4.694835680751174,
"eval_loss": 1.117398977279663,
"eval_runtime": 220.7322,
"eval_samples_per_second": 1.717,
"eval_steps_per_second": 0.217,
"eval_wer": 82.793867120954,
"step": 2000
},
{
"epoch": 4.753521126760563,
"grad_norm": 10.941877365112305,
"learning_rate": 5.962e-06,
"loss": 0.1958,
"step": 2025
},
{
"epoch": 4.812206572769953,
"grad_norm": 8.460000991821289,
"learning_rate": 5.912e-06,
"loss": 0.1765,
"step": 2050
},
{
"epoch": 4.870892018779343,
"grad_norm": 13.924954414367676,
"learning_rate": 5.862000000000001e-06,
"loss": 0.2053,
"step": 2075
},
{
"epoch": 4.929577464788732,
"grad_norm": 8.0511474609375,
"learning_rate": 5.812000000000001e-06,
"loss": 0.2096,
"step": 2100
},
{
"epoch": 4.988262910798122,
"grad_norm": 6.871691703796387,
"learning_rate": 5.762000000000001e-06,
"loss": 0.1623,
"step": 2125
},
{
"epoch": 5.046948356807512,
"grad_norm": 7.501040458679199,
"learning_rate": 5.7120000000000005e-06,
"loss": 0.1228,
"step": 2150
},
{
"epoch": 5.105633802816901,
"grad_norm": 3.6400983333587646,
"learning_rate": 5.662000000000001e-06,
"loss": 0.1071,
"step": 2175
},
{
"epoch": 5.164319248826291,
"grad_norm": 4.942204475402832,
"learning_rate": 5.612000000000001e-06,
"loss": 0.1214,
"step": 2200
},
{
"epoch": 5.223004694835681,
"grad_norm": 7.270200729370117,
"learning_rate": 5.562000000000001e-06,
"loss": 0.1011,
"step": 2225
},
{
"epoch": 5.28169014084507,
"grad_norm": 8.258340835571289,
"learning_rate": 5.512000000000001e-06,
"loss": 0.1231,
"step": 2250
},
{
"epoch": 5.34037558685446,
"grad_norm": 3.134267807006836,
"learning_rate": 5.462e-06,
"loss": 0.1075,
"step": 2275
},
{
"epoch": 5.39906103286385,
"grad_norm": 8.155512809753418,
"learning_rate": 5.412000000000001e-06,
"loss": 0.1037,
"step": 2300
},
{
"epoch": 5.457746478873239,
"grad_norm": 8.14622974395752,
"learning_rate": 5.3620000000000005e-06,
"loss": 0.1026,
"step": 2325
},
{
"epoch": 5.516431924882629,
"grad_norm": 7.712051868438721,
"learning_rate": 5.312e-06,
"loss": 0.124,
"step": 2350
},
{
"epoch": 5.575117370892019,
"grad_norm": 7.989415645599365,
"learning_rate": 5.262000000000001e-06,
"loss": 0.0928,
"step": 2375
},
{
"epoch": 5.633802816901408,
"grad_norm": 7.154349327087402,
"learning_rate": 5.212e-06,
"loss": 0.1234,
"step": 2400
},
{
"epoch": 5.692488262910798,
"grad_norm": 6.29465913772583,
"learning_rate": 5.162000000000001e-06,
"loss": 0.1243,
"step": 2425
},
{
"epoch": 5.751173708920188,
"grad_norm": 9.395393371582031,
"learning_rate": 5.112e-06,
"loss": 0.1195,
"step": 2450
},
{
"epoch": 5.809859154929578,
"grad_norm": 5.384176731109619,
"learning_rate": 5.062e-06,
"loss": 0.0725,
"step": 2475
},
{
"epoch": 5.868544600938967,
"grad_norm": 10.592826843261719,
"learning_rate": 5.0120000000000005e-06,
"loss": 0.092,
"step": 2500
},
{
"epoch": 5.927230046948357,
"grad_norm": 10.480640411376953,
"learning_rate": 4.962e-06,
"loss": 0.0979,
"step": 2525
},
{
"epoch": 5.985915492957746,
"grad_norm": 8.233006477355957,
"learning_rate": 4.9120000000000006e-06,
"loss": 0.0923,
"step": 2550
},
{
"epoch": 6.044600938967136,
"grad_norm": 1.81440269947052,
"learning_rate": 4.862e-06,
"loss": 0.0549,
"step": 2575
},
{
"epoch": 6.103286384976526,
"grad_norm": 8.188456535339355,
"learning_rate": 4.812000000000001e-06,
"loss": 0.0619,
"step": 2600
},
{
"epoch": 6.161971830985916,
"grad_norm": 6.400794506072998,
"learning_rate": 4.762e-06,
"loss": 0.0678,
"step": 2625
},
{
"epoch": 6.220657276995305,
"grad_norm": 4.501795291900635,
"learning_rate": 4.712000000000001e-06,
"loss": 0.0358,
"step": 2650
},
{
"epoch": 6.279342723004695,
"grad_norm": 8.449444770812988,
"learning_rate": 4.6620000000000004e-06,
"loss": 0.0736,
"step": 2675
},
{
"epoch": 6.338028169014084,
"grad_norm": 7.520918369293213,
"learning_rate": 4.612e-06,
"loss": 0.0631,
"step": 2700
},
{
"epoch": 6.396713615023474,
"grad_norm": 2.6740336418151855,
"learning_rate": 4.5620000000000005e-06,
"loss": 0.0732,
"step": 2725
},
{
"epoch": 6.455399061032864,
"grad_norm": 7.82635498046875,
"learning_rate": 4.512e-06,
"loss": 0.0681,
"step": 2750
},
{
"epoch": 6.514084507042254,
"grad_norm": 4.215991973876953,
"learning_rate": 4.462e-06,
"loss": 0.0687,
"step": 2775
},
{
"epoch": 6.572769953051643,
"grad_norm": 8.543045997619629,
"learning_rate": 4.412e-06,
"loss": 0.0587,
"step": 2800
},
{
"epoch": 6.631455399061033,
"grad_norm": 1.273762583732605,
"learning_rate": 4.362e-06,
"loss": 0.0388,
"step": 2825
},
{
"epoch": 6.690140845070422,
"grad_norm": 7.219778060913086,
"learning_rate": 4.312e-06,
"loss": 0.0637,
"step": 2850
},
{
"epoch": 6.748826291079812,
"grad_norm": 3.4591705799102783,
"learning_rate": 4.262000000000001e-06,
"loss": 0.0512,
"step": 2875
},
{
"epoch": 6.807511737089202,
"grad_norm": 8.502069473266602,
"learning_rate": 4.2120000000000005e-06,
"loss": 0.0553,
"step": 2900
},
{
"epoch": 6.866197183098592,
"grad_norm": 9.043886184692383,
"learning_rate": 4.162e-06,
"loss": 0.0773,
"step": 2925
},
{
"epoch": 6.924882629107981,
"grad_norm": 7.274739742279053,
"learning_rate": 4.112000000000001e-06,
"loss": 0.0575,
"step": 2950
},
{
"epoch": 6.983568075117371,
"grad_norm": 1.4064300060272217,
"learning_rate": 4.062e-06,
"loss": 0.0527,
"step": 2975
},
{
"epoch": 7.042253521126761,
"grad_norm": 4.416920185089111,
"learning_rate": 4.012000000000001e-06,
"loss": 0.0282,
"step": 3000
},
{
"epoch": 7.042253521126761,
"eval_loss": 1.2262433767318726,
"eval_runtime": 197.2898,
"eval_samples_per_second": 1.921,
"eval_steps_per_second": 0.243,
"eval_wer": 81.09028960817717,
"step": 3000
},
{
"epoch": 7.10093896713615,
"grad_norm": 1.1444480419158936,
"learning_rate": 3.962e-06,
"loss": 0.0271,
"step": 3025
},
{
"epoch": 7.15962441314554,
"grad_norm": 3.13212513923645,
"learning_rate": 3.912e-06,
"loss": 0.0302,
"step": 3050
},
{
"epoch": 7.21830985915493,
"grad_norm": 1.2989273071289062,
"learning_rate": 3.8620000000000005e-06,
"loss": 0.0385,
"step": 3075
},
{
"epoch": 7.276995305164319,
"grad_norm": 8.594914436340332,
"learning_rate": 3.812e-06,
"loss": 0.0383,
"step": 3100
},
{
"epoch": 7.335680751173709,
"grad_norm": 3.2579174041748047,
"learning_rate": 3.762e-06,
"loss": 0.043,
"step": 3125
},
{
"epoch": 7.394366197183099,
"grad_norm": 2.448817014694214,
"learning_rate": 3.712e-06,
"loss": 0.0191,
"step": 3150
},
{
"epoch": 7.453051643192488,
"grad_norm": 1.0537528991699219,
"learning_rate": 3.6620000000000007e-06,
"loss": 0.026,
"step": 3175
},
{
"epoch": 7.511737089201878,
"grad_norm": 5.7627716064453125,
"learning_rate": 3.6120000000000003e-06,
"loss": 0.0334,
"step": 3200
},
{
"epoch": 7.570422535211268,
"grad_norm": 1.5681730508804321,
"learning_rate": 3.5620000000000004e-06,
"loss": 0.0303,
"step": 3225
},
{
"epoch": 7.629107981220657,
"grad_norm": 1.3271775245666504,
"learning_rate": 3.5120000000000004e-06,
"loss": 0.0318,
"step": 3250
},
{
"epoch": 7.687793427230047,
"grad_norm": 1.4985450506210327,
"learning_rate": 3.4620000000000005e-06,
"loss": 0.0287,
"step": 3275
},
{
"epoch": 7.746478873239437,
"grad_norm": 2.9170143604278564,
"learning_rate": 3.412e-06,
"loss": 0.0251,
"step": 3300
},
{
"epoch": 7.805164319248826,
"grad_norm": 3.5694615840911865,
"learning_rate": 3.362e-06,
"loss": 0.0303,
"step": 3325
},
{
"epoch": 7.863849765258216,
"grad_norm": 0.8620719313621521,
"learning_rate": 3.3120000000000002e-06,
"loss": 0.0374,
"step": 3350
},
{
"epoch": 7.922535211267606,
"grad_norm": 4.067095756530762,
"learning_rate": 3.2620000000000003e-06,
"loss": 0.0254,
"step": 3375
},
{
"epoch": 7.981220657276995,
"grad_norm": 4.516016960144043,
"learning_rate": 3.212e-06,
"loss": 0.0359,
"step": 3400
},
{
"epoch": 8.039906103286384,
"grad_norm": 4.685014247894287,
"learning_rate": 3.162e-06,
"loss": 0.0189,
"step": 3425
},
{
"epoch": 8.098591549295774,
"grad_norm": 4.836423873901367,
"learning_rate": 3.112e-06,
"loss": 0.0217,
"step": 3450
},
{
"epoch": 8.157276995305164,
"grad_norm": 0.42729654908180237,
"learning_rate": 3.0620000000000005e-06,
"loss": 0.0157,
"step": 3475
},
{
"epoch": 8.215962441314554,
"grad_norm": 2.2334694862365723,
"learning_rate": 3.0120000000000006e-06,
"loss": 0.015,
"step": 3500
},
{
"epoch": 8.274647887323944,
"grad_norm": 4.860167980194092,
"learning_rate": 2.9620000000000006e-06,
"loss": 0.0254,
"step": 3525
},
{
"epoch": 8.333333333333334,
"grad_norm": 3.4534430503845215,
"learning_rate": 2.9120000000000002e-06,
"loss": 0.0154,
"step": 3550
},
{
"epoch": 8.392018779342724,
"grad_norm": 0.6198468208312988,
"learning_rate": 2.8620000000000003e-06,
"loss": 0.0195,
"step": 3575
},
{
"epoch": 8.450704225352112,
"grad_norm": 3.6635360717773438,
"learning_rate": 2.8120000000000004e-06,
"loss": 0.0132,
"step": 3600
},
{
"epoch": 8.509389671361502,
"grad_norm": 4.437213897705078,
"learning_rate": 2.7620000000000004e-06,
"loss": 0.0283,
"step": 3625
},
{
"epoch": 8.568075117370892,
"grad_norm": 2.0735621452331543,
"learning_rate": 2.712e-06,
"loss": 0.0141,
"step": 3650
},
{
"epoch": 8.626760563380282,
"grad_norm": 4.264507293701172,
"learning_rate": 2.662e-06,
"loss": 0.016,
"step": 3675
},
{
"epoch": 8.685446009389672,
"grad_norm": 9.613415718078613,
"learning_rate": 2.612e-06,
"loss": 0.0188,
"step": 3700
},
{
"epoch": 8.74413145539906,
"grad_norm": 1.1187382936477661,
"learning_rate": 2.562e-06,
"loss": 0.0111,
"step": 3725
},
{
"epoch": 8.80281690140845,
"grad_norm": 1.9176028966903687,
"learning_rate": 2.512e-06,
"loss": 0.0142,
"step": 3750
},
{
"epoch": 8.86150234741784,
"grad_norm": 2.139860153198242,
"learning_rate": 2.4620000000000003e-06,
"loss": 0.0122,
"step": 3775
},
{
"epoch": 8.92018779342723,
"grad_norm": 1.8670209646224976,
"learning_rate": 2.4120000000000004e-06,
"loss": 0.0136,
"step": 3800
},
{
"epoch": 8.97887323943662,
"grad_norm": 1.4191254377365112,
"learning_rate": 2.362e-06,
"loss": 0.0104,
"step": 3825
},
{
"epoch": 9.03755868544601,
"grad_norm": 1.3398191928863525,
"learning_rate": 2.312e-06,
"loss": 0.0097,
"step": 3850
},
{
"epoch": 9.0962441314554,
"grad_norm": 3.4211862087249756,
"learning_rate": 2.262e-06,
"loss": 0.0045,
"step": 3875
},
{
"epoch": 9.154929577464788,
"grad_norm": 0.4645729959011078,
"learning_rate": 2.212e-06,
"loss": 0.0069,
"step": 3900
},
{
"epoch": 9.213615023474178,
"grad_norm": 0.5300021767616272,
"learning_rate": 2.1620000000000002e-06,
"loss": 0.0064,
"step": 3925
},
{
"epoch": 9.272300469483568,
"grad_norm": 0.7054510712623596,
"learning_rate": 2.1120000000000003e-06,
"loss": 0.0077,
"step": 3950
},
{
"epoch": 9.330985915492958,
"grad_norm": 2.699523448944092,
"learning_rate": 2.062e-06,
"loss": 0.0169,
"step": 3975
},
{
"epoch": 9.389671361502348,
"grad_norm": 3.4975876808166504,
"learning_rate": 2.012e-06,
"loss": 0.0066,
"step": 4000
},
{
"epoch": 9.389671361502348,
"eval_loss": 1.3293064832687378,
"eval_runtime": 183.5937,
"eval_samples_per_second": 2.064,
"eval_steps_per_second": 0.261,
"eval_wer": 70.13871988318326,
"step": 4000
},
{
"epoch": 9.448356807511738,
"grad_norm": 0.23933370411396027,
"learning_rate": 1.9620000000000004e-06,
"loss": 0.0086,
"step": 4025
},
{
"epoch": 9.507042253521126,
"grad_norm": 4.528557777404785,
"learning_rate": 1.912e-06,
"loss": 0.0093,
"step": 4050
},
{
"epoch": 9.565727699530516,
"grad_norm": 0.5696691274642944,
"learning_rate": 1.8620000000000001e-06,
"loss": 0.0083,
"step": 4075
},
{
"epoch": 9.624413145539906,
"grad_norm": 0.42469522356987,
"learning_rate": 1.8120000000000002e-06,
"loss": 0.0093,
"step": 4100
},
{
"epoch": 9.683098591549296,
"grad_norm": 0.2512667179107666,
"learning_rate": 1.762e-06,
"loss": 0.0067,
"step": 4125
},
{
"epoch": 9.741784037558686,
"grad_norm": 8.73310661315918,
"learning_rate": 1.712e-06,
"loss": 0.0153,
"step": 4150
},
{
"epoch": 9.800469483568076,
"grad_norm": 0.35621461272239685,
"learning_rate": 1.662e-06,
"loss": 0.009,
"step": 4175
},
{
"epoch": 9.859154929577464,
"grad_norm": 0.45837071537971497,
"learning_rate": 1.6120000000000002e-06,
"loss": 0.0077,
"step": 4200
},
{
"epoch": 9.917840375586854,
"grad_norm": 2.0876801013946533,
"learning_rate": 1.5620000000000002e-06,
"loss": 0.0086,
"step": 4225
},
{
"epoch": 9.976525821596244,
"grad_norm": 0.18432964384555817,
"learning_rate": 1.512e-06,
"loss": 0.009,
"step": 4250
},
{
"epoch": 10.035211267605634,
"grad_norm": 1.0506223440170288,
"learning_rate": 1.4620000000000001e-06,
"loss": 0.0073,
"step": 4275
},
{
"epoch": 10.093896713615024,
"grad_norm": 1.3841832876205444,
"learning_rate": 1.412e-06,
"loss": 0.0058,
"step": 4300
},
{
"epoch": 10.152582159624414,
"grad_norm": 0.15871676802635193,
"learning_rate": 1.362e-06,
"loss": 0.0047,
"step": 4325
},
{
"epoch": 10.211267605633802,
"grad_norm": 0.42714136838912964,
"learning_rate": 1.3120000000000003e-06,
"loss": 0.01,
"step": 4350
},
{
"epoch": 10.269953051643192,
"grad_norm": 0.19602788984775543,
"learning_rate": 1.2620000000000002e-06,
"loss": 0.0055,
"step": 4375
},
{
"epoch": 10.328638497652582,
"grad_norm": 0.25925296545028687,
"learning_rate": 1.2120000000000002e-06,
"loss": 0.0042,
"step": 4400
},
{
"epoch": 10.387323943661972,
"grad_norm": 0.18303866684436798,
"learning_rate": 1.162e-06,
"loss": 0.0058,
"step": 4425
},
{
"epoch": 10.446009389671362,
"grad_norm": 0.2301306277513504,
"learning_rate": 1.1120000000000001e-06,
"loss": 0.0031,
"step": 4450
},
{
"epoch": 10.504694835680752,
"grad_norm": 0.23439094424247742,
"learning_rate": 1.0620000000000002e-06,
"loss": 0.005,
"step": 4475
},
{
"epoch": 10.56338028169014,
"grad_norm": 0.2046060562133789,
"learning_rate": 1.012e-06,
"loss": 0.0046,
"step": 4500
},
{
"epoch": 10.62206572769953,
"grad_norm": 0.17962978780269623,
"learning_rate": 9.62e-07,
"loss": 0.0055,
"step": 4525
},
{
"epoch": 10.68075117370892,
"grad_norm": 0.13209222257137299,
"learning_rate": 9.120000000000001e-07,
"loss": 0.0048,
"step": 4550
},
{
"epoch": 10.73943661971831,
"grad_norm": 0.1691015511751175,
"learning_rate": 8.620000000000001e-07,
"loss": 0.0044,
"step": 4575
},
{
"epoch": 10.7981220657277,
"grad_norm": 4.851961135864258,
"learning_rate": 8.12e-07,
"loss": 0.006,
"step": 4600
},
{
"epoch": 10.85680751173709,
"grad_norm": 0.1549525409936905,
"learning_rate": 7.620000000000001e-07,
"loss": 0.0064,
"step": 4625
},
{
"epoch": 10.915492957746478,
"grad_norm": 0.5663545727729797,
"learning_rate": 7.12e-07,
"loss": 0.0066,
"step": 4650
},
{
"epoch": 10.974178403755868,
"grad_norm": 0.2026844024658203,
"learning_rate": 6.62e-07,
"loss": 0.0051,
"step": 4675
},
{
"epoch": 11.032863849765258,
"grad_norm": 0.29603302478790283,
"learning_rate": 6.12e-07,
"loss": 0.0036,
"step": 4700
},
{
"epoch": 11.091549295774648,
"grad_norm": 0.46924281120300293,
"learning_rate": 5.620000000000001e-07,
"loss": 0.0037,
"step": 4725
},
{
"epoch": 11.150234741784038,
"grad_norm": 0.24668444693088531,
"learning_rate": 5.12e-07,
"loss": 0.0036,
"step": 4750
},
{
"epoch": 11.208920187793428,
"grad_norm": 0.1730998456478119,
"learning_rate": 4.6200000000000003e-07,
"loss": 0.0025,
"step": 4775
},
{
"epoch": 11.267605633802816,
"grad_norm": 0.2514236569404602,
"learning_rate": 4.1200000000000004e-07,
"loss": 0.0041,
"step": 4800
},
{
"epoch": 11.326291079812206,
"grad_norm": 0.3077276647090912,
"learning_rate": 3.6200000000000004e-07,
"loss": 0.0031,
"step": 4825
},
{
"epoch": 11.384976525821596,
"grad_norm": 0.13625359535217285,
"learning_rate": 3.12e-07,
"loss": 0.0029,
"step": 4850
},
{
"epoch": 11.443661971830986,
"grad_norm": 0.6590627431869507,
"learning_rate": 2.6200000000000004e-07,
"loss": 0.0046,
"step": 4875
},
{
"epoch": 11.502347417840376,
"grad_norm": 0.09400284290313721,
"learning_rate": 2.1200000000000002e-07,
"loss": 0.005,
"step": 4900
},
{
"epoch": 11.561032863849766,
"grad_norm": 0.14036104083061218,
"learning_rate": 1.62e-07,
"loss": 0.0045,
"step": 4925
},
{
"epoch": 11.619718309859154,
"grad_norm": 0.48431381583213806,
"learning_rate": 1.1200000000000001e-07,
"loss": 0.0066,
"step": 4950
},
{
"epoch": 11.678403755868544,
"grad_norm": 0.35120266675949097,
"learning_rate": 6.2e-08,
"loss": 0.0032,
"step": 4975
},
{
"epoch": 11.737089201877934,
"grad_norm": 0.20458117127418518,
"learning_rate": 1.2e-08,
"loss": 0.0031,
"step": 5000
},
{
"epoch": 11.737089201877934,
"eval_loss": 1.3679251670837402,
"eval_runtime": 188.6684,
"eval_samples_per_second": 2.009,
"eval_steps_per_second": 0.254,
"eval_wer": 70.01703577512777,
"step": 5000
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.153706713399296e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}