{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.407709414381023,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037064492216456635,
"grad_norm": 14.615763664245605,
"learning_rate": 4.800000000000001e-07,
"loss": 1.4607,
"step": 25
},
{
"epoch": 0.07412898443291327,
"grad_norm": 10.59756851196289,
"learning_rate": 9.800000000000001e-07,
"loss": 1.176,
"step": 50
},
{
"epoch": 0.1111934766493699,
"grad_norm": 7.142136573791504,
"learning_rate": 1.48e-06,
"loss": 0.9267,
"step": 75
},
{
"epoch": 0.14825796886582654,
"grad_norm": 9.191902160644531,
"learning_rate": 1.98e-06,
"loss": 0.7253,
"step": 100
},
{
"epoch": 0.18532246108228317,
"grad_norm": 10.320201873779297,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.7047,
"step": 125
},
{
"epoch": 0.2223869532987398,
"grad_norm": 8.486912727355957,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.6634,
"step": 150
},
{
"epoch": 0.25945144551519644,
"grad_norm": 9.802300453186035,
"learning_rate": 3.48e-06,
"loss": 0.5786,
"step": 175
},
{
"epoch": 0.2965159377316531,
"grad_norm": 9.568249702453613,
"learning_rate": 3.980000000000001e-06,
"loss": 0.5857,
"step": 200
},
{
"epoch": 0.3335804299481097,
"grad_norm": 7.968526840209961,
"learning_rate": 4.48e-06,
"loss": 0.5385,
"step": 225
},
{
"epoch": 0.37064492216456635,
"grad_norm": 7.507795810699463,
"learning_rate": 4.980000000000001e-06,
"loss": 0.5151,
"step": 250
},
{
"epoch": 0.407709414381023,
"grad_norm": 6.258375644683838,
"learning_rate": 5.480000000000001e-06,
"loss": 0.4649,
"step": 275
},
{
"epoch": 0.4447739065974796,
"grad_norm": 9.89697551727295,
"learning_rate": 5.98e-06,
"loss": 0.4209,
"step": 300
},
{
"epoch": 0.48183839881393625,
"grad_norm": 7.8507490158081055,
"learning_rate": 6.480000000000001e-06,
"loss": 0.4459,
"step": 325
},
{
"epoch": 0.5189028910303929,
"grad_norm": 5.835811138153076,
"learning_rate": 6.98e-06,
"loss": 0.4141,
"step": 350
},
{
"epoch": 0.5559673832468495,
"grad_norm": 6.767547607421875,
"learning_rate": 7.48e-06,
"loss": 0.4108,
"step": 375
},
{
"epoch": 0.5930318754633062,
"grad_norm": 5.9475884437561035,
"learning_rate": 7.980000000000002e-06,
"loss": 0.41,
"step": 400
},
{
"epoch": 0.6300963676797627,
"grad_norm": 7.767906188964844,
"learning_rate": 8.48e-06,
"loss": 0.3781,
"step": 425
},
{
"epoch": 0.6671608598962194,
"grad_norm": 6.990137100219727,
"learning_rate": 8.98e-06,
"loss": 0.39,
"step": 450
},
{
"epoch": 0.704225352112676,
"grad_norm": 5.607441425323486,
"learning_rate": 9.48e-06,
"loss": 0.3783,
"step": 475
},
{
"epoch": 0.7412898443291327,
"grad_norm": 6.288857936859131,
"learning_rate": 9.980000000000001e-06,
"loss": 0.3559,
"step": 500
},
{
"epoch": 0.7783543365455893,
"grad_norm": 6.985698699951172,
"learning_rate": 9.946666666666667e-06,
"loss": 0.3595,
"step": 525
},
{
"epoch": 0.815418828762046,
"grad_norm": 6.037854194641113,
"learning_rate": 9.891111111111113e-06,
"loss": 0.3163,
"step": 550
},
{
"epoch": 0.8524833209785025,
"grad_norm": 5.8710784912109375,
"learning_rate": 9.835555555555556e-06,
"loss": 0.3502,
"step": 575
},
{
"epoch": 0.8895478131949592,
"grad_norm": 6.342834949493408,
"learning_rate": 9.780000000000001e-06,
"loss": 0.317,
"step": 600
},
{
"epoch": 0.9266123054114158,
"grad_norm": 5.589534759521484,
"learning_rate": 9.724444444444445e-06,
"loss": 0.3228,
"step": 625
},
{
"epoch": 0.9636767976278725,
"grad_norm": 7.743918418884277,
"learning_rate": 9.66888888888889e-06,
"loss": 0.3144,
"step": 650
},
{
"epoch": 1.0,
"grad_norm": 10.073568344116211,
"learning_rate": 9.613333333333335e-06,
"loss": 0.2939,
"step": 675
},
{
"epoch": 1.0370644922164567,
"grad_norm": 4.640520095825195,
"learning_rate": 9.557777777777777e-06,
"loss": 0.1939,
"step": 700
},
{
"epoch": 1.0741289844329134,
"grad_norm": 3.2049508094787598,
"learning_rate": 9.502222222222223e-06,
"loss": 0.1929,
"step": 725
},
{
"epoch": 1.1111934766493698,
"grad_norm": 3.9065611362457275,
"learning_rate": 9.446666666666667e-06,
"loss": 0.1998,
"step": 750
},
{
"epoch": 1.1482579688658265,
"grad_norm": 3.7471649646759033,
"learning_rate": 9.391111111111111e-06,
"loss": 0.2007,
"step": 775
},
{
"epoch": 1.1853224610822832,
"grad_norm": 3.952751874923706,
"learning_rate": 9.335555555555557e-06,
"loss": 0.1863,
"step": 800
},
{
"epoch": 1.2223869532987397,
"grad_norm": 5.39549446105957,
"learning_rate": 9.280000000000001e-06,
"loss": 0.1953,
"step": 825
},
{
"epoch": 1.2594514455151964,
"grad_norm": 4.03216552734375,
"learning_rate": 9.224444444444445e-06,
"loss": 0.2065,
"step": 850
},
{
"epoch": 1.296515937731653,
"grad_norm": 3.854651689529419,
"learning_rate": 9.168888888888889e-06,
"loss": 0.1703,
"step": 875
},
{
"epoch": 1.3335804299481098,
"grad_norm": 4.835360050201416,
"learning_rate": 9.113333333333335e-06,
"loss": 0.1692,
"step": 900
},
{
"epoch": 1.3706449221645665,
"grad_norm": 5.247130393981934,
"learning_rate": 9.057777777777779e-06,
"loss": 0.1982,
"step": 925
},
{
"epoch": 1.407709414381023,
"grad_norm": 3.9537737369537354,
"learning_rate": 9.002222222222223e-06,
"loss": 0.1661,
"step": 950
},
{
"epoch": 1.4447739065974796,
"grad_norm": 4.887810230255127,
"learning_rate": 8.946666666666669e-06,
"loss": 0.1836,
"step": 975
},
{
"epoch": 1.4818383988139363,
"grad_norm": 3.6338751316070557,
"learning_rate": 8.891111111111111e-06,
"loss": 0.1822,
"step": 1000
},
{
"epoch": 1.4818383988139363,
"eval_loss": 0.2655850648880005,
"eval_runtime": 730.9503,
"eval_samples_per_second": 3.947,
"eval_steps_per_second": 0.494,
"eval_wer": 0.14449384404924762,
"step": 1000
},
{
"epoch": 1.5189028910303928,
"grad_norm": 4.078255653381348,
"learning_rate": 8.835555555555557e-06,
"loss": 0.1661,
"step": 1025
},
{
"epoch": 1.5559673832468495,
"grad_norm": 3.9311952590942383,
"learning_rate": 8.78e-06,
"loss": 0.1725,
"step": 1050
},
{
"epoch": 1.5930318754633062,
"grad_norm": 4.800196170806885,
"learning_rate": 8.724444444444445e-06,
"loss": 0.1704,
"step": 1075
},
{
"epoch": 1.6300963676797626,
"grad_norm": 4.550530910491943,
"learning_rate": 8.66888888888889e-06,
"loss": 0.1793,
"step": 1100
},
{
"epoch": 1.6671608598962195,
"grad_norm": 6.508624076843262,
"learning_rate": 8.613333333333333e-06,
"loss": 0.1619,
"step": 1125
},
{
"epoch": 1.704225352112676,
"grad_norm": 4.16792106628418,
"learning_rate": 8.557777777777778e-06,
"loss": 0.1652,
"step": 1150
},
{
"epoch": 1.7412898443291327,
"grad_norm": 4.420657157897949,
"learning_rate": 8.502222222222223e-06,
"loss": 0.16,
"step": 1175
},
{
"epoch": 1.7783543365455894,
"grad_norm": 4.781569004058838,
"learning_rate": 8.446666666666668e-06,
"loss": 0.1695,
"step": 1200
},
{
"epoch": 1.8154188287620459,
"grad_norm": 3.877307176589966,
"learning_rate": 8.391111111111112e-06,
"loss": 0.1529,
"step": 1225
},
{
"epoch": 1.8524833209785025,
"grad_norm": 4.159163475036621,
"learning_rate": 8.335555555555556e-06,
"loss": 0.1619,
"step": 1250
},
{
"epoch": 1.8895478131949592,
"grad_norm": 3.6631579399108887,
"learning_rate": 8.28e-06,
"loss": 0.1654,
"step": 1275
},
{
"epoch": 1.9266123054114157,
"grad_norm": 4.1784210205078125,
"learning_rate": 8.224444444444444e-06,
"loss": 0.1494,
"step": 1300
},
{
"epoch": 1.9636767976278726,
"grad_norm": 5.867852210998535,
"learning_rate": 8.16888888888889e-06,
"loss": 0.1443,
"step": 1325
},
{
"epoch": 2.0,
"grad_norm": 5.817214012145996,
"learning_rate": 8.113333333333334e-06,
"loss": 0.139,
"step": 1350
},
{
"epoch": 2.0370644922164565,
"grad_norm": 2.3572022914886475,
"learning_rate": 8.057777777777778e-06,
"loss": 0.0614,
"step": 1375
},
{
"epoch": 2.0741289844329134,
"grad_norm": 2.2769412994384766,
"learning_rate": 8.002222222222222e-06,
"loss": 0.0606,
"step": 1400
},
{
"epoch": 2.11119347664937,
"grad_norm": 2.474583864212036,
"learning_rate": 7.946666666666666e-06,
"loss": 0.0716,
"step": 1425
},
{
"epoch": 2.1482579688658268,
"grad_norm": 2.5783841609954834,
"learning_rate": 7.891111111111112e-06,
"loss": 0.065,
"step": 1450
},
{
"epoch": 2.1853224610822832,
"grad_norm": 1.6132420301437378,
"learning_rate": 7.835555555555556e-06,
"loss": 0.067,
"step": 1475
},
{
"epoch": 2.2223869532987397,
"grad_norm": 3.8042001724243164,
"learning_rate": 7.78e-06,
"loss": 0.0724,
"step": 1500
},
{
"epoch": 2.2594514455151966,
"grad_norm": 2.2419843673706055,
"learning_rate": 7.724444444444446e-06,
"loss": 0.0761,
"step": 1525
},
{
"epoch": 2.296515937731653,
"grad_norm": 2.706354856491089,
"learning_rate": 7.66888888888889e-06,
"loss": 0.0659,
"step": 1550
},
{
"epoch": 2.3335804299481095,
"grad_norm": 2.8394265174865723,
"learning_rate": 7.613333333333334e-06,
"loss": 0.0688,
"step": 1575
},
{
"epoch": 2.3706449221645665,
"grad_norm": 2.383784770965576,
"learning_rate": 7.557777777777779e-06,
"loss": 0.0729,
"step": 1600
},
{
"epoch": 2.407709414381023,
"grad_norm": 3.0959832668304443,
"learning_rate": 7.502222222222223e-06,
"loss": 0.0626,
"step": 1625
},
{
"epoch": 2.4447739065974794,
"grad_norm": 2.927393913269043,
"learning_rate": 7.446666666666668e-06,
"loss": 0.0677,
"step": 1650
},
{
"epoch": 2.4818383988139363,
"grad_norm": 2.644434928894043,
"learning_rate": 7.3911111111111125e-06,
"loss": 0.0644,
"step": 1675
},
{
"epoch": 2.5189028910303928,
"grad_norm": 2.9071755409240723,
"learning_rate": 7.335555555555556e-06,
"loss": 0.061,
"step": 1700
},
{
"epoch": 2.5559673832468492,
"grad_norm": 2.6862034797668457,
"learning_rate": 7.280000000000001e-06,
"loss": 0.0615,
"step": 1725
},
{
"epoch": 2.593031875463306,
"grad_norm": 3.1184046268463135,
"learning_rate": 7.224444444444445e-06,
"loss": 0.0714,
"step": 1750
},
{
"epoch": 2.6300963676797626,
"grad_norm": 1.7592053413391113,
"learning_rate": 7.1688888888888895e-06,
"loss": 0.0704,
"step": 1775
},
{
"epoch": 2.6671608598962195,
"grad_norm": 2.9316508769989014,
"learning_rate": 7.113333333333334e-06,
"loss": 0.0689,
"step": 1800
},
{
"epoch": 2.704225352112676,
"grad_norm": 2.1934666633605957,
"learning_rate": 7.057777777777778e-06,
"loss": 0.0721,
"step": 1825
},
{
"epoch": 2.741289844329133,
"grad_norm": 3.4919371604919434,
"learning_rate": 7.0022222222222225e-06,
"loss": 0.0638,
"step": 1850
},
{
"epoch": 2.7783543365455894,
"grad_norm": 2.723252058029175,
"learning_rate": 6.946666666666667e-06,
"loss": 0.0598,
"step": 1875
},
{
"epoch": 2.815418828762046,
"grad_norm": 1.8668267726898193,
"learning_rate": 6.891111111111111e-06,
"loss": 0.0607,
"step": 1900
},
{
"epoch": 2.8524833209785028,
"grad_norm": 2.0989866256713867,
"learning_rate": 6.835555555555556e-06,
"loss": 0.0821,
"step": 1925
},
{
"epoch": 2.8895478131949592,
"grad_norm": 2.9375364780426025,
"learning_rate": 6.780000000000001e-06,
"loss": 0.0636,
"step": 1950
},
{
"epoch": 2.9266123054114157,
"grad_norm": 2.1375315189361572,
"learning_rate": 6.724444444444444e-06,
"loss": 0.0723,
"step": 1975
},
{
"epoch": 2.9636767976278726,
"grad_norm": 2.5874264240264893,
"learning_rate": 6.668888888888889e-06,
"loss": 0.0706,
"step": 2000
},
{
"epoch": 2.9636767976278726,
"eval_loss": 0.2490690052509308,
"eval_runtime": 730.2087,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.494,
"eval_wer": 0.12696648426812585,
"step": 2000
},
{
"epoch": 3.0,
"grad_norm": 6.509148597717285,
"learning_rate": 6.613333333333334e-06,
"loss": 0.0587,
"step": 2025
},
{
"epoch": 3.0370644922164565,
"grad_norm": 1.9590086936950684,
"learning_rate": 6.557777777777778e-06,
"loss": 0.0241,
"step": 2050
},
{
"epoch": 3.0741289844329134,
"grad_norm": 1.4612740278244019,
"learning_rate": 6.502222222222223e-06,
"loss": 0.0267,
"step": 2075
},
{
"epoch": 3.11119347664937,
"grad_norm": 0.9522780179977417,
"learning_rate": 6.446666666666668e-06,
"loss": 0.023,
"step": 2100
},
{
"epoch": 3.1482579688658268,
"grad_norm": 1.891400694847107,
"learning_rate": 6.391111111111111e-06,
"loss": 0.0281,
"step": 2125
},
{
"epoch": 3.1853224610822832,
"grad_norm": 1.0783302783966064,
"learning_rate": 6.335555555555556e-06,
"loss": 0.0246,
"step": 2150
},
{
"epoch": 3.2223869532987397,
"grad_norm": 1.3504562377929688,
"learning_rate": 6.280000000000001e-06,
"loss": 0.0244,
"step": 2175
},
{
"epoch": 3.2594514455151966,
"grad_norm": 1.8768439292907715,
"learning_rate": 6.224444444444445e-06,
"loss": 0.0264,
"step": 2200
},
{
"epoch": 3.296515937731653,
"grad_norm": 1.5083887577056885,
"learning_rate": 6.16888888888889e-06,
"loss": 0.0248,
"step": 2225
},
{
"epoch": 3.3335804299481095,
"grad_norm": 3.5768120288848877,
"learning_rate": 6.113333333333333e-06,
"loss": 0.0316,
"step": 2250
},
{
"epoch": 3.3706449221645665,
"grad_norm": 1.1493444442749023,
"learning_rate": 6.057777777777778e-06,
"loss": 0.0294,
"step": 2275
},
{
"epoch": 3.407709414381023,
"grad_norm": 2.3746306896209717,
"learning_rate": 6.002222222222223e-06,
"loss": 0.0263,
"step": 2300
},
{
"epoch": 3.4447739065974794,
"grad_norm": 2.144634485244751,
"learning_rate": 5.946666666666668e-06,
"loss": 0.0348,
"step": 2325
},
{
"epoch": 3.4818383988139363,
"grad_norm": 1.5002686977386475,
"learning_rate": 5.891111111111112e-06,
"loss": 0.0228,
"step": 2350
},
{
"epoch": 3.5189028910303928,
"grad_norm": 1.6059187650680542,
"learning_rate": 5.8355555555555565e-06,
"loss": 0.0239,
"step": 2375
},
{
"epoch": 3.5559673832468492,
"grad_norm": 2.757420778274536,
"learning_rate": 5.78e-06,
"loss": 0.0277,
"step": 2400
},
{
"epoch": 3.593031875463306,
"grad_norm": 1.3977222442626953,
"learning_rate": 5.724444444444445e-06,
"loss": 0.0224,
"step": 2425
},
{
"epoch": 3.6300963676797626,
"grad_norm": 1.9618048667907715,
"learning_rate": 5.6688888888888895e-06,
"loss": 0.026,
"step": 2450
},
{
"epoch": 3.6671608598962195,
"grad_norm": 0.898245632648468,
"learning_rate": 5.613333333333334e-06,
"loss": 0.0326,
"step": 2475
},
{
"epoch": 3.704225352112676,
"grad_norm": 1.8148616552352905,
"learning_rate": 5.557777777777778e-06,
"loss": 0.0213,
"step": 2500
},
{
"epoch": 3.741289844329133,
"grad_norm": 1.308030366897583,
"learning_rate": 5.5022222222222224e-06,
"loss": 0.0192,
"step": 2525
},
{
"epoch": 3.7783543365455894,
"grad_norm": 1.6680744886398315,
"learning_rate": 5.4466666666666665e-06,
"loss": 0.027,
"step": 2550
},
{
"epoch": 3.815418828762046,
"grad_norm": 3.235917568206787,
"learning_rate": 5.391111111111111e-06,
"loss": 0.0242,
"step": 2575
},
{
"epoch": 3.8524833209785028,
"grad_norm": 2.096780300140381,
"learning_rate": 5.335555555555556e-06,
"loss": 0.0243,
"step": 2600
},
{
"epoch": 3.8895478131949592,
"grad_norm": 1.8445031642913818,
"learning_rate": 5.28e-06,
"loss": 0.024,
"step": 2625
},
{
"epoch": 3.9266123054114157,
"grad_norm": 1.357937216758728,
"learning_rate": 5.224444444444445e-06,
"loss": 0.0244,
"step": 2650
},
{
"epoch": 3.9636767976278726,
"grad_norm": 1.0413466691970825,
"learning_rate": 5.168888888888889e-06,
"loss": 0.0221,
"step": 2675
},
{
"epoch": 4.0,
"grad_norm": 3.0572996139526367,
"learning_rate": 5.113333333333333e-06,
"loss": 0.0206,
"step": 2700
},
{
"epoch": 4.037064492216457,
"grad_norm": 0.9961848258972168,
"learning_rate": 5.057777777777778e-06,
"loss": 0.0136,
"step": 2725
},
{
"epoch": 4.074128984432913,
"grad_norm": 1.0248702764511108,
"learning_rate": 5.002222222222223e-06,
"loss": 0.009,
"step": 2750
},
{
"epoch": 4.11119347664937,
"grad_norm": 0.6142157912254333,
"learning_rate": 4.946666666666667e-06,
"loss": 0.0113,
"step": 2775
},
{
"epoch": 4.148257968865827,
"grad_norm": 0.27292531728744507,
"learning_rate": 4.891111111111111e-06,
"loss": 0.009,
"step": 2800
},
{
"epoch": 4.185322461082283,
"grad_norm": 2.2906312942504883,
"learning_rate": 4.835555555555556e-06,
"loss": 0.0073,
"step": 2825
},
{
"epoch": 4.22238695329874,
"grad_norm": 1.0498850345611572,
"learning_rate": 4.78e-06,
"loss": 0.0093,
"step": 2850
},
{
"epoch": 4.259451445515197,
"grad_norm": 1.1574844121932983,
"learning_rate": 4.724444444444445e-06,
"loss": 0.0159,
"step": 2875
},
{
"epoch": 4.2965159377316535,
"grad_norm": 0.7209671139717102,
"learning_rate": 4.66888888888889e-06,
"loss": 0.0088,
"step": 2900
},
{
"epoch": 4.3335804299481095,
"grad_norm": 1.168841004371643,
"learning_rate": 4.613333333333334e-06,
"loss": 0.0094,
"step": 2925
},
{
"epoch": 4.3706449221645665,
"grad_norm": 0.6153778433799744,
"learning_rate": 4.557777777777778e-06,
"loss": 0.009,
"step": 2950
},
{
"epoch": 4.407709414381023,
"grad_norm": 1.5705232620239258,
"learning_rate": 4.502222222222223e-06,
"loss": 0.0085,
"step": 2975
},
{
"epoch": 4.444773906597479,
"grad_norm": 0.24448032677173615,
"learning_rate": 4.446666666666667e-06,
"loss": 0.0072,
"step": 3000
},
{
"epoch": 4.444773906597479,
"eval_loss": 0.27286583185195923,
"eval_runtime": 739.8615,
"eval_samples_per_second": 3.899,
"eval_steps_per_second": 0.488,
"eval_wer": 0.11913474692202462,
"step": 3000
},
{
"epoch": 4.481838398813936,
"grad_norm": 1.2278587818145752,
"learning_rate": 4.391111111111112e-06,
"loss": 0.0146,
"step": 3025
},
{
"epoch": 4.518902891030393,
"grad_norm": 0.6478213667869568,
"learning_rate": 4.3355555555555565e-06,
"loss": 0.014,
"step": 3050
},
{
"epoch": 4.555967383246849,
"grad_norm": 0.7865190505981445,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.0079,
"step": 3075
},
{
"epoch": 4.593031875463306,
"grad_norm": 2.3078877925872803,
"learning_rate": 4.2244444444444446e-06,
"loss": 0.009,
"step": 3100
},
{
"epoch": 4.630096367679763,
"grad_norm": 0.9625842571258545,
"learning_rate": 4.168888888888889e-06,
"loss": 0.0096,
"step": 3125
},
{
"epoch": 4.667160859896219,
"grad_norm": 0.7619579434394836,
"learning_rate": 4.1133333333333335e-06,
"loss": 0.0096,
"step": 3150
},
{
"epoch": 4.704225352112676,
"grad_norm": 1.5049270391464233,
"learning_rate": 4.057777777777778e-06,
"loss": 0.0099,
"step": 3175
},
{
"epoch": 4.741289844329133,
"grad_norm": 1.1056573390960693,
"learning_rate": 4.002222222222222e-06,
"loss": 0.0065,
"step": 3200
},
{
"epoch": 4.778354336545589,
"grad_norm": 0.7983392477035522,
"learning_rate": 3.946666666666667e-06,
"loss": 0.0105,
"step": 3225
},
{
"epoch": 4.815418828762046,
"grad_norm": 1.1153795719146729,
"learning_rate": 3.891111111111111e-06,
"loss": 0.0075,
"step": 3250
},
{
"epoch": 4.852483320978503,
"grad_norm": 0.9730608463287354,
"learning_rate": 3.835555555555555e-06,
"loss": 0.0087,
"step": 3275
},
{
"epoch": 4.889547813194959,
"grad_norm": 0.5694206953048706,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.0071,
"step": 3300
},
{
"epoch": 4.926612305411416,
"grad_norm": 0.2520028352737427,
"learning_rate": 3.724444444444445e-06,
"loss": 0.0081,
"step": 3325
},
{
"epoch": 4.963676797627873,
"grad_norm": 0.436355322599411,
"learning_rate": 3.668888888888889e-06,
"loss": 0.0078,
"step": 3350
},
{
"epoch": 5.0,
"grad_norm": 0.798361599445343,
"learning_rate": 3.6133333333333336e-06,
"loss": 0.0075,
"step": 3375
},
{
"epoch": 5.037064492216457,
"grad_norm": 1.3702267408370972,
"learning_rate": 3.5577777777777785e-06,
"loss": 0.005,
"step": 3400
},
{
"epoch": 5.074128984432913,
"grad_norm": 0.2790464162826538,
"learning_rate": 3.5022222222222225e-06,
"loss": 0.0032,
"step": 3425
},
{
"epoch": 5.11119347664937,
"grad_norm": 0.15111476182937622,
"learning_rate": 3.446666666666667e-06,
"loss": 0.0046,
"step": 3450
},
{
"epoch": 5.148257968865827,
"grad_norm": 0.09985285252332687,
"learning_rate": 3.391111111111111e-06,
"loss": 0.0035,
"step": 3475
},
{
"epoch": 5.185322461082283,
"grad_norm": 0.5352105498313904,
"learning_rate": 3.335555555555556e-06,
"loss": 0.0031,
"step": 3500
},
{
"epoch": 5.22238695329874,
"grad_norm": 0.9406213760375977,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.0035,
"step": 3525
},
{
"epoch": 5.259451445515197,
"grad_norm": 0.7073507905006409,
"learning_rate": 3.2244444444444444e-06,
"loss": 0.0035,
"step": 3550
},
{
"epoch": 5.2965159377316535,
"grad_norm": 0.07916448265314102,
"learning_rate": 3.1688888888888893e-06,
"loss": 0.0035,
"step": 3575
},
{
"epoch": 5.3335804299481095,
"grad_norm": 0.5285120606422424,
"learning_rate": 3.1133333333333337e-06,
"loss": 0.0027,
"step": 3600
},
{
"epoch": 5.3706449221645665,
"grad_norm": 0.09832775592803955,
"learning_rate": 3.0577777777777778e-06,
"loss": 0.0036,
"step": 3625
},
{
"epoch": 5.407709414381023,
"grad_norm": 0.21083103120326996,
"learning_rate": 3.0022222222222227e-06,
"loss": 0.0041,
"step": 3650
},
{
"epoch": 5.444773906597479,
"grad_norm": 0.6747980713844299,
"learning_rate": 2.946666666666667e-06,
"loss": 0.003,
"step": 3675
},
{
"epoch": 5.481838398813936,
"grad_norm": 0.5111549496650696,
"learning_rate": 2.891111111111111e-06,
"loss": 0.0028,
"step": 3700
},
{
"epoch": 5.518902891030393,
"grad_norm": 0.6502516269683838,
"learning_rate": 2.835555555555556e-06,
"loss": 0.0045,
"step": 3725
},
{
"epoch": 5.555967383246849,
"grad_norm": 0.4688964784145355,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.0036,
"step": 3750
},
{
"epoch": 5.593031875463306,
"grad_norm": 0.281994104385376,
"learning_rate": 2.7244444444444445e-06,
"loss": 0.0021,
"step": 3775
},
{
"epoch": 5.630096367679763,
"grad_norm": 0.11583279073238373,
"learning_rate": 2.6688888888888894e-06,
"loss": 0.0041,
"step": 3800
},
{
"epoch": 5.667160859896219,
"grad_norm": 0.22941534221172333,
"learning_rate": 2.6133333333333334e-06,
"loss": 0.0022,
"step": 3825
},
{
"epoch": 5.704225352112676,
"grad_norm": 0.13950073719024658,
"learning_rate": 2.557777777777778e-06,
"loss": 0.003,
"step": 3850
},
{
"epoch": 5.741289844329133,
"grad_norm": 0.6869206428527832,
"learning_rate": 2.5022222222222224e-06,
"loss": 0.0024,
"step": 3875
},
{
"epoch": 5.778354336545589,
"grad_norm": 0.09893081337213516,
"learning_rate": 2.446666666666667e-06,
"loss": 0.0029,
"step": 3900
},
{
"epoch": 5.815418828762046,
"grad_norm": 0.1264762133359909,
"learning_rate": 2.3911111111111113e-06,
"loss": 0.0033,
"step": 3925
},
{
"epoch": 5.852483320978503,
"grad_norm": 0.15489889681339264,
"learning_rate": 2.3355555555555557e-06,
"loss": 0.003,
"step": 3950
},
{
"epoch": 5.889547813194959,
"grad_norm": 0.5875250697135925,
"learning_rate": 2.28e-06,
"loss": 0.0022,
"step": 3975
},
{
"epoch": 5.926612305411416,
"grad_norm": 0.06691984087228775,
"learning_rate": 2.2244444444444447e-06,
"loss": 0.005,
"step": 4000
},
{
"epoch": 5.926612305411416,
"eval_loss": 0.28099098801612854,
"eval_runtime": 734.9707,
"eval_samples_per_second": 3.925,
"eval_steps_per_second": 0.491,
"eval_wer": 0.11566347469220246,
"step": 4000
},
{
"epoch": 5.963676797627873,
"grad_norm": 0.2645249664783478,
"learning_rate": 2.168888888888889e-06,
"loss": 0.0026,
"step": 4025
},
{
"epoch": 6.0,
"grad_norm": 0.3361597955226898,
"learning_rate": 2.1133333333333336e-06,
"loss": 0.0023,
"step": 4050
},
{
"epoch": 6.037064492216457,
"grad_norm": 0.059147898107767105,
"learning_rate": 2.057777777777778e-06,
"loss": 0.0015,
"step": 4075
},
{
"epoch": 6.074128984432913,
"grad_norm": 0.1158735603094101,
"learning_rate": 2.0022222222222225e-06,
"loss": 0.0016,
"step": 4100
},
{
"epoch": 6.11119347664937,
"grad_norm": 1.3564985990524292,
"learning_rate": 1.9466666666666665e-06,
"loss": 0.0014,
"step": 4125
},
{
"epoch": 6.148257968865827,
"grad_norm": 0.5956087112426758,
"learning_rate": 1.8911111111111114e-06,
"loss": 0.0018,
"step": 4150
},
{
"epoch": 6.185322461082283,
"grad_norm": 0.09224885702133179,
"learning_rate": 1.8355555555555557e-06,
"loss": 0.0017,
"step": 4175
},
{
"epoch": 6.22238695329874,
"grad_norm": 0.06868930906057358,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.0017,
"step": 4200
},
{
"epoch": 6.259451445515197,
"grad_norm": 0.06657718122005463,
"learning_rate": 1.7244444444444448e-06,
"loss": 0.0014,
"step": 4225
},
{
"epoch": 6.2965159377316535,
"grad_norm": 0.05459928885102272,
"learning_rate": 1.668888888888889e-06,
"loss": 0.0017,
"step": 4250
},
{
"epoch": 6.3335804299481095,
"grad_norm": 0.05795517563819885,
"learning_rate": 1.6133333333333335e-06,
"loss": 0.0027,
"step": 4275
},
{
"epoch": 6.3706449221645665,
"grad_norm": 0.06204914301633835,
"learning_rate": 1.5577777777777777e-06,
"loss": 0.0012,
"step": 4300
},
{
"epoch": 6.407709414381023,
"grad_norm": 0.0820712074637413,
"learning_rate": 1.5022222222222224e-06,
"loss": 0.0012,
"step": 4325
},
{
"epoch": 6.444773906597479,
"grad_norm": 0.056523606181144714,
"learning_rate": 1.4466666666666669e-06,
"loss": 0.0013,
"step": 4350
},
{
"epoch": 6.481838398813936,
"grad_norm": 0.07985592633485794,
"learning_rate": 1.3911111111111111e-06,
"loss": 0.0014,
"step": 4375
},
{
"epoch": 6.518902891030393,
"grad_norm": 0.044111426919698715,
"learning_rate": 1.3355555555555558e-06,
"loss": 0.0012,
"step": 4400
},
{
"epoch": 6.555967383246849,
"grad_norm": 0.05683915689587593,
"learning_rate": 1.28e-06,
"loss": 0.0014,
"step": 4425
},
{
"epoch": 6.593031875463306,
"grad_norm": 0.08568093180656433,
"learning_rate": 1.2244444444444445e-06,
"loss": 0.0012,
"step": 4450
},
{
"epoch": 6.630096367679763,
"grad_norm": 0.054062824696302414,
"learning_rate": 1.168888888888889e-06,
"loss": 0.0011,
"step": 4475
},
{
"epoch": 6.667160859896219,
"grad_norm": 0.0509476363658905,
"learning_rate": 1.1133333333333334e-06,
"loss": 0.0013,
"step": 4500
},
{
"epoch": 6.704225352112676,
"grad_norm": 0.04927874356508255,
"learning_rate": 1.0577777777777779e-06,
"loss": 0.0012,
"step": 4525
},
{
"epoch": 6.741289844329133,
"grad_norm": 0.08598697185516357,
"learning_rate": 1.0022222222222223e-06,
"loss": 0.0011,
"step": 4550
},
{
"epoch": 6.778354336545589,
"grad_norm": 0.3571934700012207,
"learning_rate": 9.466666666666667e-07,
"loss": 0.0016,
"step": 4575
},
{
"epoch": 6.815418828762046,
"grad_norm": 0.05977300554513931,
"learning_rate": 8.911111111111112e-07,
"loss": 0.001,
"step": 4600
},
{
"epoch": 6.852483320978503,
"grad_norm": 0.05966237559914589,
"learning_rate": 8.355555555555556e-07,
"loss": 0.001,
"step": 4625
},
{
"epoch": 6.889547813194959,
"grad_norm": 0.05432112514972687,
"learning_rate": 7.8e-07,
"loss": 0.001,
"step": 4650
},
{
"epoch": 6.926612305411416,
"grad_norm": 0.06741122156381607,
"learning_rate": 7.244444444444446e-07,
"loss": 0.0019,
"step": 4675
},
{
"epoch": 6.963676797627873,
"grad_norm": 0.04723643884062767,
"learning_rate": 6.68888888888889e-07,
"loss": 0.0012,
"step": 4700
},
{
"epoch": 7.0,
"grad_norm": 0.07329325377941132,
"learning_rate": 6.133333333333333e-07,
"loss": 0.001,
"step": 4725
},
{
"epoch": 7.037064492216457,
"grad_norm": 0.06389188766479492,
"learning_rate": 5.577777777777779e-07,
"loss": 0.001,
"step": 4750
},
{
"epoch": 7.074128984432913,
"grad_norm": 0.03797365352511406,
"learning_rate": 5.022222222222222e-07,
"loss": 0.001,
"step": 4775
},
{
"epoch": 7.11119347664937,
"grad_norm": 0.04686768725514412,
"learning_rate": 4.466666666666667e-07,
"loss": 0.0009,
"step": 4800
},
{
"epoch": 7.148257968865827,
"grad_norm": 0.06883518397808075,
"learning_rate": 3.9111111111111115e-07,
"loss": 0.001,
"step": 4825
},
{
"epoch": 7.185322461082283,
"grad_norm": 0.02842629700899124,
"learning_rate": 3.3555555555555556e-07,
"loss": 0.0009,
"step": 4850
},
{
"epoch": 7.22238695329874,
"grad_norm": 0.04749394953250885,
"learning_rate": 2.8e-07,
"loss": 0.001,
"step": 4875
},
{
"epoch": 7.259451445515197,
"grad_norm": 0.04491546377539635,
"learning_rate": 2.2444444444444445e-07,
"loss": 0.001,
"step": 4900
},
{
"epoch": 7.2965159377316535,
"grad_norm": 0.056013334542512894,
"learning_rate": 1.6888888888888888e-07,
"loss": 0.001,
"step": 4925
},
{
"epoch": 7.3335804299481095,
"grad_norm": 0.057778194546699524,
"learning_rate": 1.1333333333333336e-07,
"loss": 0.0011,
"step": 4950
},
{
"epoch": 7.3706449221645665,
"grad_norm": 0.051241885870695114,
"learning_rate": 5.777777777777778e-08,
"loss": 0.0011,
"step": 4975
},
{
"epoch": 7.407709414381023,
"grad_norm": 0.06301814317703247,
"learning_rate": 2.2222222222222225e-09,
"loss": 0.0009,
"step": 5000
},
{
"epoch": 7.407709414381023,
"eval_loss": 0.29011788964271545,
"eval_runtime": 732.4342,
"eval_samples_per_second": 3.939,
"eval_steps_per_second": 0.493,
"eval_wer": 0.1146545827633379,
"step": 5000
},
{
"epoch": 7.407709414381023,
"step": 5000,
"total_flos": 8.155551755501568e+19,
"train_loss": 0.10907779041565954,
"train_runtime": 12394.4337,
"train_samples_per_second": 6.455,
"train_steps_per_second": 0.403
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.155551755501568e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}