CS_mms_eng_yor / last-checkpoint /trainer_state.json
oluwagbotty's picture
Training in progress, step 3400, checkpoint
53aa11e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.272838002436053,
"eval_steps": 100,
"global_step": 3400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.243605359317905,
"grad_norm": 1.117086410522461,
"learning_rate": 0.0005595762717789524,
"loss": 2.3476,
"step": 100
},
{
"epoch": 0.243605359317905,
"eval_loss": 0.41345393657684326,
"eval_runtime": 207.4783,
"eval_samples_per_second": 7.938,
"eval_steps_per_second": 0.993,
"eval_wer": 0.3729947315456106,
"step": 100
},
{
"epoch": 0.48721071863581,
"grad_norm": 1.6088380813598633,
"learning_rate": 0.0005480076079009505,
"loss": 0.4631,
"step": 200
},
{
"epoch": 0.48721071863581,
"eval_loss": 0.3957911729812622,
"eval_runtime": 207.5213,
"eval_samples_per_second": 7.937,
"eval_steps_per_second": 0.993,
"eval_wer": 0.3639969218019298,
"step": 200
},
{
"epoch": 0.730816077953715,
"grad_norm": 1.3071035146713257,
"learning_rate": 0.0005364389440229487,
"loss": 0.4441,
"step": 300
},
{
"epoch": 0.730816077953715,
"eval_loss": 0.3825915455818176,
"eval_runtime": 207.3617,
"eval_samples_per_second": 7.943,
"eval_steps_per_second": 0.993,
"eval_wer": 0.34925708873497896,
"step": 300
},
{
"epoch": 0.97442143727162,
"grad_norm": 1.313904047012329,
"learning_rate": 0.0005249859667837267,
"loss": 0.4367,
"step": 400
},
{
"epoch": 0.97442143727162,
"eval_loss": 0.3784385323524475,
"eval_runtime": 207.3709,
"eval_samples_per_second": 7.942,
"eval_steps_per_second": 0.993,
"eval_wer": 0.35215769845498135,
"step": 400
},
{
"epoch": 1.2168087697929355,
"grad_norm": 0.9488235116004944,
"learning_rate": 0.0005134173029057248,
"loss": 0.424,
"step": 500
},
{
"epoch": 1.2168087697929355,
"eval_loss": 0.3734341263771057,
"eval_runtime": 209.8871,
"eval_samples_per_second": 7.847,
"eval_steps_per_second": 0.981,
"eval_wer": 0.3468300479488546,
"step": 500
},
{
"epoch": 1.4604141291108403,
"grad_norm": 0.875639021396637,
"learning_rate": 0.0005018486390277228,
"loss": 0.4256,
"step": 600
},
{
"epoch": 1.4604141291108403,
"eval_loss": 0.37602460384368896,
"eval_runtime": 207.528,
"eval_samples_per_second": 7.936,
"eval_steps_per_second": 0.993,
"eval_wer": 0.3515657372876339,
"step": 600
},
{
"epoch": 1.7040194884287454,
"grad_norm": 0.7033505439758301,
"learning_rate": 0.000490395661788501,
"loss": 0.4189,
"step": 700
},
{
"epoch": 1.7040194884287454,
"eval_loss": 0.3739420473575592,
"eval_runtime": 208.7563,
"eval_samples_per_second": 7.89,
"eval_steps_per_second": 0.987,
"eval_wer": 0.34742200911620197,
"step": 700
},
{
"epoch": 1.9476248477466505,
"grad_norm": 0.8537428975105286,
"learning_rate": 0.00047882699791049904,
"loss": 0.4293,
"step": 800
},
{
"epoch": 1.9476248477466505,
"eval_loss": 0.3677741289138794,
"eval_runtime": 209.3774,
"eval_samples_per_second": 7.866,
"eval_steps_per_second": 0.984,
"eval_wer": 0.34481737997987333,
"step": 800
},
{
"epoch": 2.1900121802679657,
"grad_norm": 0.7106778621673584,
"learning_rate": 0.00046725833403249716,
"loss": 0.4105,
"step": 900
},
{
"epoch": 2.1900121802679657,
"eval_loss": 0.36625751852989197,
"eval_runtime": 208.5161,
"eval_samples_per_second": 7.899,
"eval_steps_per_second": 0.988,
"eval_wer": 0.3465932634819156,
"step": 900
},
{
"epoch": 2.433617539585871,
"grad_norm": 0.9586976766586304,
"learning_rate": 0.00045568967015449527,
"loss": 0.3981,
"step": 1000
},
{
"epoch": 2.433617539585871,
"eval_loss": 0.37355494499206543,
"eval_runtime": 209.5635,
"eval_samples_per_second": 7.859,
"eval_steps_per_second": 0.983,
"eval_wer": 0.3441070265790564,
"step": 1000
},
{
"epoch": 2.677222898903776,
"grad_norm": 7.491183757781982,
"learning_rate": 0.0004441210062764934,
"loss": 0.4161,
"step": 1100
},
{
"epoch": 2.677222898903776,
"eval_loss": 0.3725080192089081,
"eval_runtime": 209.8642,
"eval_samples_per_second": 7.848,
"eval_steps_per_second": 0.982,
"eval_wer": 0.34866512756763157,
"step": 1100
},
{
"epoch": 2.9208282582216807,
"grad_norm": 0.9329687356948853,
"learning_rate": 0.0004326680290372715,
"loss": 0.4188,
"step": 1200
},
{
"epoch": 2.9208282582216807,
"eval_loss": 0.3627680242061615,
"eval_runtime": 209.391,
"eval_samples_per_second": 7.866,
"eval_steps_per_second": 0.984,
"eval_wer": 0.3433966731782395,
"step": 1200
},
{
"epoch": 3.1632155907429964,
"grad_norm": 0.7644044756889343,
"learning_rate": 0.00042109936515926955,
"loss": 0.3984,
"step": 1300
},
{
"epoch": 3.1632155907429964,
"eval_loss": 0.3671010732650757,
"eval_runtime": 207.9613,
"eval_samples_per_second": 7.92,
"eval_steps_per_second": 0.991,
"eval_wer": 0.34262712366068787,
"step": 1300
},
{
"epoch": 3.4068209500609012,
"grad_norm": 1.272712230682373,
"learning_rate": 0.00040953070128126767,
"loss": 0.4108,
"step": 1400
},
{
"epoch": 3.4068209500609012,
"eval_loss": 0.3673802316188812,
"eval_runtime": 208.6721,
"eval_samples_per_second": 7.893,
"eval_steps_per_second": 0.987,
"eval_wer": 0.3410288285088498,
"step": 1400
},
{
"epoch": 3.6504263093788065,
"grad_norm": 0.9993807673454285,
"learning_rate": 0.00039796203740326573,
"loss": 0.3905,
"step": 1500
},
{
"epoch": 3.6504263093788065,
"eval_loss": 0.3593311309814453,
"eval_runtime": 208.7376,
"eval_samples_per_second": 7.89,
"eval_steps_per_second": 0.987,
"eval_wer": 0.3387793760729296,
"step": 1500
},
{
"epoch": 3.8940316686967114,
"grad_norm": 4.2975311279296875,
"learning_rate": 0.0003865090601640438,
"loss": 0.3977,
"step": 1600
},
{
"epoch": 3.8940316686967114,
"eval_loss": 0.36261168122291565,
"eval_runtime": 209.8526,
"eval_samples_per_second": 7.848,
"eval_steps_per_second": 0.982,
"eval_wer": 0.33872017995619486,
"step": 1600
},
{
"epoch": 4.136419001218027,
"grad_norm": 0.6952757835388184,
"learning_rate": 0.0003749403962860419,
"loss": 0.3961,
"step": 1700
},
{
"epoch": 4.136419001218027,
"eval_loss": 0.36098670959472656,
"eval_runtime": 209.1076,
"eval_samples_per_second": 7.876,
"eval_steps_per_second": 0.985,
"eval_wer": 0.33220860711537326,
"step": 1700
},
{
"epoch": 4.3800243605359315,
"grad_norm": 0.6217916011810303,
"learning_rate": 0.00036337173240804,
"loss": 0.3844,
"step": 1800
},
{
"epoch": 4.3800243605359315,
"eval_loss": 0.36441686749458313,
"eval_runtime": 208.6619,
"eval_samples_per_second": 7.893,
"eval_steps_per_second": 0.987,
"eval_wer": 0.3422719469602794,
"step": 1800
},
{
"epoch": 4.623629719853836,
"grad_norm": 0.6715461611747742,
"learning_rate": 0.00035180306853003807,
"loss": 0.3938,
"step": 1900
},
{
"epoch": 4.623629719853836,
"eval_loss": 0.35540714859962463,
"eval_runtime": 207.8758,
"eval_samples_per_second": 7.923,
"eval_steps_per_second": 0.991,
"eval_wer": 0.33546439353578406,
"step": 1900
},
{
"epoch": 4.867235079171742,
"grad_norm": 4.297011852264404,
"learning_rate": 0.00034035009129081617,
"loss": 0.3808,
"step": 2000
},
{
"epoch": 4.867235079171742,
"eval_loss": 0.35785403847694397,
"eval_runtime": 209.5813,
"eval_samples_per_second": 7.859,
"eval_steps_per_second": 0.983,
"eval_wer": 0.3348724323684366,
"step": 2000
},
{
"epoch": 5.109622411693057,
"grad_norm": 1.1115533113479614,
"learning_rate": 0.00032878142741281434,
"loss": 0.3822,
"step": 2100
},
{
"epoch": 5.109622411693057,
"eval_loss": 0.35621944069862366,
"eval_runtime": 208.4206,
"eval_samples_per_second": 7.902,
"eval_steps_per_second": 0.988,
"eval_wer": 0.3330373527496596,
"step": 2100
},
{
"epoch": 5.353227771010962,
"grad_norm": 1.375293493270874,
"learning_rate": 0.0003172127635348124,
"loss": 0.3755,
"step": 2200
},
{
"epoch": 5.353227771010962,
"eval_loss": 0.3556455075740814,
"eval_runtime": 208.4802,
"eval_samples_per_second": 7.9,
"eval_steps_per_second": 0.988,
"eval_wer": 0.3307287041970047,
"step": 2200
},
{
"epoch": 5.596833130328867,
"grad_norm": 4.188701629638672,
"learning_rate": 0.0003056440996568105,
"loss": 0.3789,
"step": 2300
},
{
"epoch": 5.596833130328867,
"eval_loss": 0.3514413833618164,
"eval_runtime": 209.2966,
"eval_samples_per_second": 7.869,
"eval_steps_per_second": 0.984,
"eval_wer": 0.3303143313798615,
"step": 2300
},
{
"epoch": 5.840438489646772,
"grad_norm": 1.3970204591751099,
"learning_rate": 0.0002941911224175886,
"loss": 0.3742,
"step": 2400
},
{
"epoch": 5.840438489646772,
"eval_loss": 0.34722429513931274,
"eval_runtime": 209.3711,
"eval_samples_per_second": 7.866,
"eval_steps_per_second": 0.984,
"eval_wer": 0.33280056828272064,
"step": 2400
},
{
"epoch": 6.082825822168088,
"grad_norm": 3.3088934421539307,
"learning_rate": 0.0002826224585395867,
"loss": 0.3608,
"step": 2500
},
{
"epoch": 6.082825822168088,
"eval_loss": 0.3470153510570526,
"eval_runtime": 208.973,
"eval_samples_per_second": 7.881,
"eval_steps_per_second": 0.986,
"eval_wer": 0.32759131001006336,
"step": 2500
},
{
"epoch": 6.326431181485993,
"grad_norm": 1.3504565954208374,
"learning_rate": 0.0002711694813003648,
"loss": 0.3647,
"step": 2600
},
{
"epoch": 6.326431181485993,
"eval_loss": 0.34682515263557434,
"eval_runtime": 209.4124,
"eval_samples_per_second": 7.865,
"eval_steps_per_second": 0.984,
"eval_wer": 0.32954478186230984,
"step": 2600
},
{
"epoch": 6.570036540803898,
"grad_norm": 3.7289445400238037,
"learning_rate": 0.0002596008174223629,
"loss": 0.3719,
"step": 2700
},
{
"epoch": 6.570036540803898,
"eval_loss": 0.3456605076789856,
"eval_runtime": 210.3062,
"eval_samples_per_second": 7.831,
"eval_steps_per_second": 0.98,
"eval_wer": 0.3259930148582253,
"step": 2700
},
{
"epoch": 6.8136419001218025,
"grad_norm": 0.8493024706840515,
"learning_rate": 0.00024803215354436095,
"loss": 0.3678,
"step": 2800
},
{
"epoch": 6.8136419001218025,
"eval_loss": 0.3423258364200592,
"eval_runtime": 210.1296,
"eval_samples_per_second": 7.838,
"eval_steps_per_second": 0.98,
"eval_wer": 0.31794234298230034,
"step": 2800
},
{
"epoch": 7.056029232643118,
"grad_norm": 2.6314430236816406,
"learning_rate": 0.00023646348966635904,
"loss": 0.3575,
"step": 2900
},
{
"epoch": 7.056029232643118,
"eval_loss": 0.3422372043132782,
"eval_runtime": 209.587,
"eval_samples_per_second": 7.858,
"eval_steps_per_second": 0.983,
"eval_wer": 0.3201325993014858,
"step": 2900
},
{
"epoch": 7.2996345919610235,
"grad_norm": 0.7139139175415039,
"learning_rate": 0.00022489482578835716,
"loss": 0.3427,
"step": 3000
},
{
"epoch": 7.2996345919610235,
"eval_loss": 0.35160067677497864,
"eval_runtime": 209.3155,
"eval_samples_per_second": 7.869,
"eval_steps_per_second": 0.984,
"eval_wer": 0.3231516012549577,
"step": 3000
},
{
"epoch": 7.543239951278928,
"grad_norm": 2.187338352203369,
"learning_rate": 0.00021344184854913526,
"loss": 0.3661,
"step": 3100
},
{
"epoch": 7.543239951278928,
"eval_loss": 0.342045396566391,
"eval_runtime": 210.2605,
"eval_samples_per_second": 7.833,
"eval_steps_per_second": 0.98,
"eval_wer": 0.32155330610311966,
"step": 3100
},
{
"epoch": 7.786845310596833,
"grad_norm": 2.156829357147217,
"learning_rate": 0.00020187318467113335,
"loss": 0.3502,
"step": 3200
},
{
"epoch": 7.786845310596833,
"eval_loss": 0.3429788053035736,
"eval_runtime": 210.9605,
"eval_samples_per_second": 7.807,
"eval_steps_per_second": 0.976,
"eval_wer": 0.32380275853903984,
"step": 3200
},
{
"epoch": 8.02923264311815,
"grad_norm": 0.6317552924156189,
"learning_rate": 0.00019030452079313144,
"loss": 0.3681,
"step": 3300
},
{
"epoch": 8.02923264311815,
"eval_loss": 0.33876505494117737,
"eval_runtime": 210.1517,
"eval_samples_per_second": 7.837,
"eval_steps_per_second": 0.98,
"eval_wer": 0.3200734031847511,
"step": 3300
},
{
"epoch": 8.272838002436053,
"grad_norm": 0.9605346322059631,
"learning_rate": 0.0001787358569151295,
"loss": 0.3454,
"step": 3400
},
{
"epoch": 8.272838002436053,
"eval_loss": 0.33973127603530884,
"eval_runtime": 209.8102,
"eval_samples_per_second": 7.85,
"eval_steps_per_second": 0.982,
"eval_wer": 0.32184928668679336,
"step": 3400
}
],
"logging_steps": 100,
"max_steps": 4932,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.771788544505309e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}