whisper-arabic-Adapter-Trainable / trainer_state.json
Abdulvajid's picture
Upload full training checkpoint
3d37445 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08177652449770885,
"eval_steps": 10,
"global_step": 290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0028198801550934085,
"grad_norm": 1.4646761417388916,
"learning_rate": 2.535211267605634e-07,
"loss": 1.6487,
"step": 10
},
{
"epoch": 0.0028198801550934085,
"eval_loss": 1.5981513261795044,
"eval_runtime": 1507.1995,
"eval_samples_per_second": 0.663,
"eval_steps_per_second": 0.332,
"eval_wer": 23.173577454676,
"step": 10
},
{
"epoch": 0.005639760310186817,
"grad_norm": 1.6286903619766235,
"learning_rate": 5.352112676056338e-07,
"loss": 1.5828,
"step": 20
},
{
"epoch": 0.005639760310186817,
"eval_loss": 1.5729961395263672,
"eval_runtime": 1479.7495,
"eval_samples_per_second": 0.676,
"eval_steps_per_second": 0.338,
"eval_wer": 17.930291052820696,
"step": 20
},
{
"epoch": 0.008459640465280225,
"grad_norm": 2.2479844093322754,
"learning_rate": 8.169014084507043e-07,
"loss": 1.6375,
"step": 30
},
{
"epoch": 0.008459640465280225,
"eval_loss": 1.5637080669403076,
"eval_runtime": 1475.9552,
"eval_samples_per_second": 0.678,
"eval_steps_per_second": 0.339,
"eval_wer": 17.930291052820696,
"step": 30
},
{
"epoch": 0.011279520620373634,
"grad_norm": 1.6205031871795654,
"learning_rate": 1.098591549295775e-06,
"loss": 1.4224,
"step": 40
},
{
"epoch": 0.011279520620373634,
"eval_loss": 1.5508662462234497,
"eval_runtime": 1465.0874,
"eval_samples_per_second": 0.683,
"eval_steps_per_second": 0.341,
"eval_wer": 17.876392382321235,
"step": 40
},
{
"epoch": 0.014099400775467043,
"grad_norm": 1.4715520143508911,
"learning_rate": 1.3802816901408453e-06,
"loss": 1.5848,
"step": 50
},
{
"epoch": 0.014099400775467043,
"eval_loss": 1.5341806411743164,
"eval_runtime": 1483.291,
"eval_samples_per_second": 0.674,
"eval_steps_per_second": 0.337,
"eval_wer": 17.822493711821775,
"step": 50
},
{
"epoch": 0.01691928093056045,
"grad_norm": 1.752820611000061,
"learning_rate": 1.6619718309859157e-06,
"loss": 1.4716,
"step": 60
},
{
"epoch": 0.01691928093056045,
"eval_loss": 1.5121240615844727,
"eval_runtime": 1496.0308,
"eval_samples_per_second": 0.668,
"eval_steps_per_second": 0.334,
"eval_wer": 17.714696370822853,
"step": 60
},
{
"epoch": 0.01973916108565386,
"grad_norm": 1.584325909614563,
"learning_rate": 1.943661971830986e-06,
"loss": 1.4226,
"step": 70
},
{
"epoch": 0.01973916108565386,
"eval_loss": 1.4858661890029907,
"eval_runtime": 1472.4553,
"eval_samples_per_second": 0.679,
"eval_steps_per_second": 0.34,
"eval_wer": 17.62486525332375,
"step": 70
},
{
"epoch": 0.022559041240747268,
"grad_norm": 1.720224142074585,
"learning_rate": 2.2253521126760566e-06,
"loss": 1.4217,
"step": 80
},
{
"epoch": 0.022559041240747268,
"eval_loss": 1.4541670083999634,
"eval_runtime": 1479.057,
"eval_samples_per_second": 0.676,
"eval_steps_per_second": 0.338,
"eval_wer": 17.46316924182537,
"step": 80
},
{
"epoch": 0.02537892139584068,
"grad_norm": 1.3850756883621216,
"learning_rate": 2.507042253521127e-06,
"loss": 1.444,
"step": 90
},
{
"epoch": 0.02537892139584068,
"eval_loss": 1.415584921836853,
"eval_runtime": 1481.7443,
"eval_samples_per_second": 0.675,
"eval_steps_per_second": 0.337,
"eval_wer": 17.40927057132591,
"step": 90
},
{
"epoch": 0.028198801550934086,
"grad_norm": 2.1286864280700684,
"learning_rate": 2.7887323943661974e-06,
"loss": 1.3818,
"step": 100
},
{
"epoch": 0.028198801550934086,
"eval_loss": 1.3688818216323853,
"eval_runtime": 1474.4035,
"eval_samples_per_second": 0.678,
"eval_steps_per_second": 0.339,
"eval_wer": 17.337405677326625,
"step": 100
},
{
"epoch": 0.031018681706027493,
"grad_norm": 1.3948097229003906,
"learning_rate": 3.0704225352112678e-06,
"loss": 1.2849,
"step": 110
},
{
"epoch": 0.031018681706027493,
"eval_loss": 1.3072822093963623,
"eval_runtime": 1463.7543,
"eval_samples_per_second": 0.683,
"eval_steps_per_second": 0.342,
"eval_wer": 17.175709665828244,
"step": 110
},
{
"epoch": 0.0338385618611209,
"grad_norm": 1.907498836517334,
"learning_rate": 3.352112676056338e-06,
"loss": 1.2869,
"step": 120
},
{
"epoch": 0.0338385618611209,
"eval_loss": 1.2292650938034058,
"eval_runtime": 1464.7785,
"eval_samples_per_second": 0.683,
"eval_steps_per_second": 0.341,
"eval_wer": 17.121810995328783,
"step": 120
},
{
"epoch": 0.036658442016214314,
"grad_norm": 2.2028934955596924,
"learning_rate": 3.633802816901409e-06,
"loss": 1.1545,
"step": 130
},
{
"epoch": 0.036658442016214314,
"eval_loss": 1.1384223699569702,
"eval_runtime": 1494.8653,
"eval_samples_per_second": 0.669,
"eval_steps_per_second": 0.334,
"eval_wer": 16.92418253683076,
"step": 130
},
{
"epoch": 0.03947832217130772,
"grad_norm": 1.801270604133606,
"learning_rate": 3.915492957746479e-06,
"loss": 1.1247,
"step": 140
},
{
"epoch": 0.03947832217130772,
"eval_loss": 1.0565879344940186,
"eval_runtime": 1504.6119,
"eval_samples_per_second": 0.665,
"eval_steps_per_second": 0.332,
"eval_wer": 16.708587854832917,
"step": 140
},
{
"epoch": 0.04229820232640113,
"grad_norm": 1.8307346105575562,
"learning_rate": 4.19718309859155e-06,
"loss": 1.0473,
"step": 150
},
{
"epoch": 0.04229820232640113,
"eval_loss": 0.9981860518455505,
"eval_runtime": 1504.1852,
"eval_samples_per_second": 0.665,
"eval_steps_per_second": 0.332,
"eval_wer": 16.331297161336686,
"step": 150
},
{
"epoch": 0.045118082481494536,
"grad_norm": 1.1669814586639404,
"learning_rate": 4.4788732394366205e-06,
"loss": 0.9783,
"step": 160
},
{
"epoch": 0.045118082481494536,
"eval_loss": 0.9503867030143738,
"eval_runtime": 1510.1792,
"eval_samples_per_second": 0.662,
"eval_steps_per_second": 0.331,
"eval_wer": 16.241466043837587,
"step": 160
},
{
"epoch": 0.04793796263658794,
"grad_norm": 1.8830143213272095,
"learning_rate": 4.7605633802816905e-06,
"loss": 0.937,
"step": 170
},
{
"epoch": 0.04793796263658794,
"eval_loss": 0.906912624835968,
"eval_runtime": 1516.9202,
"eval_samples_per_second": 0.659,
"eval_steps_per_second": 0.33,
"eval_wer": 16.349263384836508,
"step": 170
},
{
"epoch": 0.05075784279168136,
"grad_norm": 1.4165269136428833,
"learning_rate": 5.042253521126761e-06,
"loss": 0.942,
"step": 180
},
{
"epoch": 0.05075784279168136,
"eval_loss": 0.868701159954071,
"eval_runtime": 1514.2041,
"eval_samples_per_second": 0.66,
"eval_steps_per_second": 0.33,
"eval_wer": 15.45095220984549,
"step": 180
},
{
"epoch": 0.053577722946774764,
"grad_norm": 1.283534049987793,
"learning_rate": 5.323943661971831e-06,
"loss": 0.7458,
"step": 190
},
{
"epoch": 0.053577722946774764,
"eval_loss": 0.8295483589172363,
"eval_runtime": 1513.4352,
"eval_samples_per_second": 0.661,
"eval_steps_per_second": 0.33,
"eval_wer": 15.199425080848005,
"step": 190
},
{
"epoch": 0.05639760310186817,
"grad_norm": 2.037789821624756,
"learning_rate": 5.577464788732395e-06,
"loss": 0.8498,
"step": 200
},
{
"epoch": 0.05639760310186817,
"eval_loss": 0.7952865958213806,
"eval_runtime": 1513.3273,
"eval_samples_per_second": 0.661,
"eval_steps_per_second": 0.33,
"eval_wer": 14.768235716852319,
"step": 200
},
{
"epoch": 0.05921748325696158,
"grad_norm": 1.5177339315414429,
"learning_rate": 5.859154929577466e-06,
"loss": 0.8204,
"step": 210
},
{
"epoch": 0.05921748325696158,
"eval_loss": 0.7552182674407959,
"eval_runtime": 1510.8116,
"eval_samples_per_second": 0.662,
"eval_steps_per_second": 0.331,
"eval_wer": 14.498742364355014,
"step": 210
},
{
"epoch": 0.062037363412054986,
"grad_norm": 2.0467543601989746,
"learning_rate": 6.1408450704225356e-06,
"loss": 0.7322,
"step": 220
},
{
"epoch": 0.062037363412054986,
"eval_loss": 0.7149233222007751,
"eval_runtime": 1512.5061,
"eval_samples_per_second": 0.661,
"eval_steps_per_second": 0.331,
"eval_wer": 14.31908012935681,
"step": 220
},
{
"epoch": 0.06485724356714839,
"grad_norm": 1.996768832206726,
"learning_rate": 6.422535211267606e-06,
"loss": 0.7584,
"step": 230
},
{
"epoch": 0.06485724356714839,
"eval_loss": 0.679040789604187,
"eval_runtime": 1497.8821,
"eval_samples_per_second": 0.668,
"eval_steps_per_second": 0.334,
"eval_wer": 14.28314768235717,
"step": 230
},
{
"epoch": 0.0676771237222418,
"grad_norm": 2.120554208755493,
"learning_rate": 6.704225352112676e-06,
"loss": 0.6627,
"step": 240
},
{
"epoch": 0.0676771237222418,
"eval_loss": 0.6412619352340698,
"eval_runtime": 1508.0578,
"eval_samples_per_second": 0.663,
"eval_steps_per_second": 0.332,
"eval_wer": 14.39094502335609,
"step": 240
},
{
"epoch": 0.07049700387733521,
"grad_norm": 2.281755208969116,
"learning_rate": 6.985915492957746e-06,
"loss": 0.6823,
"step": 250
},
{
"epoch": 0.07049700387733521,
"eval_loss": 0.6027175188064575,
"eval_runtime": 1504.4211,
"eval_samples_per_second": 0.665,
"eval_steps_per_second": 0.332,
"eval_wer": 14.35501257635645,
"step": 250
},
{
"epoch": 0.07331688403242863,
"grad_norm": 1.7897437810897827,
"learning_rate": 7.267605633802818e-06,
"loss": 0.5683,
"step": 260
},
{
"epoch": 0.07331688403242863,
"eval_loss": 0.5633881688117981,
"eval_runtime": 1514.5793,
"eval_samples_per_second": 0.66,
"eval_steps_per_second": 0.33,
"eval_wer": 14.28314768235717,
"step": 260
},
{
"epoch": 0.07613676418752204,
"grad_norm": 2.116328001022339,
"learning_rate": 7.549295774647888e-06,
"loss": 0.5503,
"step": 270
},
{
"epoch": 0.07613676418752204,
"eval_loss": 0.5198965072631836,
"eval_runtime": 1518.7263,
"eval_samples_per_second": 0.658,
"eval_steps_per_second": 0.329,
"eval_wer": 14.193316564858065,
"step": 270
},
{
"epoch": 0.07895664434261544,
"grad_norm": 1.2750239372253418,
"learning_rate": 7.830985915492958e-06,
"loss": 0.4712,
"step": 280
},
{
"epoch": 0.07895664434261544,
"eval_loss": 0.46468213200569153,
"eval_runtime": 1520.4856,
"eval_samples_per_second": 0.658,
"eval_steps_per_second": 0.329,
"eval_wer": 14.103485447358965,
"step": 280
},
{
"epoch": 0.08177652449770885,
"grad_norm": 2.0781710147857666,
"learning_rate": 8.112676056338029e-06,
"loss": 0.4791,
"step": 290
},
{
"epoch": 0.08177652449770885,
"eval_loss": 0.3976580798625946,
"eval_runtime": 1514.2321,
"eval_samples_per_second": 0.66,
"eval_steps_per_second": 0.33,
"eval_wer": 14.085519223859144,
"step": 290
}
],
"logging_steps": 10,
"max_steps": 3546,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.18355290112e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}