{
"best_global_step": 600,
"best_metric": 0.45842912793159485,
"best_model_checkpoint": "./speecht5_finetuned_Darija/checkpoint-600",
"epoch": 1.9515172654342519,
"eval_steps": 100,
"global_step": 700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06975933031042902,
"grad_norm": 4.014983654022217,
"learning_rate": 2.4e-05,
"loss": 1.2037,
"step": 25
},
{
"epoch": 0.13951866062085805,
"grad_norm": 4.588967800140381,
"learning_rate": 4.8e-05,
"loss": 0.8946,
"step": 50
},
{
"epoch": 0.20927799093128707,
"grad_norm": 4.026744842529297,
"learning_rate": 7.3e-05,
"loss": 0.6685,
"step": 75
},
{
"epoch": 0.2790373212417161,
"grad_norm": 3.659444808959961,
"learning_rate": 9.8e-05,
"loss": 0.6168,
"step": 100
},
{
"epoch": 0.2790373212417161,
"eval_loss": 0.5272690057754517,
"eval_runtime": 41.6965,
"eval_samples_per_second": 30.578,
"eval_steps_per_second": 15.301,
"step": 100
},
{
"epoch": 0.3487966515521451,
"grad_norm": 6.453115940093994,
"learning_rate": 9.878947368421053e-05,
"loss": 0.5847,
"step": 125
},
{
"epoch": 0.41855598186257414,
"grad_norm": 3.9114222526550293,
"learning_rate": 9.747368421052632e-05,
"loss": 0.5667,
"step": 150
},
{
"epoch": 0.4883153121730031,
"grad_norm": 5.056523323059082,
"learning_rate": 9.615789473684212e-05,
"loss": 0.5662,
"step": 175
},
{
"epoch": 0.5580746424834322,
"grad_norm": 5.494399070739746,
"learning_rate": 9.48421052631579e-05,
"loss": 0.5699,
"step": 200
},
{
"epoch": 0.5580746424834322,
"eval_loss": 0.5231854915618896,
"eval_runtime": 37.0744,
"eval_samples_per_second": 34.39,
"eval_steps_per_second": 17.209,
"step": 200
},
{
"epoch": 0.6278339727938612,
"grad_norm": 2.748530864715576,
"learning_rate": 9.352631578947368e-05,
"loss": 0.5448,
"step": 225
},
{
"epoch": 0.6975933031042902,
"grad_norm": 3.9691007137298584,
"learning_rate": 9.221052631578948e-05,
"loss": 0.5358,
"step": 250
},
{
"epoch": 0.7673526334147193,
"grad_norm": 3.5358879566192627,
"learning_rate": 9.089473684210526e-05,
"loss": 0.5245,
"step": 275
},
{
"epoch": 0.8371119637251483,
"grad_norm": 2.21895432472229,
"learning_rate": 8.957894736842106e-05,
"loss": 0.5231,
"step": 300
},
{
"epoch": 0.8371119637251483,
"eval_loss": 0.47762706875801086,
"eval_runtime": 36.3582,
"eval_samples_per_second": 35.068,
"eval_steps_per_second": 17.548,
"step": 300
},
{
"epoch": 0.9068712940355773,
"grad_norm": 3.315195322036743,
"learning_rate": 8.826315789473684e-05,
"loss": 0.5349,
"step": 325
},
{
"epoch": 0.9766306243460062,
"grad_norm": 2.936798572540283,
"learning_rate": 8.694736842105264e-05,
"loss": 0.5248,
"step": 350
},
{
"epoch": 1.0446459713986747,
"grad_norm": 2.950599431991577,
"learning_rate": 8.563157894736843e-05,
"loss": 0.5101,
"step": 375
},
{
"epoch": 1.1144053017091036,
"grad_norm": 4.972070693969727,
"learning_rate": 8.431578947368422e-05,
"loss": 0.515,
"step": 400
},
{
"epoch": 1.1144053017091036,
"eval_loss": 0.49294987320899963,
"eval_runtime": 35.8818,
"eval_samples_per_second": 35.533,
"eval_steps_per_second": 17.781,
"step": 400
},
{
"epoch": 1.1841646320195327,
"grad_norm": 2.6673648357391357,
"learning_rate": 8.3e-05,
"loss": 0.5115,
"step": 425
},
{
"epoch": 1.2539239623299616,
"grad_norm": 3.388873338699341,
"learning_rate": 8.16842105263158e-05,
"loss": 0.5064,
"step": 450
},
{
"epoch": 1.3236832926403905,
"grad_norm": 2.4961979389190674,
"learning_rate": 8.036842105263158e-05,
"loss": 0.5028,
"step": 475
},
{
"epoch": 1.3934426229508197,
"grad_norm": 2.7970707416534424,
"learning_rate": 7.905263157894737e-05,
"loss": 0.4975,
"step": 500
},
{
"epoch": 1.3934426229508197,
"eval_loss": 0.4633351266384125,
"eval_runtime": 36.5245,
"eval_samples_per_second": 34.908,
"eval_steps_per_second": 17.468,
"step": 500
},
{
"epoch": 1.4632019532612488,
"grad_norm": 2.774756908416748,
"learning_rate": 7.773684210526317e-05,
"loss": 0.498,
"step": 525
},
{
"epoch": 1.5329612835716777,
"grad_norm": 5.114898204803467,
"learning_rate": 7.642105263157895e-05,
"loss": 0.4974,
"step": 550
},
{
"epoch": 1.6027206138821066,
"grad_norm": 3.1757712364196777,
"learning_rate": 7.510526315789475e-05,
"loss": 0.5068,
"step": 575
},
{
"epoch": 1.6724799441925358,
"grad_norm": 2.1318249702453613,
"learning_rate": 7.378947368421053e-05,
"loss": 0.5003,
"step": 600
},
{
"epoch": 1.6724799441925358,
"eval_loss": 0.45842912793159485,
"eval_runtime": 36.8677,
"eval_samples_per_second": 34.583,
"eval_steps_per_second": 17.305,
"step": 600
},
{
"epoch": 1.742239274502965,
"grad_norm": 3.224851131439209,
"learning_rate": 7.247368421052631e-05,
"loss": 0.4954,
"step": 625
},
{
"epoch": 1.8119986048133938,
"grad_norm": 2.2048707008361816,
"learning_rate": 7.115789473684211e-05,
"loss": 0.4954,
"step": 650
},
{
"epoch": 1.8817579351238227,
"grad_norm": 3.310940742492676,
"learning_rate": 6.98421052631579e-05,
"loss": 0.4898,
"step": 675
},
{
"epoch": 1.9515172654342519,
"grad_norm": 3.6913022994995117,
"learning_rate": 6.852631578947369e-05,
"loss": 0.4881,
"step": 700
},
{
"epoch": 1.9515172654342519,
"eval_loss": 0.4606294631958008,
"eval_runtime": 36.611,
"eval_samples_per_second": 34.826,
"eval_steps_per_second": 17.426,
"step": 700
}
],
"logging_steps": 25,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 862456381300800.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}