mbart50-large-for-aug / trainer_state.json
GoGanghee's picture
Initial commit
03c5c95
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 8751,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"learning_rate": 4.714318363615587e-05,
"loss": 3.0752,
"step": 500
},
{
"epoch": 0.34,
"learning_rate": 4.428636727231174e-05,
"loss": 1.3251,
"step": 1000
},
{
"epoch": 0.51,
"learning_rate": 4.142955090846761e-05,
"loss": 0.9722,
"step": 1500
},
{
"epoch": 0.69,
"learning_rate": 3.857273454462347e-05,
"loss": 0.923,
"step": 2000
},
{
"epoch": 0.86,
"learning_rate": 3.571591818077934e-05,
"loss": 0.8754,
"step": 2500
},
{
"epoch": 1.03,
"learning_rate": 3.285910181693521e-05,
"loss": 0.7869,
"step": 3000
},
{
"epoch": 1.2,
"learning_rate": 3.0002285453091078e-05,
"loss": 0.5717,
"step": 3500
},
{
"epoch": 1.37,
"learning_rate": 2.7145469089246944e-05,
"loss": 0.5819,
"step": 4000
},
{
"epoch": 1.54,
"learning_rate": 2.4288652725402813e-05,
"loss": 0.568,
"step": 4500
},
{
"epoch": 1.71,
"learning_rate": 2.143183636155868e-05,
"loss": 0.5581,
"step": 5000
},
{
"epoch": 1.89,
"learning_rate": 1.8575019997714548e-05,
"loss": 0.5506,
"step": 5500
},
{
"epoch": 2.06,
"learning_rate": 1.5718203633870414e-05,
"loss": 0.4619,
"step": 6000
},
{
"epoch": 2.23,
"learning_rate": 1.2861387270026285e-05,
"loss": 0.3245,
"step": 6500
},
{
"epoch": 2.4,
"learning_rate": 1.0004570906182151e-05,
"loss": 0.3176,
"step": 7000
},
{
"epoch": 2.57,
"learning_rate": 7.147754542338019e-06,
"loss": 0.3069,
"step": 7500
},
{
"epoch": 2.74,
"learning_rate": 4.290938178493887e-06,
"loss": 0.3089,
"step": 8000
},
{
"epoch": 2.91,
"learning_rate": 1.4341218146497544e-06,
"loss": 0.2988,
"step": 8500
},
{
"epoch": 3.0,
"step": 8751,
"total_flos": 2.1528253275439104e+16,
"train_loss": 0.7404402195692145,
"train_runtime": 4141.2406,
"train_samples_per_second": 16.905,
"train_steps_per_second": 2.113
}
],
"max_steps": 8751,
"num_train_epochs": 3,
"total_flos": 2.1528253275439104e+16,
"trial_name": null,
"trial_params": null
}