fin_microsoft_deberta-v3-base / trainer_state.json
Ngawang's picture
Upload model files
645f088
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 25150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.900198807157058e-05,
"loss": 3.4819,
"step": 2515
},
{
"epoch": 1.0,
"eval_loss": 2.2373175621032715,
"eval_runtime": 310.738,
"eval_samples_per_second": 28.777,
"eval_steps_per_second": 1.799,
"step": 2515
},
{
"epoch": 2.0,
"learning_rate": 1.8002385685884693e-05,
"loss": 2.1965,
"step": 5030
},
{
"epoch": 2.0,
"eval_loss": 1.8015460968017578,
"eval_runtime": 310.6652,
"eval_samples_per_second": 28.783,
"eval_steps_per_second": 1.799,
"step": 5030
},
{
"epoch": 3.0,
"learning_rate": 1.700278330019881e-05,
"loss": 1.8805,
"step": 7545
},
{
"epoch": 3.0,
"eval_loss": 1.5936048030853271,
"eval_runtime": 310.3586,
"eval_samples_per_second": 28.812,
"eval_steps_per_second": 1.801,
"step": 7545
},
{
"epoch": 4.0,
"learning_rate": 1.6003180914512923e-05,
"loss": 1.6926,
"step": 10060
},
{
"epoch": 4.0,
"eval_loss": 1.451478362083435,
"eval_runtime": 310.2705,
"eval_samples_per_second": 28.82,
"eval_steps_per_second": 1.802,
"step": 10060
},
{
"epoch": 5.0,
"learning_rate": 1.500357852882704e-05,
"loss": 1.5731,
"step": 12575
},
{
"epoch": 5.0,
"eval_loss": 1.3821080923080444,
"eval_runtime": 310.2193,
"eval_samples_per_second": 28.825,
"eval_steps_per_second": 1.802,
"step": 12575
},
{
"epoch": 6.0,
"learning_rate": 1.4003976143141154e-05,
"loss": 1.4862,
"step": 15090
},
{
"epoch": 6.0,
"eval_loss": 1.3046475648880005,
"eval_runtime": 310.6594,
"eval_samples_per_second": 28.784,
"eval_steps_per_second": 1.799,
"step": 15090
},
{
"epoch": 7.0,
"learning_rate": 1.3003976143141155e-05,
"loss": 1.4197,
"step": 17605
},
{
"epoch": 7.0,
"eval_loss": 1.253291368484497,
"eval_runtime": 310.6843,
"eval_samples_per_second": 28.782,
"eval_steps_per_second": 1.799,
"step": 17605
},
{
"epoch": 8.0,
"learning_rate": 1.20051689860835e-05,
"loss": 1.368,
"step": 20120
},
{
"epoch": 8.0,
"eval_loss": 1.221903920173645,
"eval_runtime": 310.6521,
"eval_samples_per_second": 28.785,
"eval_steps_per_second": 1.799,
"step": 20120
},
{
"epoch": 9.0,
"learning_rate": 1.10051689860835e-05,
"loss": 1.3235,
"step": 22635
},
{
"epoch": 9.0,
"eval_loss": 1.1836310625076294,
"eval_runtime": 310.7209,
"eval_samples_per_second": 28.778,
"eval_steps_per_second": 1.799,
"step": 22635
},
{
"epoch": 10.0,
"learning_rate": 1.00051689860835e-05,
"loss": 1.2855,
"step": 25150
},
{
"epoch": 10.0,
"eval_loss": 1.1522128582000732,
"eval_runtime": 310.7087,
"eval_samples_per_second": 28.779,
"eval_steps_per_second": 1.799,
"step": 25150
}
],
"max_steps": 50300,
"num_train_epochs": 20,
"total_flos": 5.301609303112704e+16,
"trial_name": null,
"trial_params": null
}