mt0-base_QG_SS / trainer_state.json
intelia-lab's picture
Upload folder using huggingface_hub
dbefec2 verified
{
"best_metric": 0.8784255981445312,
"best_model_checkpoint": "../Modelos/mt0_QG_SQAC_SQuAD 17-11-22_10:03:48/checkpoint-1200",
"epoch": 2.9976617303195634,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"eval_loss": 1.0145061016082764,
"eval_runtime": 65.3999,
"eval_samples_per_second": 190.122,
"eval_steps_per_second": 11.896,
"step": 50
},
{
"epoch": 0.25,
"eval_loss": 0.9660477638244629,
"eval_runtime": 65.4002,
"eval_samples_per_second": 190.122,
"eval_steps_per_second": 11.896,
"step": 100
},
{
"epoch": 0.37,
"eval_loss": 0.9502729773521423,
"eval_runtime": 65.4029,
"eval_samples_per_second": 190.114,
"eval_steps_per_second": 11.895,
"step": 150
},
{
"epoch": 0.5,
"eval_loss": 0.9393932819366455,
"eval_runtime": 65.4026,
"eval_samples_per_second": 190.115,
"eval_steps_per_second": 11.896,
"step": 200
},
{
"epoch": 0.62,
"eval_loss": 0.9244149923324585,
"eval_runtime": 65.4024,
"eval_samples_per_second": 190.115,
"eval_steps_per_second": 11.896,
"step": 250
},
{
"epoch": 0.75,
"eval_loss": 0.912540078163147,
"eval_runtime": 65.4032,
"eval_samples_per_second": 190.113,
"eval_steps_per_second": 11.895,
"step": 300
},
{
"epoch": 0.87,
"eval_loss": 0.9208225607872009,
"eval_runtime": 65.4136,
"eval_samples_per_second": 190.083,
"eval_steps_per_second": 11.894,
"step": 350
},
{
"epoch": 1.0,
"eval_loss": 0.907939076423645,
"eval_runtime": 65.3953,
"eval_samples_per_second": 190.136,
"eval_steps_per_second": 11.897,
"step": 400
},
{
"epoch": 1.12,
"eval_loss": 0.903082013130188,
"eval_runtime": 65.3937,
"eval_samples_per_second": 190.141,
"eval_steps_per_second": 11.897,
"step": 450
},
{
"epoch": 1.25,
"learning_rate": 0.001,
"loss": 1.1491,
"step": 500
},
{
"epoch": 1.25,
"eval_loss": 0.8966090083122253,
"eval_runtime": 65.4073,
"eval_samples_per_second": 190.101,
"eval_steps_per_second": 11.895,
"step": 500
},
{
"epoch": 1.37,
"eval_loss": 0.8984885215759277,
"eval_runtime": 65.3845,
"eval_samples_per_second": 190.167,
"eval_steps_per_second": 11.899,
"step": 550
},
{
"epoch": 1.5,
"eval_loss": 0.8956364393234253,
"eval_runtime": 65.3948,
"eval_samples_per_second": 190.137,
"eval_steps_per_second": 11.897,
"step": 600
},
{
"epoch": 1.62,
"eval_loss": 0.8861657381057739,
"eval_runtime": 65.3936,
"eval_samples_per_second": 190.141,
"eval_steps_per_second": 11.897,
"step": 650
},
{
"epoch": 1.75,
"eval_loss": 0.8837835788726807,
"eval_runtime": 65.4055,
"eval_samples_per_second": 190.106,
"eval_steps_per_second": 11.895,
"step": 700
},
{
"epoch": 1.87,
"eval_loss": 0.875281035900116,
"eval_runtime": 65.3939,
"eval_samples_per_second": 190.14,
"eval_steps_per_second": 11.897,
"step": 750
},
{
"epoch": 2.0,
"eval_loss": 0.889029860496521,
"eval_runtime": 65.4822,
"eval_samples_per_second": 189.884,
"eval_steps_per_second": 11.881,
"step": 800
},
{
"epoch": 2.12,
"eval_loss": 0.9000933766365051,
"eval_runtime": 65.3866,
"eval_samples_per_second": 190.161,
"eval_steps_per_second": 11.898,
"step": 850
},
{
"epoch": 2.25,
"eval_loss": 0.897091805934906,
"eval_runtime": 65.4252,
"eval_samples_per_second": 190.049,
"eval_steps_per_second": 11.891,
"step": 900
},
{
"epoch": 2.37,
"eval_loss": 0.8837863206863403,
"eval_runtime": 65.4877,
"eval_samples_per_second": 189.868,
"eval_steps_per_second": 11.88,
"step": 950
},
{
"epoch": 2.5,
"learning_rate": 0.001,
"loss": 0.7877,
"step": 1000
},
{
"epoch": 2.5,
"eval_loss": 0.8839919567108154,
"eval_runtime": 65.4142,
"eval_samples_per_second": 190.081,
"eval_steps_per_second": 11.893,
"step": 1000
},
{
"epoch": 2.62,
"eval_loss": 0.8844324350357056,
"eval_runtime": 65.4264,
"eval_samples_per_second": 190.046,
"eval_steps_per_second": 11.891,
"step": 1050
},
{
"epoch": 2.75,
"eval_loss": 0.8891133666038513,
"eval_runtime": 65.4136,
"eval_samples_per_second": 190.083,
"eval_steps_per_second": 11.894,
"step": 1100
},
{
"epoch": 2.87,
"eval_loss": 0.8817307949066162,
"eval_runtime": 65.4132,
"eval_samples_per_second": 190.084,
"eval_steps_per_second": 11.894,
"step": 1150
},
{
"epoch": 3.0,
"eval_loss": 0.8784255981445312,
"eval_runtime": 65.4323,
"eval_samples_per_second": 190.029,
"eval_steps_per_second": 11.89,
"step": 1200
}
],
"max_steps": 1200,
"num_train_epochs": 3,
"total_flos": 3.392579398501663e+17,
"trial_name": null,
"trial_params": null
}