{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.935691318327976,
"global_step": 5580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.46,
"learning_rate": 0.0002,
"loss": 1.2054,
"step": 128
},
{
"epoch": 0.91,
"learning_rate": 0.0002,
"loss": 1.0285,
"step": 256
},
{
"epoch": 1.37,
"learning_rate": 0.0002,
"loss": 0.976,
"step": 384
},
{
"epoch": 1.83,
"learning_rate": 0.0002,
"loss": 0.9643,
"step": 512
},
{
"epoch": 2.29,
"learning_rate": 0.0002,
"loss": 0.9371,
"step": 640
},
{
"epoch": 2.74,
"learning_rate": 0.0002,
"loss": 0.9214,
"step": 768
},
{
"epoch": 3.2,
"learning_rate": 0.0002,
"loss": 0.9016,
"step": 896
},
{
"epoch": 3.66,
"learning_rate": 0.0002,
"loss": 0.8862,
"step": 1024
},
{
"epoch": 4.12,
"learning_rate": 0.0002,
"loss": 0.8792,
"step": 1152
},
{
"epoch": 4.57,
"learning_rate": 0.0002,
"loss": 0.864,
"step": 1280
},
{
"epoch": 5.03,
"learning_rate": 0.0002,
"loss": 0.8588,
"step": 1408
},
{
"epoch": 5.49,
"learning_rate": 0.0002,
"loss": 0.8374,
"step": 1536
},
{
"epoch": 5.94,
"learning_rate": 0.0002,
"loss": 0.8414,
"step": 1664
},
{
"epoch": 6.4,
"learning_rate": 0.0002,
"loss": 0.8152,
"step": 1792
},
{
"epoch": 6.86,
"learning_rate": 0.0002,
"loss": 0.8216,
"step": 1920
},
{
"epoch": 7.32,
"learning_rate": 0.0002,
"loss": 0.8031,
"step": 2048
},
{
"epoch": 7.77,
"learning_rate": 0.0002,
"loss": 0.8031,
"step": 2176
},
{
"epoch": 8.23,
"learning_rate": 0.0002,
"loss": 0.7887,
"step": 2304
},
{
"epoch": 8.69,
"learning_rate": 0.0002,
"loss": 0.7847,
"step": 2432
},
{
"epoch": 9.15,
"learning_rate": 0.0002,
"loss": 0.7746,
"step": 2560
},
{
"epoch": 9.6,
"learning_rate": 0.0002,
"loss": 0.764,
"step": 2688
},
{
"epoch": 10.06,
"learning_rate": 0.0002,
"loss": 0.7652,
"step": 2816
},
{
"epoch": 10.52,
"learning_rate": 0.0002,
"loss": 0.7425,
"step": 2944
},
{
"epoch": 10.98,
"learning_rate": 0.0002,
"loss": 0.7584,
"step": 3072
},
{
"epoch": 11.43,
"learning_rate": 0.0002,
"loss": 0.7302,
"step": 3200
},
{
"epoch": 11.89,
"learning_rate": 0.0002,
"loss": 0.7374,
"step": 3328
},
{
"epoch": 12.35,
"learning_rate": 0.0002,
"loss": 0.7187,
"step": 3456
},
{
"epoch": 12.8,
"learning_rate": 0.0002,
"loss": 0.7222,
"step": 3584
},
{
"epoch": 13.26,
"learning_rate": 0.0002,
"loss": 0.7096,
"step": 3712
},
{
"epoch": 13.72,
"learning_rate": 0.0002,
"loss": 0.7057,
"step": 3840
},
{
"epoch": 14.18,
"learning_rate": 0.0002,
"loss": 0.7003,
"step": 3968
},
{
"epoch": 14.63,
"learning_rate": 0.0002,
"loss": 0.6879,
"step": 4096
},
{
"epoch": 15.09,
"learning_rate": 0.0002,
"loss": 0.6928,
"step": 4224
},
{
"epoch": 15.55,
"learning_rate": 0.0002,
"loss": 0.6749,
"step": 4352
},
{
"epoch": 16.01,
"learning_rate": 0.0002,
"loss": 0.6857,
"step": 4480
},
{
"epoch": 16.46,
"learning_rate": 0.0002,
"loss": 0.6545,
"step": 4608
},
{
"epoch": 16.92,
"learning_rate": 0.0002,
"loss": 0.6724,
"step": 4736
},
{
"epoch": 17.38,
"learning_rate": 0.0002,
"loss": 0.6434,
"step": 4864
},
{
"epoch": 17.83,
"learning_rate": 0.0002,
"loss": 0.6581,
"step": 4992
},
{
"epoch": 18.29,
"learning_rate": 0.0002,
"loss": 0.644,
"step": 5120
},
{
"epoch": 18.75,
"learning_rate": 0.0002,
"loss": 0.6392,
"step": 5248
},
{
"epoch": 19.21,
"learning_rate": 0.0002,
"loss": 0.6346,
"step": 5376
},
{
"epoch": 19.66,
"learning_rate": 0.0002,
"loss": 0.6286,
"step": 5504
}
],
"max_steps": 5580,
"num_train_epochs": 20,
"total_flos": 3.311540504834867e+16,
"trial_name": null,
"trial_params": null
}