File size: 2,146 Bytes
00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 79965b6 00259b7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | {
"best_metric": 0.27657164760495423,
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-4276",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 4276,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"learning_rate": 1.3504098486610872e-06,
"loss": 0.6235,
"step": 500
},
{
"epoch": 0.47,
"learning_rate": 1.1715949852260916e-06,
"loss": 0.6084,
"step": 1000
},
{
"epoch": 0.7,
"learning_rate": 9.92780121791096e-07,
"loss": 0.5687,
"step": 1500
},
{
"epoch": 0.94,
"learning_rate": 8.139652583561002e-07,
"loss": 0.5599,
"step": 2000
},
{
"epoch": 1.0,
"eval_loss": 0.5971149802207947,
"eval_matthews_correlation": 0.0463559874942472,
"eval_runtime": 0.7351,
"eval_samples_per_second": 1418.884,
"eval_steps_per_second": 89.786,
"step": 2138
},
{
"epoch": 1.17,
"learning_rate": 6.351503949211046e-07,
"loss": 0.5449,
"step": 2500
},
{
"epoch": 1.4,
"learning_rate": 4.563355314861089e-07,
"loss": 0.5175,
"step": 3000
},
{
"epoch": 1.64,
"learning_rate": 2.7752066805111325e-07,
"loss": 0.535,
"step": 3500
},
{
"epoch": 1.87,
"learning_rate": 9.870580461611762e-08,
"loss": 0.5162,
"step": 4000
},
{
"epoch": 2.0,
"eval_loss": 0.571711540222168,
"eval_matthews_correlation": 0.27657164760495423,
"eval_runtime": 0.7318,
"eval_samples_per_second": 1425.327,
"eval_steps_per_second": 90.193,
"step": 4276
}
],
"logging_steps": 500,
"max_steps": 4276,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 65111866045632.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": {
"learning_rate": 1.529224712096083e-06,
"num_train_epochs": 2,
"per_device_train_batch_size": 4,
"seed": 5
}
}
|