GPT2_random_function_42 / trainer_state.json
xiulinyang's picture
Upload folder using huggingface_hub
24d9031 verified
{
"best_metric": 4.843646049499512,
"best_model_checkpoint": "models/GPT2_random_function_42/checkpoint-64260",
"epoch": 10.0,
"global_step": 64260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 1e-05,
"loss": 8.1532,
"step": 1000
},
{
"epoch": 0.31,
"learning_rate": 2e-05,
"loss": 6.9215,
"step": 2000
},
{
"epoch": 0.47,
"learning_rate": 3e-05,
"loss": 6.5875,
"step": 3000
},
{
"epoch": 0.62,
"learning_rate": 4e-05,
"loss": 6.333,
"step": 4000
},
{
"epoch": 0.78,
"learning_rate": 5e-05,
"loss": 6.1289,
"step": 5000
},
{
"epoch": 0.93,
"learning_rate": 6e-05,
"loss": 5.9624,
"step": 6000
},
{
"epoch": 1.0,
"eval_accuracy": 0.17896217672198259,
"eval_loss": 5.776374816894531,
"eval_runtime": 2.254,
"eval_samples_per_second": 523.954,
"eval_steps_per_second": 4.437,
"step": 6426
},
{
"epoch": 1.09,
"learning_rate": 7e-05,
"loss": 5.8201,
"step": 7000
},
{
"epoch": 1.24,
"learning_rate": 8e-05,
"loss": 5.7047,
"step": 8000
},
{
"epoch": 1.4,
"learning_rate": 9e-05,
"loss": 5.6153,
"step": 9000
},
{
"epoch": 1.56,
"learning_rate": 0.0001,
"loss": 5.5372,
"step": 10000
},
{
"epoch": 1.71,
"learning_rate": 9.889000000000001e-05,
"loss": 5.4722,
"step": 11000
},
{
"epoch": 1.87,
"learning_rate": 9.77788888888889e-05,
"loss": 5.4144,
"step": 12000
},
{
"epoch": 2.0,
"eval_accuracy": 0.20932480815003968,
"eval_loss": 5.288669586181641,
"eval_runtime": 2.2534,
"eval_samples_per_second": 524.088,
"eval_steps_per_second": 4.438,
"step": 12852
},
{
"epoch": 2.02,
"learning_rate": 9.66688888888889e-05,
"loss": 5.3583,
"step": 13000
},
{
"epoch": 2.18,
"learning_rate": 9.555777777777778e-05,
"loss": 5.2917,
"step": 14000
},
{
"epoch": 2.33,
"learning_rate": 9.444777777777778e-05,
"loss": 5.261,
"step": 15000
},
{
"epoch": 2.49,
"learning_rate": 9.333666666666667e-05,
"loss": 5.2364,
"step": 16000
},
{
"epoch": 2.65,
"learning_rate": 9.222666666666668e-05,
"loss": 5.2124,
"step": 17000
},
{
"epoch": 2.8,
"learning_rate": 9.111555555555556e-05,
"loss": 5.1899,
"step": 18000
},
{
"epoch": 2.96,
"learning_rate": 9.000555555555557e-05,
"loss": 5.1712,
"step": 19000
},
{
"epoch": 3.0,
"eval_accuracy": 0.22311266976471295,
"eval_loss": 5.106184959411621,
"eval_runtime": 2.2518,
"eval_samples_per_second": 524.46,
"eval_steps_per_second": 4.441,
"step": 19278
},
{
"epoch": 3.11,
"learning_rate": 8.889444444444444e-05,
"loss": 5.11,
"step": 20000
},
{
"epoch": 3.27,
"learning_rate": 8.778444444444445e-05,
"loss": 5.0904,
"step": 21000
},
{
"epoch": 3.42,
"learning_rate": 8.667333333333334e-05,
"loss": 5.0817,
"step": 22000
},
{
"epoch": 3.58,
"learning_rate": 8.556333333333334e-05,
"loss": 5.0734,
"step": 23000
},
{
"epoch": 3.73,
"learning_rate": 8.445222222222222e-05,
"loss": 5.0627,
"step": 24000
},
{
"epoch": 3.89,
"learning_rate": 8.334222222222222e-05,
"loss": 5.0536,
"step": 25000
},
{
"epoch": 4.0,
"eval_accuracy": 0.23168674618466933,
"eval_loss": 5.014615535736084,
"eval_runtime": 2.2491,
"eval_samples_per_second": 525.091,
"eval_steps_per_second": 4.446,
"step": 25704
},
{
"epoch": 4.05,
"learning_rate": 8.223111111111111e-05,
"loss": 5.0232,
"step": 26000
},
{
"epoch": 4.2,
"learning_rate": 8.112111111111111e-05,
"loss": 4.977,
"step": 27000
},
{
"epoch": 4.36,
"learning_rate": 8.001e-05,
"loss": 4.9799,
"step": 28000
},
{
"epoch": 4.51,
"learning_rate": 7.890000000000001e-05,
"loss": 4.9757,
"step": 29000
},
{
"epoch": 4.67,
"learning_rate": 7.77888888888889e-05,
"loss": 4.9734,
"step": 30000
},
{
"epoch": 4.82,
"learning_rate": 7.667888888888889e-05,
"loss": 4.9701,
"step": 31000
},
{
"epoch": 4.98,
"learning_rate": 7.556777777777779e-05,
"loss": 4.9639,
"step": 32000
},
{
"epoch": 5.0,
"eval_accuracy": 0.2363138138638682,
"eval_loss": 4.959312438964844,
"eval_runtime": 2.2437,
"eval_samples_per_second": 526.357,
"eval_steps_per_second": 4.457,
"step": 32130
},
{
"epoch": 5.14,
"learning_rate": 7.445777777777778e-05,
"loss": 4.9014,
"step": 33000
},
{
"epoch": 5.29,
"learning_rate": 7.334666666666668e-05,
"loss": 4.9002,
"step": 34000
},
{
"epoch": 5.45,
"learning_rate": 7.223666666666667e-05,
"loss": 4.9022,
"step": 35000
},
{
"epoch": 5.6,
"learning_rate": 7.112555555555556e-05,
"loss": 4.9051,
"step": 36000
},
{
"epoch": 5.76,
"learning_rate": 7.001555555555555e-05,
"loss": 4.9037,
"step": 37000
},
{
"epoch": 5.91,
"learning_rate": 6.890444444444445e-05,
"loss": 4.9031,
"step": 38000
},
{
"epoch": 6.0,
"eval_accuracy": 0.24040750198350525,
"eval_loss": 4.919950485229492,
"eval_runtime": 2.3095,
"eval_samples_per_second": 511.362,
"eval_steps_per_second": 4.33,
"step": 38556
},
{
"epoch": 6.07,
"learning_rate": 6.779444444444444e-05,
"loss": 4.8674,
"step": 39000
},
{
"epoch": 6.22,
"learning_rate": 6.668333333333333e-05,
"loss": 4.836,
"step": 40000
},
{
"epoch": 6.38,
"learning_rate": 6.557333333333332e-05,
"loss": 4.8425,
"step": 41000
},
{
"epoch": 6.54,
"learning_rate": 6.446222222222223e-05,
"loss": 4.846,
"step": 42000
},
{
"epoch": 6.69,
"learning_rate": 6.335222222222222e-05,
"loss": 4.849,
"step": 43000
},
{
"epoch": 6.85,
"learning_rate": 6.224111111111112e-05,
"loss": 4.8501,
"step": 44000
},
{
"epoch": 7.0,
"eval_accuracy": 0.24246101328781827,
"eval_loss": 4.8934783935546875,
"eval_runtime": 2.2468,
"eval_samples_per_second": 525.632,
"eval_steps_per_second": 4.451,
"step": 44982
},
{
"epoch": 7.0,
"learning_rate": 6.113111111111111e-05,
"loss": 4.8488,
"step": 45000
},
{
"epoch": 7.16,
"learning_rate": 6.002e-05,
"loss": 4.7787,
"step": 46000
},
{
"epoch": 7.31,
"learning_rate": 5.890888888888889e-05,
"loss": 4.7911,
"step": 47000
},
{
"epoch": 7.47,
"learning_rate": 5.779888888888889e-05,
"loss": 4.7989,
"step": 48000
},
{
"epoch": 7.63,
"learning_rate": 5.6687777777777785e-05,
"loss": 4.8007,
"step": 49000
},
{
"epoch": 7.78,
"learning_rate": 5.5577777777777784e-05,
"loss": 4.8028,
"step": 50000
},
{
"epoch": 7.94,
"learning_rate": 5.4466666666666665e-05,
"loss": 4.8034,
"step": 51000
},
{
"epoch": 8.0,
"eval_accuracy": 0.24542793708788094,
"eval_loss": 4.871953010559082,
"eval_runtime": 2.2466,
"eval_samples_per_second": 525.675,
"eval_steps_per_second": 4.451,
"step": 51408
},
{
"epoch": 8.09,
"learning_rate": 5.3356666666666663e-05,
"loss": 4.7612,
"step": 52000
},
{
"epoch": 8.25,
"learning_rate": 5.224555555555556e-05,
"loss": 4.7436,
"step": 53000
},
{
"epoch": 8.4,
"learning_rate": 5.1135555555555556e-05,
"loss": 4.7511,
"step": 54000
},
{
"epoch": 8.56,
"learning_rate": 5.002444444444445e-05,
"loss": 4.7584,
"step": 55000
},
{
"epoch": 8.71,
"learning_rate": 4.891444444444444e-05,
"loss": 4.762,
"step": 56000
},
{
"epoch": 8.87,
"learning_rate": 4.780333333333333e-05,
"loss": 4.7644,
"step": 57000
},
{
"epoch": 9.0,
"eval_accuracy": 0.24703474301106096,
"eval_loss": 4.856387615203857,
"eval_runtime": 2.2498,
"eval_samples_per_second": 524.927,
"eval_steps_per_second": 4.445,
"step": 57834
},
{
"epoch": 9.03,
"learning_rate": 4.6693333333333336e-05,
"loss": 4.7533,
"step": 58000
},
{
"epoch": 9.18,
"learning_rate": 4.558222222222222e-05,
"loss": 4.7005,
"step": 59000
},
{
"epoch": 9.34,
"learning_rate": 4.447222222222223e-05,
"loss": 4.7126,
"step": 60000
},
{
"epoch": 9.49,
"learning_rate": 4.3361111111111116e-05,
"loss": 4.7184,
"step": 61000
},
{
"epoch": 9.65,
"learning_rate": 4.2251111111111115e-05,
"loss": 4.7231,
"step": 62000
},
{
"epoch": 9.8,
"learning_rate": 4.114e-05,
"loss": 4.7254,
"step": 63000
},
{
"epoch": 9.96,
"learning_rate": 4.003e-05,
"loss": 4.7269,
"step": 64000
},
{
"epoch": 10.0,
"eval_accuracy": 0.2490015801369452,
"eval_loss": 4.843646049499512,
"eval_runtime": 2.2484,
"eval_samples_per_second": 525.261,
"eval_steps_per_second": 4.448,
"step": 64260
}
],
"max_steps": 100000,
"num_train_epochs": 16,
"total_flos": 5.37232748544e+17,
"trial_name": null,
"trial_params": null
}