PEFT
Safetensors
llama-quantized-1 / trainer_state.json
Albert Gong
Upload llama-quantized-1 PEFT adapter for arxiv classification
4ca2949 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.7742370848016797,
"eval_steps": 300,
"global_step": 37000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"eval_accuracy": 0.328,
"eval_loss": 2.7831807136535645,
"eval_runtime": 1186.4739,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 4.975e-05,
"loss": 4.2509,
"step": 500
},
{
"epoch": 0.04,
"eval_accuracy": 0.4605,
"eval_loss": 2.0948123931884766,
"eval_runtime": 1186.9029,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 600
},
{
"epoch": 0.07,
"eval_accuracy": 0.535,
"eval_loss": 1.6745976209640503,
"eval_runtime": 1186.3538,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 4.9500000000000004e-05,
"loss": 1.7807,
"step": 1000
},
{
"epoch": 0.09,
"eval_accuracy": 0.579,
"eval_loss": 1.4981111288070679,
"eval_runtime": 1186.31,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 4.9250000000000004e-05,
"loss": 1.572,
"step": 1500
},
{
"epoch": 0.11,
"eval_accuracy": 0.587,
"eval_loss": 1.4770824909210205,
"eval_runtime": 1186.999,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 1500
},
{
"epoch": 0.13,
"eval_accuracy": 0.608,
"eval_loss": 1.4447531700134277,
"eval_runtime": 1186.6764,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 1800
},
{
"epoch": 0.15,
"learning_rate": 4.9e-05,
"loss": 1.4599,
"step": 2000
},
{
"epoch": 0.16,
"eval_accuracy": 0.601,
"eval_loss": 1.4452807903289795,
"eval_runtime": 1187.0967,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 2100
},
{
"epoch": 0.18,
"eval_accuracy": 0.631,
"eval_loss": 1.3295071125030518,
"eval_runtime": 1187.5516,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 2400
},
{
"epoch": 0.19,
"learning_rate": 4.875e-05,
"loss": 1.359,
"step": 2500
},
{
"epoch": 0.2,
"eval_accuracy": 0.6205,
"eval_loss": 1.3333522081375122,
"eval_runtime": 1186.0391,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 2700
},
{
"epoch": 0.22,
"learning_rate": 4.85e-05,
"loss": 1.3251,
"step": 3000
},
{
"epoch": 0.22,
"eval_accuracy": 0.6255,
"eval_loss": 1.363963007926941,
"eval_runtime": 1187.0984,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 3000
},
{
"epoch": 0.25,
"eval_accuracy": 0.653,
"eval_loss": 1.268043041229248,
"eval_runtime": 1187.7128,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 3300
},
{
"epoch": 0.26,
"learning_rate": 4.825e-05,
"loss": 1.3291,
"step": 3500
},
{
"epoch": 0.27,
"eval_accuracy": 0.653,
"eval_loss": 1.2229315042495728,
"eval_runtime": 1187.3105,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 3600
},
{
"epoch": 0.29,
"eval_accuracy": 0.646,
"eval_loss": 1.2400562763214111,
"eval_runtime": 1187.2506,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 3900
},
{
"epoch": 0.3,
"learning_rate": 4.8e-05,
"loss": 1.3028,
"step": 4000
},
{
"epoch": 0.31,
"eval_accuracy": 0.6625,
"eval_loss": 1.1704109907150269,
"eval_runtime": 1187.1809,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 4200
},
{
"epoch": 0.34,
"learning_rate": 4.775e-05,
"loss": 1.241,
"step": 4500
},
{
"epoch": 0.34,
"eval_accuracy": 0.6485,
"eval_loss": 1.1750303506851196,
"eval_runtime": 1186.827,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 4500
},
{
"epoch": 0.36,
"eval_accuracy": 0.6595,
"eval_loss": 1.1775925159454346,
"eval_runtime": 1186.113,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 4800
},
{
"epoch": 0.37,
"learning_rate": 4.75e-05,
"loss": 1.1833,
"step": 5000
},
{
"epoch": 0.38,
"eval_accuracy": 0.6825,
"eval_loss": 1.1013399362564087,
"eval_runtime": 1187.1796,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 5100
},
{
"epoch": 0.4,
"eval_accuracy": 0.7125,
"eval_loss": 1.0126953125,
"eval_runtime": 1186.6585,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 5400
},
{
"epoch": 0.41,
"learning_rate": 4.7249999999999997e-05,
"loss": 1.1398,
"step": 5500
},
{
"epoch": 0.43,
"eval_accuracy": 0.7345,
"eval_loss": 0.93663489818573,
"eval_runtime": 1185.7147,
"eval_samples_per_second": 1.687,
"eval_steps_per_second": 0.211,
"step": 5700
},
{
"epoch": 0.45,
"learning_rate": 4.7e-05,
"loss": 0.924,
"step": 6000
},
{
"epoch": 0.45,
"eval_accuracy": 0.734,
"eval_loss": 0.8979936838150024,
"eval_runtime": 1185.9917,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 6000
},
{
"epoch": 0.47,
"eval_accuracy": 0.7415,
"eval_loss": 0.9261764287948608,
"eval_runtime": 1186.1073,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 6300
},
{
"epoch": 0.49,
"learning_rate": 4.6750000000000005e-05,
"loss": 0.9069,
"step": 6500
},
{
"epoch": 0.49,
"eval_accuracy": 0.7505,
"eval_loss": 0.8675258755683899,
"eval_runtime": 1186.5035,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 6600
},
{
"epoch": 0.52,
"eval_accuracy": 0.747,
"eval_loss": 0.9057780504226685,
"eval_runtime": 1186.5258,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 6900
},
{
"epoch": 0.52,
"learning_rate": 4.6500000000000005e-05,
"loss": 0.859,
"step": 7000
},
{
"epoch": 0.54,
"eval_accuracy": 0.768,
"eval_loss": 0.8729298710823059,
"eval_runtime": 1185.9339,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 7200
},
{
"epoch": 0.56,
"learning_rate": 4.6250000000000006e-05,
"loss": 0.8543,
"step": 7500
},
{
"epoch": 0.56,
"eval_accuracy": 0.747,
"eval_loss": 0.9086282849311829,
"eval_runtime": 1186.632,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 7500
},
{
"epoch": 0.58,
"eval_accuracy": 0.7495,
"eval_loss": 0.9070839285850525,
"eval_runtime": 1185.9595,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 7800
},
{
"epoch": 0.6,
"learning_rate": 4.600000000000001e-05,
"loss": 0.8249,
"step": 8000
},
{
"epoch": 0.61,
"eval_accuracy": 0.758,
"eval_loss": 0.8381221890449524,
"eval_runtime": 1187.0834,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 8100
},
{
"epoch": 0.63,
"eval_accuracy": 0.7575,
"eval_loss": 0.8683782815933228,
"eval_runtime": 1186.7854,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 8400
},
{
"epoch": 0.64,
"learning_rate": 4.575e-05,
"loss": 0.8244,
"step": 8500
},
{
"epoch": 0.65,
"eval_accuracy": 0.758,
"eval_loss": 0.8148394227027893,
"eval_runtime": 1187.1442,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 8700
},
{
"epoch": 0.67,
"learning_rate": 4.55e-05,
"loss": 0.8829,
"step": 9000
},
{
"epoch": 0.67,
"eval_accuracy": 0.7475,
"eval_loss": 0.8369246125221252,
"eval_runtime": 1186.5959,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 9000
},
{
"epoch": 0.7,
"eval_accuracy": 0.7685,
"eval_loss": 0.7999008297920227,
"eval_runtime": 1186.8545,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 9300
},
{
"epoch": 0.71,
"learning_rate": 4.525e-05,
"loss": 0.8641,
"step": 9500
},
{
"epoch": 0.72,
"eval_accuracy": 0.766,
"eval_loss": 0.8229847550392151,
"eval_runtime": 1186.889,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 9600
},
{
"epoch": 0.74,
"eval_accuracy": 0.759,
"eval_loss": 0.805006206035614,
"eval_runtime": 1187.0511,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 9900
},
{
"epoch": 0.75,
"learning_rate": 4.5e-05,
"loss": 0.8418,
"step": 10000
},
{
"epoch": 0.76,
"eval_accuracy": 0.763,
"eval_loss": 0.8287988901138306,
"eval_runtime": 1186.3691,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 10200
},
{
"epoch": 0.79,
"learning_rate": 4.4750000000000004e-05,
"loss": 0.7795,
"step": 10500
},
{
"epoch": 0.79,
"eval_accuracy": 0.7595,
"eval_loss": 0.829521656036377,
"eval_runtime": 1187.0672,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 10500
},
{
"epoch": 0.81,
"eval_accuracy": 0.7715,
"eval_loss": 0.7910842299461365,
"eval_runtime": 1187.3028,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 10800
},
{
"epoch": 0.82,
"learning_rate": 4.4500000000000004e-05,
"loss": 0.7874,
"step": 11000
},
{
"epoch": 0.83,
"eval_accuracy": 0.765,
"eval_loss": 0.8317267298698425,
"eval_runtime": 1187.1774,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 11100
},
{
"epoch": 0.85,
"eval_accuracy": 0.7635,
"eval_loss": 0.8163227438926697,
"eval_runtime": 1186.9338,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 11400
},
{
"epoch": 0.86,
"learning_rate": 4.4250000000000005e-05,
"loss": 0.8084,
"step": 11500
},
{
"epoch": 0.88,
"eval_accuracy": 0.7695,
"eval_loss": 0.7647069692611694,
"eval_runtime": 1188.3228,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 11700
},
{
"epoch": 0.9,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.8158,
"step": 12000
},
{
"epoch": 0.9,
"eval_accuracy": 0.762,
"eval_loss": 0.7762951850891113,
"eval_runtime": 1187.5448,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 12000
},
{
"epoch": 0.92,
"eval_accuracy": 0.776,
"eval_loss": 0.748843789100647,
"eval_runtime": 1186.9009,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 12300
},
{
"epoch": 0.94,
"learning_rate": 4.375e-05,
"loss": 0.8213,
"step": 12500
},
{
"epoch": 0.94,
"eval_accuracy": 0.7635,
"eval_loss": 0.8027353286743164,
"eval_runtime": 1186.9766,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 12600
},
{
"epoch": 0.97,
"eval_accuracy": 0.7815,
"eval_loss": 0.7765725255012512,
"eval_runtime": 1187.3977,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 12900
},
{
"epoch": 0.97,
"learning_rate": 4.35e-05,
"loss": 0.7743,
"step": 13000
},
{
"epoch": 0.99,
"eval_accuracy": 0.7825,
"eval_loss": 0.7971859574317932,
"eval_runtime": 1187.3337,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 13200
},
{
"epoch": 1.01,
"learning_rate": 4.325e-05,
"loss": 0.7384,
"step": 13500
},
{
"epoch": 1.01,
"eval_accuracy": 0.7715,
"eval_loss": 0.8095068335533142,
"eval_runtime": 1187.8109,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 13500
},
{
"epoch": 1.03,
"eval_accuracy": 0.7665,
"eval_loss": 0.8057354092597961,
"eval_runtime": 1187.1204,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 13800
},
{
"epoch": 1.05,
"learning_rate": 4.3e-05,
"loss": 0.6811,
"step": 14000
},
{
"epoch": 1.06,
"eval_accuracy": 0.765,
"eval_loss": 0.8190826773643494,
"eval_runtime": 1186.898,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 14100
},
{
"epoch": 1.08,
"eval_accuracy": 0.7815,
"eval_loss": 0.8019638657569885,
"eval_runtime": 1186.6924,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 14400
},
{
"epoch": 1.09,
"learning_rate": 4.275e-05,
"loss": 0.6512,
"step": 14500
},
{
"epoch": 1.1,
"eval_accuracy": 0.774,
"eval_loss": 0.8071369528770447,
"eval_runtime": 1187.0834,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 14700
},
{
"epoch": 1.12,
"learning_rate": 4.25e-05,
"loss": 0.6752,
"step": 15000
},
{
"epoch": 1.12,
"eval_accuracy": 0.781,
"eval_loss": 0.7650523781776428,
"eval_runtime": 1188.8808,
"eval_samples_per_second": 1.682,
"eval_steps_per_second": 0.21,
"step": 15000
},
{
"epoch": 1.15,
"eval_accuracy": 0.77,
"eval_loss": 0.8068825006484985,
"eval_runtime": 1187.497,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 15300
},
{
"epoch": 1.16,
"learning_rate": 4.2250000000000004e-05,
"loss": 0.6467,
"step": 15500
},
{
"epoch": 1.17,
"eval_accuracy": 0.772,
"eval_loss": 0.817916989326477,
"eval_runtime": 1188.2442,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 15600
},
{
"epoch": 1.19,
"eval_accuracy": 0.7725,
"eval_loss": 0.7762767672538757,
"eval_runtime": 1188.1044,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 15900
},
{
"epoch": 1.2,
"learning_rate": 4.2e-05,
"loss": 0.6815,
"step": 16000
},
{
"epoch": 1.21,
"eval_accuracy": 0.7815,
"eval_loss": 0.804487943649292,
"eval_runtime": 1186.96,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 16200
},
{
"epoch": 1.24,
"learning_rate": 4.175e-05,
"loss": 0.6652,
"step": 16500
},
{
"epoch": 1.24,
"eval_accuracy": 0.7685,
"eval_loss": 0.7841760516166687,
"eval_runtime": 1192.1415,
"eval_samples_per_second": 1.678,
"eval_steps_per_second": 0.21,
"step": 16500
},
{
"epoch": 1.26,
"eval_accuracy": 0.769,
"eval_loss": 0.7735722661018372,
"eval_runtime": 1187.6295,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 16800
},
{
"epoch": 1.27,
"learning_rate": 4.15e-05,
"loss": 0.6787,
"step": 17000
},
{
"epoch": 1.28,
"eval_accuracy": 0.7725,
"eval_loss": 0.8701850771903992,
"eval_runtime": 1186.4462,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 17100
},
{
"epoch": 1.3,
"eval_accuracy": 0.781,
"eval_loss": 0.7945307493209839,
"eval_runtime": 1186.8131,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 17400
},
{
"epoch": 1.31,
"learning_rate": 4.125e-05,
"loss": 0.6731,
"step": 17500
},
{
"epoch": 1.33,
"eval_accuracy": 0.772,
"eval_loss": 0.8117572069168091,
"eval_runtime": 1187.4347,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 17700
},
{
"epoch": 1.35,
"learning_rate": 4.1e-05,
"loss": 0.7126,
"step": 18000
},
{
"epoch": 1.35,
"eval_accuracy": 0.767,
"eval_loss": 0.8045957088470459,
"eval_runtime": 1187.2145,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 18000
},
{
"epoch": 1.37,
"eval_accuracy": 0.778,
"eval_loss": 0.84222012758255,
"eval_runtime": 1187.2901,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 18300
},
{
"epoch": 1.39,
"learning_rate": 4.075e-05,
"loss": 0.6724,
"step": 18500
},
{
"epoch": 1.39,
"eval_accuracy": 0.7785,
"eval_loss": 0.8121286630630493,
"eval_runtime": 1187.0784,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 18600
},
{
"epoch": 1.42,
"eval_accuracy": 0.775,
"eval_loss": 0.8152139782905579,
"eval_runtime": 1188.0198,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 18900
},
{
"epoch": 1.42,
"learning_rate": 4.05e-05,
"loss": 0.6797,
"step": 19000
},
{
"epoch": 1.44,
"eval_accuracy": 0.768,
"eval_loss": 0.7854306697845459,
"eval_runtime": 1187.194,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 19200
},
{
"epoch": 1.46,
"learning_rate": 4.025e-05,
"loss": 0.6684,
"step": 19500
},
{
"epoch": 1.46,
"eval_accuracy": 0.7715,
"eval_loss": 0.8213361501693726,
"eval_runtime": 1186.9831,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 19500
},
{
"epoch": 1.48,
"eval_accuracy": 0.7745,
"eval_loss": 0.7918490767478943,
"eval_runtime": 1186.5427,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 19800
},
{
"epoch": 1.5,
"learning_rate": 4e-05,
"loss": 0.6613,
"step": 20000
},
{
"epoch": 1.51,
"eval_accuracy": 0.7845,
"eval_loss": 0.7775911092758179,
"eval_runtime": 1186.9067,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 20100
},
{
"epoch": 1.53,
"eval_accuracy": 0.7755,
"eval_loss": 0.8130319714546204,
"eval_runtime": 1187.1843,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 20400
},
{
"epoch": 1.54,
"learning_rate": 3.9750000000000004e-05,
"loss": 0.655,
"step": 20500
},
{
"epoch": 1.55,
"eval_accuracy": 0.778,
"eval_loss": 0.7867146134376526,
"eval_runtime": 1185.9114,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 20700
},
{
"epoch": 1.57,
"learning_rate": 3.9500000000000005e-05,
"loss": 0.6778,
"step": 21000
},
{
"epoch": 1.57,
"eval_accuracy": 0.781,
"eval_loss": 0.7961477637290955,
"eval_runtime": 1186.2535,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 21000
},
{
"epoch": 1.6,
"eval_accuracy": 0.7745,
"eval_loss": 0.7971734404563904,
"eval_runtime": 1186.9143,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 21300
},
{
"epoch": 1.61,
"learning_rate": 3.9250000000000005e-05,
"loss": 0.6517,
"step": 21500
},
{
"epoch": 1.62,
"eval_accuracy": 0.7775,
"eval_loss": 0.7863536477088928,
"eval_runtime": 1186.9914,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 21600
},
{
"epoch": 1.64,
"eval_accuracy": 0.7915,
"eval_loss": 0.8122566938400269,
"eval_runtime": 1186.3977,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 21900
},
{
"epoch": 1.65,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.6406,
"step": 22000
},
{
"epoch": 1.66,
"eval_accuracy": 0.7925,
"eval_loss": 0.7533332705497742,
"eval_runtime": 1187.9251,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 22200
},
{
"epoch": 1.69,
"learning_rate": 3.875e-05,
"loss": 0.662,
"step": 22500
},
{
"epoch": 1.69,
"eval_accuracy": 0.78,
"eval_loss": 0.7849723100662231,
"eval_runtime": 1187.4101,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 22500
},
{
"epoch": 1.71,
"eval_accuracy": 0.7885,
"eval_loss": 0.7343565821647644,
"eval_runtime": 1187.2869,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 22800
},
{
"epoch": 1.72,
"learning_rate": 3.85e-05,
"loss": 0.6659,
"step": 23000
},
{
"epoch": 1.73,
"eval_accuracy": 0.7785,
"eval_loss": 0.789780855178833,
"eval_runtime": 1186.7745,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 23100
},
{
"epoch": 1.75,
"eval_accuracy": 0.79,
"eval_loss": 0.7574155330657959,
"eval_runtime": 1187.4419,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 23400
},
{
"epoch": 1.76,
"learning_rate": 3.825e-05,
"loss": 0.641,
"step": 23500
},
{
"epoch": 1.78,
"eval_accuracy": 0.7915,
"eval_loss": 0.7914082407951355,
"eval_runtime": 1187.5239,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 23700
},
{
"epoch": 1.8,
"learning_rate": 3.8e-05,
"loss": 0.6445,
"step": 24000
},
{
"epoch": 1.8,
"eval_accuracy": 0.785,
"eval_loss": 0.7899203300476074,
"eval_runtime": 1189.4198,
"eval_samples_per_second": 1.681,
"eval_steps_per_second": 0.21,
"step": 24000
},
{
"epoch": 1.82,
"eval_accuracy": 0.7785,
"eval_loss": 0.7695072293281555,
"eval_runtime": 1186.6637,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 24300
},
{
"epoch": 1.84,
"learning_rate": 3.775e-05,
"loss": 0.6667,
"step": 24500
},
{
"epoch": 1.84,
"eval_accuracy": 0.793,
"eval_loss": 0.812134861946106,
"eval_runtime": 1188.2303,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 24600
},
{
"epoch": 1.87,
"eval_accuracy": 0.79,
"eval_loss": 0.7290639877319336,
"eval_runtime": 1188.6526,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 24900
},
{
"epoch": 1.87,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.6313,
"step": 25000
},
{
"epoch": 1.89,
"eval_accuracy": 0.78,
"eval_loss": 0.7741135954856873,
"eval_runtime": 1188.1815,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 25200
},
{
"epoch": 1.91,
"learning_rate": 3.7250000000000004e-05,
"loss": 0.6634,
"step": 25500
},
{
"epoch": 1.91,
"eval_accuracy": 0.778,
"eval_loss": 0.7394410371780396,
"eval_runtime": 1188.3415,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 25500
},
{
"epoch": 1.93,
"eval_accuracy": 0.776,
"eval_loss": 0.7500986456871033,
"eval_runtime": 1188.0864,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 25800
},
{
"epoch": 1.95,
"learning_rate": 3.7e-05,
"loss": 0.6614,
"step": 26000
},
{
"epoch": 1.96,
"eval_accuracy": 0.787,
"eval_loss": 0.7917065620422363,
"eval_runtime": 1187.7872,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 26100
},
{
"epoch": 1.98,
"eval_accuracy": 0.778,
"eval_loss": 0.7841246724128723,
"eval_runtime": 1188.5012,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 26400
},
{
"epoch": 1.99,
"learning_rate": 3.675e-05,
"loss": 0.6286,
"step": 26500
},
{
"epoch": 2.0,
"eval_accuracy": 0.7825,
"eval_loss": 0.7422041296958923,
"eval_runtime": 1188.5052,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 26700
},
{
"epoch": 2.02,
"learning_rate": 3.65e-05,
"loss": 0.516,
"step": 27000
},
{
"epoch": 2.02,
"eval_accuracy": 0.773,
"eval_loss": 0.9123806357383728,
"eval_runtime": 1187.6306,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 27000
},
{
"epoch": 2.05,
"eval_accuracy": 0.801,
"eval_loss": 0.8635061979293823,
"eval_runtime": 1187.6211,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 27300
},
{
"epoch": 2.06,
"learning_rate": 3.625e-05,
"loss": 0.4621,
"step": 27500
},
{
"epoch": 2.07,
"eval_accuracy": 0.774,
"eval_loss": 0.9402374029159546,
"eval_runtime": 1187.828,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 27600
},
{
"epoch": 2.09,
"eval_accuracy": 0.7975,
"eval_loss": 0.8873071074485779,
"eval_runtime": 1187.9698,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 27900
},
{
"epoch": 2.1,
"learning_rate": 3.6e-05,
"loss": 0.4644,
"step": 28000
},
{
"epoch": 2.11,
"eval_accuracy": 0.788,
"eval_loss": 0.8984495401382446,
"eval_runtime": 1188.2025,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 28200
},
{
"epoch": 2.14,
"learning_rate": 3.575e-05,
"loss": 0.4601,
"step": 28500
},
{
"epoch": 2.14,
"eval_accuracy": 0.7745,
"eval_loss": 0.9219344258308411,
"eval_runtime": 1188.2213,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 28500
},
{
"epoch": 2.16,
"eval_accuracy": 0.7845,
"eval_loss": 0.895272433757782,
"eval_runtime": 1188.0314,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 28800
},
{
"epoch": 2.17,
"learning_rate": 3.55e-05,
"loss": 0.4612,
"step": 29000
},
{
"epoch": 2.18,
"eval_accuracy": 0.7855,
"eval_loss": 0.9521645903587341,
"eval_runtime": 1188.1008,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 29100
},
{
"epoch": 2.2,
"eval_accuracy": 0.7915,
"eval_loss": 0.8539759516716003,
"eval_runtime": 1188.3095,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 29400
},
{
"epoch": 2.21,
"learning_rate": 3.525e-05,
"loss": 0.4594,
"step": 29500
},
{
"epoch": 2.23,
"eval_accuracy": 0.79,
"eval_loss": 0.8441948890686035,
"eval_runtime": 1188.0956,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 29700
},
{
"epoch": 2.25,
"learning_rate": 3.5e-05,
"loss": 0.4502,
"step": 30000
},
{
"epoch": 2.25,
"eval_accuracy": 0.7815,
"eval_loss": 0.9029179811477661,
"eval_runtime": 1187.7265,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 30000
},
{
"epoch": 2.27,
"eval_accuracy": 0.785,
"eval_loss": 0.8536617159843445,
"eval_runtime": 1187.8344,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 30300
},
{
"epoch": 2.29,
"learning_rate": 3.475e-05,
"loss": 0.4733,
"step": 30500
},
{
"epoch": 2.29,
"eval_accuracy": 0.7855,
"eval_loss": 0.9026994705200195,
"eval_runtime": 1187.3948,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 30600
},
{
"epoch": 2.32,
"eval_accuracy": 0.7875,
"eval_loss": 0.9556459784507751,
"eval_runtime": 1187.5961,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 30900
},
{
"epoch": 2.32,
"learning_rate": 3.45e-05,
"loss": 0.4394,
"step": 31000
},
{
"epoch": 2.34,
"eval_accuracy": 0.7905,
"eval_loss": 0.8229288458824158,
"eval_runtime": 1188.2545,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 31200
},
{
"epoch": 2.36,
"learning_rate": 3.4250000000000006e-05,
"loss": 0.4487,
"step": 31500
},
{
"epoch": 2.36,
"eval_accuracy": 0.7835,
"eval_loss": 0.9420590400695801,
"eval_runtime": 1188.0122,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 31500
},
{
"epoch": 2.38,
"eval_accuracy": 0.792,
"eval_loss": 0.8579486608505249,
"eval_runtime": 1187.7995,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 31800
},
{
"epoch": 2.4,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.4713,
"step": 32000
},
{
"epoch": 2.41,
"eval_accuracy": 0.7955,
"eval_loss": 0.8385721445083618,
"eval_runtime": 1188.3507,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 32100
},
{
"epoch": 2.43,
"eval_accuracy": 0.794,
"eval_loss": 0.8750296831130981,
"eval_runtime": 1189.7033,
"eval_samples_per_second": 1.681,
"eval_steps_per_second": 0.21,
"step": 32400
},
{
"epoch": 2.44,
"learning_rate": 3.375000000000001e-05,
"loss": 0.4886,
"step": 32500
},
{
"epoch": 2.45,
"eval_accuracy": 0.789,
"eval_loss": 0.8962150812149048,
"eval_runtime": 1189.4135,
"eval_samples_per_second": 1.682,
"eval_steps_per_second": 0.21,
"step": 32700
},
{
"epoch": 2.47,
"learning_rate": 3.35e-05,
"loss": 0.4904,
"step": 33000
},
{
"epoch": 2.47,
"eval_accuracy": 0.783,
"eval_loss": 0.8365711569786072,
"eval_runtime": 1189.5724,
"eval_samples_per_second": 1.681,
"eval_steps_per_second": 0.21,
"step": 33000
},
{
"epoch": 2.5,
"eval_accuracy": 0.796,
"eval_loss": 0.8707769513130188,
"eval_runtime": 1188.4348,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 33300
},
{
"epoch": 2.51,
"learning_rate": 3.325e-05,
"loss": 0.4652,
"step": 33500
},
{
"epoch": 2.52,
"eval_accuracy": 0.7945,
"eval_loss": 0.8616237640380859,
"eval_runtime": 1187.5594,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 33600
},
{
"epoch": 2.54,
"eval_accuracy": 0.7805,
"eval_loss": 0.8876886367797852,
"eval_runtime": 1186.5623,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 33900
},
{
"epoch": 2.55,
"learning_rate": 3.3e-05,
"loss": 0.4526,
"step": 34000
},
{
"epoch": 2.56,
"eval_accuracy": 0.787,
"eval_loss": 0.8660940527915955,
"eval_runtime": 1187.4762,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 34200
},
{
"epoch": 2.59,
"learning_rate": 3.275e-05,
"loss": 0.4691,
"step": 34500
},
{
"epoch": 2.59,
"eval_accuracy": 0.793,
"eval_loss": 0.8734720349311829,
"eval_runtime": 1188.0976,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 34500
},
{
"epoch": 2.61,
"eval_accuracy": 0.784,
"eval_loss": 0.9059090614318848,
"eval_runtime": 1187.6806,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 34800
},
{
"epoch": 2.62,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.4408,
"step": 35000
},
{
"epoch": 2.63,
"eval_accuracy": 0.7855,
"eval_loss": 0.8489252924919128,
"eval_runtime": 1186.6452,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.211,
"step": 35100
},
{
"epoch": 2.65,
"eval_accuracy": 0.774,
"eval_loss": 0.8704678416252136,
"eval_runtime": 1186.5813,
"eval_samples_per_second": 1.686,
"eval_steps_per_second": 0.211,
"step": 35400
},
{
"epoch": 2.66,
"learning_rate": 3.2250000000000005e-05,
"loss": 0.4725,
"step": 35500
},
{
"epoch": 2.68,
"eval_accuracy": 0.782,
"eval_loss": 0.9732263684272766,
"eval_runtime": 1187.7392,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 35700
},
{
"epoch": 2.7,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.4486,
"step": 36000
},
{
"epoch": 2.7,
"eval_accuracy": 0.783,
"eval_loss": 0.8399544954299927,
"eval_runtime": 1187.666,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 36000
},
{
"epoch": 2.72,
"eval_accuracy": 0.792,
"eval_loss": 0.8701621294021606,
"eval_runtime": 1187.7885,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.21,
"step": 36300
},
{
"epoch": 2.74,
"learning_rate": 3.175e-05,
"loss": 0.4427,
"step": 36500
},
{
"epoch": 2.74,
"eval_accuracy": 0.793,
"eval_loss": 0.8826534152030945,
"eval_runtime": 1187.3044,
"eval_samples_per_second": 1.684,
"eval_steps_per_second": 0.211,
"step": 36600
},
{
"epoch": 2.77,
"eval_accuracy": 0.802,
"eval_loss": 0.8090859651565552,
"eval_runtime": 1188.0705,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"step": 36900
},
{
"epoch": 2.77,
"learning_rate": 3.15e-05,
"loss": 0.4576,
"step": 37000
}
],
"logging_steps": 500,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"total_flos": 2.5551147393613824e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}