{ "best_metric": 3.599998950958252, "best_model_checkpoint": "models/GPT2_five_function_42/checkpoint-64080", "epoch": 10.0, "global_step": 64080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 6.9949, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 5.7461, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 5.4085, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 5.1434, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 4.933, "step": 5000 }, { "epoch": 0.94, "learning_rate": 6e-05, "loss": 4.7568, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.3263572316618317, "eval_loss": 4.562152862548828, "eval_runtime": 1.9609, "eval_samples_per_second": 600.758, "eval_steps_per_second": 5.1, "step": 6408 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 4.6067, "step": 7000 }, { "epoch": 1.25, "learning_rate": 8e-05, "loss": 4.4864, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 4.3914, "step": 9000 }, { "epoch": 1.56, "learning_rate": 0.0001, "loss": 4.3109, "step": 10000 }, { "epoch": 1.72, "learning_rate": 9.889000000000001e-05, "loss": 4.2433, "step": 11000 }, { "epoch": 1.87, "learning_rate": 9.77788888888889e-05, "loss": 4.1832, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.36074087937649557, "eval_loss": 4.056756019592285, "eval_runtime": 1.9676, "eval_samples_per_second": 598.695, "eval_steps_per_second": 5.082, "step": 12816 }, { "epoch": 2.03, "learning_rate": 9.66688888888889e-05, "loss": 4.1265, "step": 13000 }, { "epoch": 2.18, "learning_rate": 9.55588888888889e-05, "loss": 4.0565, "step": 14000 }, { "epoch": 2.34, "learning_rate": 9.444777777777778e-05, "loss": 4.0283, "step": 15000 }, { "epoch": 2.5, "learning_rate": 9.333777777777777e-05, "loss": 4.0035, "step": 16000 }, { "epoch": 2.65, "learning_rate": 9.222666666666668e-05, "loss": 3.9768, "step": 17000 }, { "epoch": 2.81, "learning_rate": 9.111666666666667e-05, "loss": 3.9553, "step": 18000 }, { "epoch": 2.97, "learning_rate": 9.000555555555557e-05, "loss": 3.9365, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.3769100169779287, "eval_loss": 3.874519109725952, "eval_runtime": 1.9728, "eval_samples_per_second": 597.128, "eval_steps_per_second": 5.069, "step": 19224 }, { "epoch": 3.12, "learning_rate": 8.889555555555556e-05, "loss": 3.871, "step": 20000 }, { "epoch": 3.28, "learning_rate": 8.778444444444445e-05, "loss": 3.8532, "step": 21000 }, { "epoch": 3.43, "learning_rate": 8.667444444444444e-05, "loss": 3.8443, "step": 22000 }, { "epoch": 3.59, "learning_rate": 8.556444444444445e-05, "loss": 3.8378, "step": 23000 }, { "epoch": 3.75, "learning_rate": 8.445333333333333e-05, "loss": 3.825, "step": 24000 }, { "epoch": 3.9, "learning_rate": 8.334222222222222e-05, "loss": 3.8154, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.3853922971003837, "eval_loss": 3.779280185699463, "eval_runtime": 1.9858, "eval_samples_per_second": 593.214, "eval_steps_per_second": 5.036, "step": 25632 }, { "epoch": 4.06, "learning_rate": 8.223111111111111e-05, "loss": 3.7779, "step": 26000 }, { "epoch": 4.21, "learning_rate": 8.112111111111111e-05, "loss": 3.7386, "step": 27000 }, { "epoch": 4.37, "learning_rate": 8.001e-05, "loss": 3.7408, "step": 28000 }, { "epoch": 4.53, "learning_rate": 7.890000000000001e-05, "loss": 3.7369, "step": 29000 }, { "epoch": 4.68, "learning_rate": 7.77888888888889e-05, "loss": 3.7354, "step": 30000 }, { "epoch": 4.84, "learning_rate": 7.667888888888889e-05, "loss": 3.7297, "step": 31000 }, { "epoch": 4.99, "learning_rate": 7.556777777777779e-05, "loss": 3.725, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.3909669398286165, "eval_loss": 3.7228105068206787, "eval_runtime": 2.0428, "eval_samples_per_second": 576.664, "eval_steps_per_second": 4.895, "step": 32040 }, { "epoch": 5.15, "learning_rate": 7.445777777777778e-05, "loss": 3.6547, "step": 33000 }, { "epoch": 5.31, "learning_rate": 7.334666666666668e-05, "loss": 3.6602, "step": 34000 }, { "epoch": 5.46, "learning_rate": 7.223666666666667e-05, "loss": 3.6624, "step": 35000 }, { "epoch": 5.62, "learning_rate": 7.112555555555556e-05, "loss": 3.6649, "step": 36000 }, { "epoch": 5.77, "learning_rate": 7.001555555555555e-05, "loss": 3.6615, "step": 37000 }, { "epoch": 5.93, "learning_rate": 6.890444444444445e-05, "loss": 3.6621, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.39523147467347564, "eval_loss": 3.681443691253662, "eval_runtime": 1.9586, "eval_samples_per_second": 601.465, "eval_steps_per_second": 5.106, "step": 38448 }, { "epoch": 6.09, "learning_rate": 6.779444444444444e-05, "loss": 3.6176, "step": 39000 }, { "epoch": 6.24, "learning_rate": 6.668333333333333e-05, "loss": 3.5948, "step": 40000 }, { "epoch": 6.4, "learning_rate": 6.557333333333332e-05, "loss": 3.6021, "step": 41000 }, { "epoch": 6.55, "learning_rate": 6.446222222222223e-05, "loss": 3.6043, "step": 42000 }, { "epoch": 6.71, "learning_rate": 6.335222222222222e-05, "loss": 3.6064, "step": 43000 }, { "epoch": 6.87, "learning_rate": 6.224111111111112e-05, "loss": 3.6072, "step": 44000 }, { "epoch": 7.0, "eval_accuracy": 0.39868721842706845, "eval_loss": 3.6536548137664795, "eval_runtime": 1.9678, "eval_samples_per_second": 598.633, "eval_steps_per_second": 5.082, "step": 44856 }, { "epoch": 7.02, "learning_rate": 6.113111111111111e-05, "loss": 3.5958, "step": 45000 }, { "epoch": 7.18, "learning_rate": 6.002e-05, "loss": 3.5371, "step": 46000 }, { "epoch": 7.33, "learning_rate": 5.891e-05, "loss": 3.5493, "step": 47000 }, { "epoch": 7.49, "learning_rate": 5.779888888888889e-05, "loss": 3.554, "step": 48000 }, { "epoch": 7.65, "learning_rate": 5.668888888888889e-05, "loss": 3.5593, "step": 49000 }, { "epoch": 7.8, "learning_rate": 5.5577777777777784e-05, "loss": 3.5597, "step": 50000 }, { "epoch": 7.96, "learning_rate": 5.446777777777778e-05, "loss": 3.5598, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.4002713794901274, "eval_loss": 3.631843328475952, "eval_runtime": 1.9682, "eval_samples_per_second": 598.504, "eval_steps_per_second": 5.081, "step": 51264 }, { "epoch": 8.11, "learning_rate": 5.3356666666666663e-05, "loss": 3.5064, "step": 52000 }, { "epoch": 8.27, "learning_rate": 5.224666666666667e-05, "loss": 3.5007, "step": 53000 }, { "epoch": 8.43, "learning_rate": 5.1135555555555556e-05, "loss": 3.511, "step": 54000 }, { "epoch": 8.58, "learning_rate": 5.002555555555556e-05, "loss": 3.5135, "step": 55000 }, { "epoch": 8.74, "learning_rate": 4.891444444444444e-05, "loss": 3.5193, "step": 56000 }, { "epoch": 8.9, "learning_rate": 4.780444444444445e-05, "loss": 3.5203, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.4027378581072952, "eval_loss": 3.6141819953918457, "eval_runtime": 1.9786, "eval_samples_per_second": 595.365, "eval_steps_per_second": 5.054, "step": 57672 }, { "epoch": 9.05, "learning_rate": 4.6693333333333336e-05, "loss": 3.4957, "step": 58000 }, { "epoch": 9.21, "learning_rate": 4.5583333333333335e-05, "loss": 3.4579, "step": 59000 }, { "epoch": 9.36, "learning_rate": 4.447222222222223e-05, "loss": 3.4682, "step": 60000 }, { "epoch": 9.52, "learning_rate": 4.336222222222223e-05, "loss": 3.4743, "step": 61000 }, { "epoch": 9.68, "learning_rate": 4.2251111111111115e-05, "loss": 3.4784, "step": 62000 }, { "epoch": 9.83, "learning_rate": 4.1141111111111114e-05, "loss": 3.4833, "step": 63000 }, { "epoch": 9.99, "learning_rate": 4.003e-05, "loss": 3.4839, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.40503723112709383, "eval_loss": 3.599998950958252, "eval_runtime": 1.9622, "eval_samples_per_second": 600.357, "eval_steps_per_second": 5.096, "step": 64080 } ], "max_steps": 100000, "num_train_epochs": 16, "total_flos": 5.3573097259008e+17, "trial_name": null, "trial_params": null }