{ "best_metric": 4.843646049499512, "best_model_checkpoint": "models/GPT2_random_function_42/checkpoint-64260", "epoch": 10.0, "global_step": 64260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 8.1532, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 6.9215, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 6.5875, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 6.333, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 6.1289, "step": 5000 }, { "epoch": 0.93, "learning_rate": 6e-05, "loss": 5.9624, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.17896217672198259, "eval_loss": 5.776374816894531, "eval_runtime": 2.254, "eval_samples_per_second": 523.954, "eval_steps_per_second": 4.437, "step": 6426 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 5.8201, "step": 7000 }, { "epoch": 1.24, "learning_rate": 8e-05, "loss": 5.7047, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 5.6153, "step": 9000 }, { "epoch": 1.56, "learning_rate": 0.0001, "loss": 5.5372, "step": 10000 }, { "epoch": 1.71, "learning_rate": 9.889000000000001e-05, "loss": 5.4722, "step": 11000 }, { "epoch": 1.87, "learning_rate": 9.77788888888889e-05, "loss": 5.4144, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.20932480815003968, "eval_loss": 5.288669586181641, "eval_runtime": 2.2534, "eval_samples_per_second": 524.088, "eval_steps_per_second": 4.438, "step": 12852 }, { "epoch": 2.02, "learning_rate": 9.66688888888889e-05, "loss": 5.3583, "step": 13000 }, { "epoch": 2.18, "learning_rate": 9.555777777777778e-05, "loss": 5.2917, "step": 14000 }, { "epoch": 2.33, "learning_rate": 9.444777777777778e-05, "loss": 5.261, "step": 15000 }, { "epoch": 2.49, "learning_rate": 9.333666666666667e-05, "loss": 5.2364, "step": 16000 }, { "epoch": 2.65, "learning_rate": 9.222666666666668e-05, "loss": 5.2124, "step": 17000 }, { "epoch": 2.8, "learning_rate": 9.111555555555556e-05, "loss": 5.1899, "step": 18000 }, { "epoch": 2.96, "learning_rate": 9.000555555555557e-05, "loss": 5.1712, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.22311266976471295, "eval_loss": 5.106184959411621, "eval_runtime": 2.2518, "eval_samples_per_second": 524.46, "eval_steps_per_second": 4.441, "step": 19278 }, { "epoch": 3.11, "learning_rate": 8.889444444444444e-05, "loss": 5.11, "step": 20000 }, { "epoch": 3.27, "learning_rate": 8.778444444444445e-05, "loss": 5.0904, "step": 21000 }, { "epoch": 3.42, "learning_rate": 8.667333333333334e-05, "loss": 5.0817, "step": 22000 }, { "epoch": 3.58, "learning_rate": 8.556333333333334e-05, "loss": 5.0734, "step": 23000 }, { "epoch": 3.73, "learning_rate": 8.445222222222222e-05, "loss": 5.0627, "step": 24000 }, { "epoch": 3.89, "learning_rate": 8.334222222222222e-05, "loss": 5.0536, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.23168674618466933, "eval_loss": 5.014615535736084, "eval_runtime": 2.2491, "eval_samples_per_second": 525.091, "eval_steps_per_second": 4.446, "step": 25704 }, { "epoch": 4.05, "learning_rate": 8.223111111111111e-05, "loss": 5.0232, "step": 26000 }, { "epoch": 4.2, "learning_rate": 8.112111111111111e-05, "loss": 4.977, "step": 27000 }, { "epoch": 4.36, "learning_rate": 8.001e-05, "loss": 4.9799, "step": 28000 }, { "epoch": 4.51, "learning_rate": 7.890000000000001e-05, "loss": 4.9757, "step": 29000 }, { "epoch": 4.67, "learning_rate": 7.77888888888889e-05, "loss": 4.9734, "step": 30000 }, { "epoch": 4.82, "learning_rate": 7.667888888888889e-05, "loss": 4.9701, "step": 31000 }, { "epoch": 4.98, "learning_rate": 7.556777777777779e-05, "loss": 4.9639, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.2363138138638682, "eval_loss": 4.959312438964844, "eval_runtime": 2.2437, "eval_samples_per_second": 526.357, "eval_steps_per_second": 4.457, "step": 32130 }, { "epoch": 5.14, "learning_rate": 7.445777777777778e-05, "loss": 4.9014, "step": 33000 }, { "epoch": 5.29, "learning_rate": 7.334666666666668e-05, "loss": 4.9002, "step": 34000 }, { "epoch": 5.45, "learning_rate": 7.223666666666667e-05, "loss": 4.9022, "step": 35000 }, { "epoch": 5.6, "learning_rate": 7.112555555555556e-05, "loss": 4.9051, "step": 36000 }, { "epoch": 5.76, "learning_rate": 7.001555555555555e-05, "loss": 4.9037, "step": 37000 }, { "epoch": 5.91, "learning_rate": 6.890444444444445e-05, "loss": 4.9031, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.24040750198350525, "eval_loss": 4.919950485229492, "eval_runtime": 2.3095, "eval_samples_per_second": 511.362, "eval_steps_per_second": 4.33, "step": 38556 }, { "epoch": 6.07, "learning_rate": 6.779444444444444e-05, "loss": 4.8674, "step": 39000 }, { "epoch": 6.22, "learning_rate": 6.668333333333333e-05, "loss": 4.836, "step": 40000 }, { "epoch": 6.38, "learning_rate": 6.557333333333332e-05, "loss": 4.8425, "step": 41000 }, { "epoch": 6.54, "learning_rate": 6.446222222222223e-05, "loss": 4.846, "step": 42000 }, { "epoch": 6.69, "learning_rate": 6.335222222222222e-05, "loss": 4.849, "step": 43000 }, { "epoch": 6.85, "learning_rate": 6.224111111111112e-05, "loss": 4.8501, "step": 44000 }, { "epoch": 7.0, "eval_accuracy": 0.24246101328781827, "eval_loss": 4.8934783935546875, "eval_runtime": 2.2468, "eval_samples_per_second": 525.632, "eval_steps_per_second": 4.451, "step": 44982 }, { "epoch": 7.0, "learning_rate": 6.113111111111111e-05, "loss": 4.8488, "step": 45000 }, { "epoch": 7.16, "learning_rate": 6.002e-05, "loss": 4.7787, "step": 46000 }, { "epoch": 7.31, "learning_rate": 5.890888888888889e-05, "loss": 4.7911, "step": 47000 }, { "epoch": 7.47, "learning_rate": 5.779888888888889e-05, "loss": 4.7989, "step": 48000 }, { "epoch": 7.63, "learning_rate": 5.6687777777777785e-05, "loss": 4.8007, "step": 49000 }, { "epoch": 7.78, "learning_rate": 5.5577777777777784e-05, "loss": 4.8028, "step": 50000 }, { "epoch": 7.94, "learning_rate": 5.4466666666666665e-05, "loss": 4.8034, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.24542793708788094, "eval_loss": 4.871953010559082, "eval_runtime": 2.2466, "eval_samples_per_second": 525.675, "eval_steps_per_second": 4.451, "step": 51408 }, { "epoch": 8.09, "learning_rate": 5.3356666666666663e-05, "loss": 4.7612, "step": 52000 }, { "epoch": 8.25, "learning_rate": 5.224555555555556e-05, "loss": 4.7436, "step": 53000 }, { "epoch": 8.4, "learning_rate": 5.1135555555555556e-05, "loss": 4.7511, "step": 54000 }, { "epoch": 8.56, "learning_rate": 5.002444444444445e-05, "loss": 4.7584, "step": 55000 }, { "epoch": 8.71, "learning_rate": 4.891444444444444e-05, "loss": 4.762, "step": 56000 }, { "epoch": 8.87, "learning_rate": 4.780333333333333e-05, "loss": 4.7644, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.24703474301106096, "eval_loss": 4.856387615203857, "eval_runtime": 2.2498, "eval_samples_per_second": 524.927, "eval_steps_per_second": 4.445, "step": 57834 }, { "epoch": 9.03, "learning_rate": 4.6693333333333336e-05, "loss": 4.7533, "step": 58000 }, { "epoch": 9.18, "learning_rate": 4.558222222222222e-05, "loss": 4.7005, "step": 59000 }, { "epoch": 9.34, "learning_rate": 4.447222222222223e-05, "loss": 4.7126, "step": 60000 }, { "epoch": 9.49, "learning_rate": 4.3361111111111116e-05, "loss": 4.7184, "step": 61000 }, { "epoch": 9.65, "learning_rate": 4.2251111111111115e-05, "loss": 4.7231, "step": 62000 }, { "epoch": 9.8, "learning_rate": 4.114e-05, "loss": 4.7254, "step": 63000 }, { "epoch": 9.96, "learning_rate": 4.003e-05, "loss": 4.7269, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.2490015801369452, "eval_loss": 4.843646049499512, "eval_runtime": 2.2484, "eval_samples_per_second": 525.261, "eval_steps_per_second": 4.448, "step": 64260 } ], "max_steps": 100000, "num_train_epochs": 16, "total_flos": 5.37232748544e+17, "trial_name": null, "trial_params": null }