| { |
| "best_metric": 4.843646049499512, |
| "best_model_checkpoint": "models/GPT2_random_function_42/checkpoint-64260", |
| "epoch": 10.0, |
| "global_step": 64260, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 8.1532, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 6.9215, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 6.5875, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 6.333, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 6.1289, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6e-05, |
| "loss": 5.9624, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.17896217672198259, |
| "eval_loss": 5.776374816894531, |
| "eval_runtime": 2.254, |
| "eval_samples_per_second": 523.954, |
| "eval_steps_per_second": 4.437, |
| "step": 6426 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 5.8201, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 8e-05, |
| "loss": 5.7047, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 5.6153, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001, |
| "loss": 5.5372, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.889000000000001e-05, |
| "loss": 5.4722, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.77788888888889e-05, |
| "loss": 5.4144, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.20932480815003968, |
| "eval_loss": 5.288669586181641, |
| "eval_runtime": 2.2534, |
| "eval_samples_per_second": 524.088, |
| "eval_steps_per_second": 4.438, |
| "step": 12852 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.66688888888889e-05, |
| "loss": 5.3583, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.555777777777778e-05, |
| "loss": 5.2917, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.444777777777778e-05, |
| "loss": 5.261, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.333666666666667e-05, |
| "loss": 5.2364, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 9.222666666666668e-05, |
| "loss": 5.2124, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 9.111555555555556e-05, |
| "loss": 5.1899, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 9.000555555555557e-05, |
| "loss": 5.1712, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.22311266976471295, |
| "eval_loss": 5.106184959411621, |
| "eval_runtime": 2.2518, |
| "eval_samples_per_second": 524.46, |
| "eval_steps_per_second": 4.441, |
| "step": 19278 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.889444444444444e-05, |
| "loss": 5.11, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 8.778444444444445e-05, |
| "loss": 5.0904, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 8.667333333333334e-05, |
| "loss": 5.0817, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 8.556333333333334e-05, |
| "loss": 5.0734, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 8.445222222222222e-05, |
| "loss": 5.0627, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 8.334222222222222e-05, |
| "loss": 5.0536, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.23168674618466933, |
| "eval_loss": 5.014615535736084, |
| "eval_runtime": 2.2491, |
| "eval_samples_per_second": 525.091, |
| "eval_steps_per_second": 4.446, |
| "step": 25704 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 8.223111111111111e-05, |
| "loss": 5.0232, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 8.112111111111111e-05, |
| "loss": 4.977, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 8.001e-05, |
| "loss": 4.9799, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 4.9757, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 7.77888888888889e-05, |
| "loss": 4.9734, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 7.667888888888889e-05, |
| "loss": 4.9701, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 7.556777777777779e-05, |
| "loss": 4.9639, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.2363138138638682, |
| "eval_loss": 4.959312438964844, |
| "eval_runtime": 2.2437, |
| "eval_samples_per_second": 526.357, |
| "eval_steps_per_second": 4.457, |
| "step": 32130 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 7.445777777777778e-05, |
| "loss": 4.9014, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 7.334666666666668e-05, |
| "loss": 4.9002, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 7.223666666666667e-05, |
| "loss": 4.9022, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 7.112555555555556e-05, |
| "loss": 4.9051, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 7.001555555555555e-05, |
| "loss": 4.9037, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 6.890444444444445e-05, |
| "loss": 4.9031, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.24040750198350525, |
| "eval_loss": 4.919950485229492, |
| "eval_runtime": 2.3095, |
| "eval_samples_per_second": 511.362, |
| "eval_steps_per_second": 4.33, |
| "step": 38556 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 6.779444444444444e-05, |
| "loss": 4.8674, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 6.668333333333333e-05, |
| "loss": 4.836, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 6.557333333333332e-05, |
| "loss": 4.8425, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 6.446222222222223e-05, |
| "loss": 4.846, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 6.335222222222222e-05, |
| "loss": 4.849, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 6.224111111111112e-05, |
| "loss": 4.8501, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.24246101328781827, |
| "eval_loss": 4.8934783935546875, |
| "eval_runtime": 2.2468, |
| "eval_samples_per_second": 525.632, |
| "eval_steps_per_second": 4.451, |
| "step": 44982 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 6.113111111111111e-05, |
| "loss": 4.8488, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 6.002e-05, |
| "loss": 4.7787, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 5.890888888888889e-05, |
| "loss": 4.7911, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 5.779888888888889e-05, |
| "loss": 4.7989, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 5.6687777777777785e-05, |
| "loss": 4.8007, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 5.5577777777777784e-05, |
| "loss": 4.8028, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 5.4466666666666665e-05, |
| "loss": 4.8034, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.24542793708788094, |
| "eval_loss": 4.871953010559082, |
| "eval_runtime": 2.2466, |
| "eval_samples_per_second": 525.675, |
| "eval_steps_per_second": 4.451, |
| "step": 51408 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 5.3356666666666663e-05, |
| "loss": 4.7612, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 5.224555555555556e-05, |
| "loss": 4.7436, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 5.1135555555555556e-05, |
| "loss": 4.7511, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 5.002444444444445e-05, |
| "loss": 4.7584, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 4.891444444444444e-05, |
| "loss": 4.762, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 4.780333333333333e-05, |
| "loss": 4.7644, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.24703474301106096, |
| "eval_loss": 4.856387615203857, |
| "eval_runtime": 2.2498, |
| "eval_samples_per_second": 524.927, |
| "eval_steps_per_second": 4.445, |
| "step": 57834 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 4.6693333333333336e-05, |
| "loss": 4.7533, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 4.558222222222222e-05, |
| "loss": 4.7005, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 4.447222222222223e-05, |
| "loss": 4.7126, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 4.3361111111111116e-05, |
| "loss": 4.7184, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 4.2251111111111115e-05, |
| "loss": 4.7231, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 4.114e-05, |
| "loss": 4.7254, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 4.003e-05, |
| "loss": 4.7269, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2490015801369452, |
| "eval_loss": 4.843646049499512, |
| "eval_runtime": 2.2484, |
| "eval_samples_per_second": 525.261, |
| "eval_steps_per_second": 4.448, |
| "step": 64260 |
| } |
| ], |
| "max_steps": 100000, |
| "num_train_epochs": 16, |
| "total_flos": 5.37232748544e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|