{ "best_metric": 4.0208740234375, "best_model_checkpoint": "models/GPT2_five_function_random_function_53/checkpoint-64070", "epoch": 10.0, "global_step": 64070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 7.2283, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 6.0277, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 5.7267, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 5.4767, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 5.277, "step": 5000 }, { "epoch": 0.94, "learning_rate": 6e-05, "loss": 5.114, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.19812708046468724, "eval_loss": 4.937533378601074, "eval_runtime": 2.0754, "eval_samples_per_second": 567.592, "eval_steps_per_second": 4.818, "step": 6407 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 4.9697, "step": 7000 }, { "epoch": 1.25, "learning_rate": 8e-05, "loss": 4.8582, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 4.7675, "step": 9000 }, { "epoch": 1.56, "learning_rate": 0.0001, "loss": 4.6939, "step": 10000 }, { "epoch": 1.72, "learning_rate": 9.815239504346218e-05, "loss": 4.6255, "step": 11000 }, { "epoch": 1.87, "learning_rate": 9.630294063251342e-05, "loss": 4.569, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.229262195366496, "eval_loss": 4.452399253845215, "eval_runtime": 2.1016, "eval_samples_per_second": 560.526, "eval_steps_per_second": 4.758, "step": 12814 }, { "epoch": 2.03, "learning_rate": 9.445718513038653e-05, "loss": 4.5155, "step": 13000 }, { "epoch": 2.19, "learning_rate": 9.260773071943777e-05, "loss": 4.447, "step": 14000 }, { "epoch": 2.34, "learning_rate": 9.0758276308489e-05, "loss": 4.4207, "step": 15000 }, { "epoch": 2.5, "learning_rate": 8.890882189754023e-05, "loss": 4.3963, "step": 16000 }, { "epoch": 2.65, "learning_rate": 8.70612169410024e-05, "loss": 4.3706, "step": 17000 }, { "epoch": 2.81, "learning_rate": 8.521176253005363e-05, "loss": 4.3528, "step": 18000 }, { "epoch": 2.97, "learning_rate": 8.336415757351582e-05, "loss": 4.3333, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.2426440116038127, "eval_loss": 4.2776312828063965, "eval_runtime": 2.0821, "eval_samples_per_second": 565.785, "eval_steps_per_second": 4.803, "step": 19221 }, { "epoch": 3.12, "learning_rate": 8.151655261697799e-05, "loss": 4.2694, "step": 20000 }, { "epoch": 3.28, "learning_rate": 7.966709820602923e-05, "loss": 4.2519, "step": 21000 }, { "epoch": 3.43, "learning_rate": 7.781764379508046e-05, "loss": 4.2432, "step": 22000 }, { "epoch": 3.59, "learning_rate": 7.596818938413168e-05, "loss": 4.2321, "step": 23000 }, { "epoch": 3.75, "learning_rate": 7.412058442759386e-05, "loss": 4.2238, "step": 24000 }, { "epoch": 3.9, "learning_rate": 7.22711300166451e-05, "loss": 4.2155, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.25107281793510955, "eval_loss": 4.186640739440918, "eval_runtime": 2.2141, "eval_samples_per_second": 532.038, "eval_steps_per_second": 4.516, "step": 25628 }, { "epoch": 4.06, "learning_rate": 7.042537451451821e-05, "loss": 4.1794, "step": 26000 }, { "epoch": 4.21, "learning_rate": 6.857592010356945e-05, "loss": 4.1418, "step": 27000 }, { "epoch": 4.37, "learning_rate": 6.672646569262068e-05, "loss": 4.1399, "step": 28000 }, { "epoch": 4.53, "learning_rate": 6.487701128167191e-05, "loss": 4.1392, "step": 29000 }, { "epoch": 4.68, "learning_rate": 6.302940632513408e-05, "loss": 4.1333, "step": 30000 }, { "epoch": 4.84, "learning_rate": 6.117995191418531e-05, "loss": 4.1287, "step": 31000 }, { "epoch": 4.99, "learning_rate": 5.93323469576475e-05, "loss": 4.1267, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.25577851155702314, "eval_loss": 4.129362106323242, "eval_runtime": 2.2135, "eval_samples_per_second": 532.185, "eval_steps_per_second": 4.518, "step": 32035 }, { "epoch": 5.15, "learning_rate": 5.748289254669873e-05, "loss": 4.0583, "step": 33000 }, { "epoch": 5.31, "learning_rate": 5.563343813574996e-05, "loss": 4.0624, "step": 34000 }, { "epoch": 5.46, "learning_rate": 5.378398372480119e-05, "loss": 4.063, "step": 35000 }, { "epoch": 5.62, "learning_rate": 5.193637876826336e-05, "loss": 4.0642, "step": 36000 }, { "epoch": 5.77, "learning_rate": 5.008692435731459e-05, "loss": 4.0627, "step": 37000 }, { "epoch": 5.93, "learning_rate": 4.823931940077677e-05, "loss": 4.0604, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.25978904589388124, "eval_loss": 4.091472625732422, "eval_runtime": 2.2427, "eval_samples_per_second": 525.259, "eval_steps_per_second": 4.459, "step": 38442 }, { "epoch": 6.09, "learning_rate": 4.6389864989828e-05, "loss": 4.0207, "step": 39000 }, { "epoch": 6.24, "learning_rate": 4.454226003329018e-05, "loss": 3.9982, "step": 40000 }, { "epoch": 6.4, "learning_rate": 4.269280562234141e-05, "loss": 4.0031, "step": 41000 }, { "epoch": 6.56, "learning_rate": 4.084520066580359e-05, "loss": 4.0014, "step": 42000 }, { "epoch": 6.71, "learning_rate": 3.899574625485482e-05, "loss": 4.0034, "step": 43000 }, { "epoch": 6.87, "learning_rate": 3.7148141298317e-05, "loss": 4.0046, "step": 44000 }, { "epoch": 7.0, "eval_accuracy": 0.26226889295883854, "eval_loss": 4.0633225440979, "eval_runtime": 2.2236, "eval_samples_per_second": 529.772, "eval_steps_per_second": 4.497, "step": 44849 }, { "epoch": 7.02, "learning_rate": 3.530053634177918e-05, "loss": 3.9915, "step": 45000 }, { "epoch": 7.18, "learning_rate": 3.345108193083041e-05, "loss": 3.9436, "step": 46000 }, { "epoch": 7.34, "learning_rate": 3.1601627519881636e-05, "loss": 3.9498, "step": 47000 }, { "epoch": 7.49, "learning_rate": 2.9752173108932868e-05, "loss": 3.9523, "step": 48000 }, { "epoch": 7.65, "learning_rate": 2.7904568152395044e-05, "loss": 3.9541, "step": 49000 }, { "epoch": 7.8, "learning_rate": 2.6055113741446275e-05, "loss": 3.9528, "step": 50000 }, { "epoch": 7.96, "learning_rate": 2.4207508784908454e-05, "loss": 3.9522, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.2649826878601126, "eval_loss": 4.0420684814453125, "eval_runtime": 2.2291, "eval_samples_per_second": 528.453, "eval_steps_per_second": 4.486, "step": 51256 }, { "epoch": 8.12, "learning_rate": 2.2359903828370634e-05, "loss": 3.9114, "step": 52000 }, { "epoch": 8.27, "learning_rate": 2.051044941742186e-05, "loss": 3.9048, "step": 53000 }, { "epoch": 8.43, "learning_rate": 1.8660995006473093e-05, "loss": 3.9086, "step": 54000 }, { "epoch": 8.58, "learning_rate": 1.681154059552432e-05, "loss": 3.9069, "step": 55000 }, { "epoch": 8.74, "learning_rate": 1.49639356389865e-05, "loss": 3.9082, "step": 56000 }, { "epoch": 8.9, "learning_rate": 1.3114481228037728e-05, "loss": 3.9091, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.2675226929401227, "eval_loss": 4.02744722366333, "eval_runtime": 2.0909, "eval_samples_per_second": 563.403, "eval_steps_per_second": 4.783, "step": 57663 }, { "epoch": 9.05, "learning_rate": 1.1268725725910857e-05, "loss": 3.8934, "step": 58000 }, { "epoch": 9.21, "learning_rate": 9.419271314962087e-06, "loss": 3.8677, "step": 59000 }, { "epoch": 9.36, "learning_rate": 7.569816904013316e-06, "loss": 3.8703, "step": 60000 }, { "epoch": 9.52, "learning_rate": 5.720362493064546e-06, "loss": 3.8714, "step": 61000 }, { "epoch": 9.68, "learning_rate": 3.872757536526725e-06, "loss": 3.8706, "step": 62000 }, { "epoch": 9.83, "learning_rate": 2.0233031255779547e-06, "loss": 3.8696, "step": 63000 }, { "epoch": 9.99, "learning_rate": 1.7569816904013318e-07, "loss": 3.8682, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.2680641150755986, "eval_loss": 4.0208740234375, "eval_runtime": 2.2018, "eval_samples_per_second": 535.026, "eval_steps_per_second": 4.542, "step": 64070 }, { "epoch": 10.0, "step": 64070, "total_flos": 5.356356009984e+17, "train_loss": 4.300366629619429, "train_runtime": 29153.8108, "train_samples_per_second": 281.26, "train_steps_per_second": 2.198 } ], "max_steps": 64070, "num_train_epochs": 10, "total_flos": 5.356356009984e+17, "trial_name": null, "trial_params": null }