{ "best_metric": 3.674250841140747, "best_model_checkpoint": "models/GPT2_natural_function_42/checkpoint-64390", "epoch": 10.0, "global_step": 64390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 7.5617, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 6.0924, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 5.6562, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 5.3462, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 5.1032, "step": 5000 }, { "epoch": 0.93, "learning_rate": 6e-05, "loss": 4.9122, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.2911988072496855, "eval_loss": 4.683091640472412, "eval_runtime": 2.016, "eval_samples_per_second": 586.819, "eval_steps_per_second": 4.96, "step": 6439 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 4.7481, "step": 7000 }, { "epoch": 1.24, "learning_rate": 8e-05, "loss": 4.6153, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 4.5141, "step": 9000 }, { "epoch": 1.55, "learning_rate": 0.0001, "loss": 4.4281, "step": 10000 }, { "epoch": 1.71, "learning_rate": 9.816326530612245e-05, "loss": 4.3533, "step": 11000 }, { "epoch": 1.86, "learning_rate": 9.632469203897775e-05, "loss": 4.2912, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.32702125252094966, "eval_loss": 4.145557403564453, "eval_runtime": 2.0315, "eval_samples_per_second": 582.318, "eval_steps_per_second": 4.922, "step": 12878 }, { "epoch": 2.02, "learning_rate": 9.448795734510021e-05, "loss": 4.2316, "step": 13000 }, { "epoch": 2.17, "learning_rate": 9.264938407795552e-05, "loss": 4.1534, "step": 14000 }, { "epoch": 2.33, "learning_rate": 9.081264938407796e-05, "loss": 4.1261, "step": 15000 }, { "epoch": 2.48, "learning_rate": 8.897407611693326e-05, "loss": 4.0963, "step": 16000 }, { "epoch": 2.64, "learning_rate": 8.713734142305572e-05, "loss": 4.0713, "step": 17000 }, { "epoch": 2.8, "learning_rate": 8.529876815591101e-05, "loss": 4.0484, "step": 18000 }, { "epoch": 2.95, "learning_rate": 8.346203346203346e-05, "loss": 4.0258, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.3454050492209184, "eval_loss": 3.9542500972747803, "eval_runtime": 2.0276, "eval_samples_per_second": 583.446, "eval_steps_per_second": 4.932, "step": 19317 }, { "epoch": 3.11, "learning_rate": 8.162346019488876e-05, "loss": 3.9666, "step": 20000 }, { "epoch": 3.26, "learning_rate": 7.978672550101122e-05, "loss": 3.9382, "step": 21000 }, { "epoch": 3.42, "learning_rate": 7.794815223386652e-05, "loss": 3.9307, "step": 22000 }, { "epoch": 3.57, "learning_rate": 7.611141753998897e-05, "loss": 3.9198, "step": 23000 }, { "epoch": 3.73, "learning_rate": 7.427284427284427e-05, "loss": 3.9101, "step": 24000 }, { "epoch": 3.88, "learning_rate": 7.243610957896673e-05, "loss": 3.8998, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.35441723630700006, "eval_loss": 3.8564870357513428, "eval_runtime": 2.0237, "eval_samples_per_second": 584.57, "eval_steps_per_second": 4.941, "step": 25756 }, { "epoch": 4.04, "learning_rate": 7.059753631182204e-05, "loss": 3.8732, "step": 26000 }, { "epoch": 4.19, "learning_rate": 6.876080161794448e-05, "loss": 3.8189, "step": 27000 }, { "epoch": 4.35, "learning_rate": 6.692222835079978e-05, "loss": 3.8169, "step": 28000 }, { "epoch": 4.5, "learning_rate": 6.508549365692223e-05, "loss": 3.8181, "step": 29000 }, { "epoch": 4.66, "learning_rate": 6.324692038977753e-05, "loss": 3.8129, "step": 30000 }, { "epoch": 4.81, "learning_rate": 6.141018569589998e-05, "loss": 3.8089, "step": 31000 }, { "epoch": 4.97, "learning_rate": 5.957161242875529e-05, "loss": 3.8025, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.36189854966354057, "eval_loss": 3.795483112335205, "eval_runtime": 2.0306, "eval_samples_per_second": 582.574, "eval_steps_per_second": 4.925, "step": 32195 }, { "epoch": 5.13, "learning_rate": 5.7734877734877734e-05, "loss": 3.7413, "step": 33000 }, { "epoch": 5.28, "learning_rate": 5.589630446773304e-05, "loss": 3.7338, "step": 34000 }, { "epoch": 5.44, "learning_rate": 5.405956977385549e-05, "loss": 3.7373, "step": 35000 }, { "epoch": 5.59, "learning_rate": 5.222099650671079e-05, "loss": 3.7383, "step": 36000 }, { "epoch": 5.75, "learning_rate": 5.0384261812833245e-05, "loss": 3.7345, "step": 37000 }, { "epoch": 5.9, "learning_rate": 4.854568854568855e-05, "loss": 3.7321, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.36563255036907366, "eval_loss": 3.7517600059509277, "eval_runtime": 2.0166, "eval_samples_per_second": 586.638, "eval_steps_per_second": 4.959, "step": 38634 }, { "epoch": 6.06, "learning_rate": 4.6708953851811e-05, "loss": 3.7007, "step": 39000 }, { "epoch": 6.21, "learning_rate": 4.4870380584666296e-05, "loss": 3.6644, "step": 40000 }, { "epoch": 6.37, "learning_rate": 4.303364589078875e-05, "loss": 3.6716, "step": 41000 }, { "epoch": 6.52, "learning_rate": 4.119507262364405e-05, "loss": 3.6729, "step": 42000 }, { "epoch": 6.68, "learning_rate": 3.9358337929766504e-05, "loss": 3.6723, "step": 43000 }, { "epoch": 6.83, "learning_rate": 3.751976466262181e-05, "loss": 3.6717, "step": 44000 }, { "epoch": 6.99, "learning_rate": 3.568302996874426e-05, "loss": 3.6704, "step": 45000 }, { "epoch": 7.0, "eval_accuracy": 0.37023182753043443, "eval_loss": 3.7216103076934814, "eval_runtime": 2.024, "eval_samples_per_second": 584.495, "eval_steps_per_second": 4.941, "step": 45073 }, { "epoch": 7.14, "learning_rate": 3.3844456701599556e-05, "loss": 3.6092, "step": 46000 }, { "epoch": 7.3, "learning_rate": 3.200772200772201e-05, "loss": 3.6111, "step": 47000 }, { "epoch": 7.45, "learning_rate": 3.0169148740577315e-05, "loss": 3.6175, "step": 48000 }, { "epoch": 7.61, "learning_rate": 2.8332414046699764e-05, "loss": 3.6178, "step": 49000 }, { "epoch": 7.77, "learning_rate": 2.6493840779555067e-05, "loss": 3.6179, "step": 50000 }, { "epoch": 7.92, "learning_rate": 2.4657106085677516e-05, "loss": 3.6185, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.372614665770329, "eval_loss": 3.6979215145111084, "eval_runtime": 2.0125, "eval_samples_per_second": 587.82, "eval_steps_per_second": 4.969, "step": 51512 }, { "epoch": 8.08, "learning_rate": 2.281853281853282e-05, "loss": 3.5893, "step": 52000 }, { "epoch": 8.23, "learning_rate": 2.0981798124655268e-05, "loss": 3.5628, "step": 53000 }, { "epoch": 8.39, "learning_rate": 1.914322485751057e-05, "loss": 3.5694, "step": 54000 }, { "epoch": 8.54, "learning_rate": 1.730649016363302e-05, "loss": 3.57, "step": 55000 }, { "epoch": 8.7, "learning_rate": 1.5467916896488326e-05, "loss": 3.5698, "step": 56000 }, { "epoch": 8.85, "learning_rate": 1.3631182202610776e-05, "loss": 3.5692, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.3745981456459954, "eval_loss": 3.681605100631714, "eval_runtime": 2.023, "eval_samples_per_second": 584.781, "eval_steps_per_second": 4.943, "step": 57951 }, { "epoch": 9.01, "learning_rate": 1.1792608935466078e-05, "loss": 3.5682, "step": 58000 }, { "epoch": 9.16, "learning_rate": 9.955874241588528e-06, "loss": 3.5273, "step": 59000 }, { "epoch": 9.32, "learning_rate": 8.117300974443832e-06, "loss": 3.5306, "step": 60000 }, { "epoch": 9.47, "learning_rate": 6.2805662805662815e-06, "loss": 3.5293, "step": 61000 }, { "epoch": 9.63, "learning_rate": 4.441993013421585e-06, "loss": 3.529, "step": 62000 }, { "epoch": 9.78, "learning_rate": 2.605258319544034e-06, "loss": 3.5288, "step": 63000 }, { "epoch": 9.94, "learning_rate": 7.666850523993381e-07, "loss": 3.5276, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.3759892439480568, "eval_loss": 3.674250841140747, "eval_runtime": 2.0424, "eval_samples_per_second": 579.222, "eval_steps_per_second": 4.896, "step": 64390 }, { "epoch": 10.0, "step": 64390, "total_flos": 5.3836218335232e+17, "train_loss": 4.00796840269015, "train_runtime": 29832.6906, "train_samples_per_second": 276.259, "train_steps_per_second": 2.158 } ], "max_steps": 64390, "num_train_epochs": 10, "total_flos": 5.3836218335232e+17, "trial_name": null, "trial_params": null }