{
  "best_metric": 3.674250841140747,
  "best_model_checkpoint": "models/GPT2_natural_function_42/checkpoint-64390",
  "epoch": 10.0,
  "global_step": 64390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 1e-05,
      "loss": 7.5617,
      "step": 1000
    },
    {
      "epoch": 0.31,
      "learning_rate": 2e-05,
      "loss": 6.0924,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 3e-05,
      "loss": 5.6562,
      "step": 3000
    },
    {
      "epoch": 0.62,
      "learning_rate": 4e-05,
      "loss": 5.3462,
      "step": 4000
    },
    {
      "epoch": 0.78,
      "learning_rate": 5e-05,
      "loss": 5.1032,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "learning_rate": 6e-05,
      "loss": 4.9122,
      "step": 6000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.2911988072496855,
      "eval_loss": 4.683091640472412,
      "eval_runtime": 2.016,
      "eval_samples_per_second": 586.819,
      "eval_steps_per_second": 4.96,
      "step": 6439
    },
    {
      "epoch": 1.09,
      "learning_rate": 7e-05,
      "loss": 4.7481,
      "step": 7000
    },
    {
      "epoch": 1.24,
      "learning_rate": 8e-05,
      "loss": 4.6153,
      "step": 8000
    },
    {
      "epoch": 1.4,
      "learning_rate": 9e-05,
      "loss": 4.5141,
      "step": 9000
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.0001,
      "loss": 4.4281,
      "step": 10000
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.816326530612245e-05,
      "loss": 4.3533,
      "step": 11000
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.632469203897775e-05,
      "loss": 4.2912,
      "step": 12000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.32702125252094966,
      "eval_loss": 4.145557403564453,
      "eval_runtime": 2.0315,
      "eval_samples_per_second": 582.318,
      "eval_steps_per_second": 4.922,
      "step": 12878
    },
    {
      "epoch": 2.02,
      "learning_rate": 9.448795734510021e-05,
      "loss": 4.2316,
      "step": 13000
    },
    {
      "epoch": 2.17,
      "learning_rate": 9.264938407795552e-05,
      "loss": 4.1534,
      "step": 14000
    },
    {
      "epoch": 2.33,
      "learning_rate": 9.081264938407796e-05,
      "loss": 4.1261,
      "step": 15000
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.897407611693326e-05,
      "loss": 4.0963,
      "step": 16000
    },
    {
      "epoch": 2.64,
      "learning_rate": 8.713734142305572e-05,
      "loss": 4.0713,
      "step": 17000
    },
    {
      "epoch": 2.8,
      "learning_rate": 8.529876815591101e-05,
      "loss": 4.0484,
      "step": 18000
    },
    {
      "epoch": 2.95,
      "learning_rate": 8.346203346203346e-05,
      "loss": 4.0258,
      "step": 19000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.3454050492209184,
      "eval_loss": 3.9542500972747803,
      "eval_runtime": 2.0276,
      "eval_samples_per_second": 583.446,
      "eval_steps_per_second": 4.932,
      "step": 19317
    },
    {
      "epoch": 3.11,
      "learning_rate": 8.162346019488876e-05,
      "loss": 3.9666,
      "step": 20000
    },
    {
      "epoch": 3.26,
      "learning_rate": 7.978672550101122e-05,
      "loss": 3.9382,
      "step": 21000
    },
    {
      "epoch": 3.42,
      "learning_rate": 7.794815223386652e-05,
      "loss": 3.9307,
      "step": 22000
    },
    {
      "epoch": 3.57,
      "learning_rate": 7.611141753998897e-05,
      "loss": 3.9198,
      "step": 23000
    },
    {
      "epoch": 3.73,
      "learning_rate": 7.427284427284427e-05,
      "loss": 3.9101,
      "step": 24000
    },
    {
      "epoch": 3.88,
      "learning_rate": 7.243610957896673e-05,
      "loss": 3.8998,
      "step": 25000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.35441723630700006,
      "eval_loss": 3.8564870357513428,
      "eval_runtime": 2.0237,
      "eval_samples_per_second": 584.57,
      "eval_steps_per_second": 4.941,
      "step": 25756
    },
    {
      "epoch": 4.04,
      "learning_rate": 7.059753631182204e-05,
      "loss": 3.8732,
      "step": 26000
    },
    {
      "epoch": 4.19,
      "learning_rate": 6.876080161794448e-05,
      "loss": 3.8189,
      "step": 27000
    },
    {
      "epoch": 4.35,
      "learning_rate": 6.692222835079978e-05,
      "loss": 3.8169,
      "step": 28000
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.508549365692223e-05,
      "loss": 3.8181,
      "step": 29000
    },
    {
      "epoch": 4.66,
      "learning_rate": 6.324692038977753e-05,
      "loss": 3.8129,
      "step": 30000
    },
    {
      "epoch": 4.81,
      "learning_rate": 6.141018569589998e-05,
      "loss": 3.8089,
      "step": 31000
    },
    {
      "epoch": 4.97,
      "learning_rate": 5.957161242875529e-05,
      "loss": 3.8025,
      "step": 32000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.36189854966354057,
      "eval_loss": 3.795483112335205,
      "eval_runtime": 2.0306,
      "eval_samples_per_second": 582.574,
      "eval_steps_per_second": 4.925,
      "step": 32195
    },
    {
      "epoch": 5.13,
      "learning_rate": 5.7734877734877734e-05,
      "loss": 3.7413,
      "step": 33000
    },
    {
      "epoch": 5.28,
      "learning_rate": 5.589630446773304e-05,
      "loss": 3.7338,
      "step": 34000
    },
    {
      "epoch": 5.44,
      "learning_rate": 5.405956977385549e-05,
      "loss": 3.7373,
      "step": 35000
    },
    {
      "epoch": 5.59,
      "learning_rate": 5.222099650671079e-05,
      "loss": 3.7383,
      "step": 36000
    },
    {
      "epoch": 5.75,
      "learning_rate": 5.0384261812833245e-05,
      "loss": 3.7345,
      "step": 37000
    },
    {
      "epoch": 5.9,
      "learning_rate": 4.854568854568855e-05,
      "loss": 3.7321,
      "step": 38000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.36563255036907366,
      "eval_loss": 3.7517600059509277,
      "eval_runtime": 2.0166,
      "eval_samples_per_second": 586.638,
      "eval_steps_per_second": 4.959,
      "step": 38634
    },
    {
      "epoch": 6.06,
      "learning_rate": 4.6708953851811e-05,
      "loss": 3.7007,
      "step": 39000
    },
    {
      "epoch": 6.21,
      "learning_rate": 4.4870380584666296e-05,
      "loss": 3.6644,
      "step": 40000
    },
    {
      "epoch": 6.37,
      "learning_rate": 4.303364589078875e-05,
      "loss": 3.6716,
      "step": 41000
    },
    {
      "epoch": 6.52,
      "learning_rate": 4.119507262364405e-05,
      "loss": 3.6729,
      "step": 42000
    },
    {
      "epoch": 6.68,
      "learning_rate": 3.9358337929766504e-05,
      "loss": 3.6723,
      "step": 43000
    },
    {
      "epoch": 6.83,
      "learning_rate": 3.751976466262181e-05,
      "loss": 3.6717,
      "step": 44000
    },
    {
      "epoch": 6.99,
      "learning_rate": 3.568302996874426e-05,
      "loss": 3.6704,
      "step": 45000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.37023182753043443,
      "eval_loss": 3.7216103076934814,
      "eval_runtime": 2.024,
      "eval_samples_per_second": 584.495,
      "eval_steps_per_second": 4.941,
      "step": 45073
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.3844456701599556e-05,
      "loss": 3.6092,
      "step": 46000
    },
    {
      "epoch": 7.3,
      "learning_rate": 3.200772200772201e-05,
      "loss": 3.6111,
      "step": 47000
    },
    {
      "epoch": 7.45,
      "learning_rate": 3.0169148740577315e-05,
      "loss": 3.6175,
      "step": 48000
    },
    {
      "epoch": 7.61,
      "learning_rate": 2.8332414046699764e-05,
      "loss": 3.6178,
      "step": 49000
    },
    {
      "epoch": 7.77,
      "learning_rate": 2.6493840779555067e-05,
      "loss": 3.6179,
      "step": 50000
    },
    {
      "epoch": 7.92,
      "learning_rate": 2.4657106085677516e-05,
      "loss": 3.6185,
      "step": 51000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.372614665770329,
      "eval_loss": 3.6979215145111084,
      "eval_runtime": 2.0125,
      "eval_samples_per_second": 587.82,
      "eval_steps_per_second": 4.969,
      "step": 51512
    },
    {
      "epoch": 8.08,
      "learning_rate": 2.281853281853282e-05,
      "loss": 3.5893,
      "step": 52000
    },
    {
      "epoch": 8.23,
      "learning_rate": 2.0981798124655268e-05,
      "loss": 3.5628,
      "step": 53000
    },
    {
      "epoch": 8.39,
      "learning_rate": 1.914322485751057e-05,
      "loss": 3.5694,
      "step": 54000
    },
    {
      "epoch": 8.54,
      "learning_rate": 1.730649016363302e-05,
      "loss": 3.57,
      "step": 55000
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.5467916896488326e-05,
      "loss": 3.5698,
      "step": 56000
    },
    {
      "epoch": 8.85,
      "learning_rate": 1.3631182202610776e-05,
      "loss": 3.5692,
      "step": 57000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.3745981456459954,
      "eval_loss": 3.681605100631714,
      "eval_runtime": 2.023,
      "eval_samples_per_second": 584.781,
      "eval_steps_per_second": 4.943,
      "step": 57951
    },
    {
      "epoch": 9.01,
      "learning_rate": 1.1792608935466078e-05,
      "loss": 3.5682,
      "step": 58000
    },
    {
      "epoch": 9.16,
      "learning_rate": 9.955874241588528e-06,
      "loss": 3.5273,
      "step": 59000
    },
    {
      "epoch": 9.32,
      "learning_rate": 8.117300974443832e-06,
      "loss": 3.5306,
      "step": 60000
    },
    {
      "epoch": 9.47,
      "learning_rate": 6.2805662805662815e-06,
      "loss": 3.5293,
      "step": 61000
    },
    {
      "epoch": 9.63,
      "learning_rate": 4.441993013421585e-06,
      "loss": 3.529,
      "step": 62000
    },
    {
      "epoch": 9.78,
      "learning_rate": 2.605258319544034e-06,
      "loss": 3.5288,
      "step": 63000
    },
    {
      "epoch": 9.94,
      "learning_rate": 7.666850523993381e-07,
      "loss": 3.5276,
      "step": 64000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.3759892439480568,
      "eval_loss": 3.674250841140747,
      "eval_runtime": 2.0424,
      "eval_samples_per_second": 579.222,
      "eval_steps_per_second": 4.896,
      "step": 64390
    },
    {
      "epoch": 10.0,
      "step": 64390,
      "total_flos": 5.3836218335232e+17,
      "train_loss": 4.00796840269015,
      "train_runtime": 29832.6906,
      "train_samples_per_second": 276.259,
      "train_steps_per_second": 2.158
    }
  ],
  "max_steps": 64390,
  "num_train_epochs": 10,
  "total_flos": 5.3836218335232e+17,
  "trial_name": null,
  "trial_params": null
}