{ "best_metric": 3.674802303314209, "best_model_checkpoint": "models/GPT2_natural_function_53/checkpoint-64390", "epoch": 10.0, "global_step": 64390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1e-05, "loss": 7.5304, "step": 1000 }, { "epoch": 0.31, "learning_rate": 2e-05, "loss": 6.092, "step": 2000 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 5.6571, "step": 3000 }, { "epoch": 0.62, "learning_rate": 4e-05, "loss": 5.3448, "step": 4000 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 5.1037, "step": 5000 }, { "epoch": 0.93, "learning_rate": 6e-05, "loss": 4.9154, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.2904666502486006, "eval_loss": 4.692026615142822, "eval_runtime": 2.0379, "eval_samples_per_second": 580.51, "eval_steps_per_second": 4.907, "step": 6439 }, { "epoch": 1.09, "learning_rate": 7e-05, "loss": 4.7488, "step": 7000 }, { "epoch": 1.24, "learning_rate": 8e-05, "loss": 4.6188, "step": 8000 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 4.5168, "step": 9000 }, { "epoch": 1.55, "learning_rate": 0.0001, "loss": 4.4318, "step": 10000 }, { "epoch": 1.71, "learning_rate": 9.816326530612245e-05, "loss": 4.3561, "step": 11000 }, { "epoch": 1.86, "learning_rate": 9.632469203897775e-05, "loss": 4.2909, "step": 12000 }, { "epoch": 2.0, "eval_accuracy": 0.32767353784918896, "eval_loss": 4.149644374847412, "eval_runtime": 2.021, "eval_samples_per_second": 585.348, "eval_steps_per_second": 4.948, "step": 12878 }, { "epoch": 2.02, "learning_rate": 9.448795734510021e-05, "loss": 4.2328, "step": 13000 }, { "epoch": 2.17, "learning_rate": 9.264938407795552e-05, "loss": 4.1551, "step": 14000 }, { "epoch": 2.33, "learning_rate": 9.081264938407796e-05, "loss": 4.1258, "step": 15000 }, { "epoch": 2.48, "learning_rate": 8.897407611693326e-05, "loss": 4.0986, "step": 16000 }, { "epoch": 2.64, "learning_rate": 8.713734142305572e-05, "loss": 4.0723, "step": 17000 }, { "epoch": 2.8, "learning_rate": 8.529876815591101e-05, "loss": 4.0509, "step": 18000 }, { "epoch": 2.95, "learning_rate": 8.346203346203346e-05, "loss": 4.0302, "step": 19000 }, { "epoch": 3.0, "eval_accuracy": 0.34635019734959166, "eval_loss": 3.9564931392669678, "eval_runtime": 2.0173, "eval_samples_per_second": 586.417, "eval_steps_per_second": 4.957, "step": 19317 }, { "epoch": 3.11, "learning_rate": 8.162346019488876e-05, "loss": 3.965, "step": 20000 }, { "epoch": 3.26, "learning_rate": 7.978672550101122e-05, "loss": 3.9397, "step": 21000 }, { "epoch": 3.42, "learning_rate": 7.794815223386652e-05, "loss": 3.9311, "step": 22000 }, { "epoch": 3.57, "learning_rate": 7.611141753998897e-05, "loss": 3.9215, "step": 23000 }, { "epoch": 3.73, "learning_rate": 7.427284427284427e-05, "loss": 3.9134, "step": 24000 }, { "epoch": 3.88, "learning_rate": 7.243610957896673e-05, "loss": 3.9006, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.3559081742001185, "eval_loss": 3.8566830158233643, "eval_runtime": 2.0115, "eval_samples_per_second": 588.109, "eval_steps_per_second": 4.971, "step": 25756 }, { "epoch": 4.04, "learning_rate": 7.059753631182204e-05, "loss": 3.8738, "step": 26000 }, { "epoch": 4.19, "learning_rate": 6.876080161794448e-05, "loss": 3.8194, "step": 27000 }, { "epoch": 4.35, "learning_rate": 6.692222835079978e-05, "loss": 3.8212, "step": 28000 }, { "epoch": 4.5, "learning_rate": 6.508549365692223e-05, "loss": 3.8184, "step": 29000 }, { "epoch": 4.66, "learning_rate": 6.324692038977753e-05, "loss": 3.8122, "step": 30000 }, { "epoch": 4.81, "learning_rate": 6.141018569589998e-05, "loss": 3.8097, "step": 31000 }, { "epoch": 4.97, "learning_rate": 5.957161242875529e-05, "loss": 3.8039, "step": 32000 }, { "epoch": 5.0, "eval_accuracy": 0.3622513162186088, "eval_loss": 3.794768810272217, "eval_runtime": 1.9964, "eval_samples_per_second": 592.559, "eval_steps_per_second": 5.009, "step": 32195 }, { "epoch": 5.13, "learning_rate": 5.7734877734877734e-05, "loss": 3.7419, "step": 33000 }, { "epoch": 5.28, "learning_rate": 5.589630446773304e-05, "loss": 3.7348, "step": 34000 }, { "epoch": 5.44, "learning_rate": 5.405956977385549e-05, "loss": 3.7377, "step": 35000 }, { "epoch": 5.59, "learning_rate": 5.222099650671079e-05, "loss": 3.7365, "step": 36000 }, { "epoch": 5.75, "learning_rate": 5.0384261812833245e-05, "loss": 3.7376, "step": 37000 }, { "epoch": 5.9, "learning_rate": 4.854568854568855e-05, "loss": 3.7327, "step": 38000 }, { "epoch": 6.0, "eval_accuracy": 0.36671081795248966, "eval_loss": 3.752819061279297, "eval_runtime": 2.015, "eval_samples_per_second": 587.092, "eval_steps_per_second": 4.963, "step": 38634 }, { "epoch": 6.06, "learning_rate": 4.6708953851811e-05, "loss": 3.7048, "step": 39000 }, { "epoch": 6.21, "learning_rate": 4.4870380584666296e-05, "loss": 3.6664, "step": 40000 }, { "epoch": 6.37, "learning_rate": 4.303364589078875e-05, "loss": 3.6704, "step": 41000 }, { "epoch": 6.52, "learning_rate": 4.119507262364405e-05, "loss": 3.6736, "step": 42000 }, { "epoch": 6.68, "learning_rate": 3.9358337929766504e-05, "loss": 3.6738, "step": 43000 }, { "epoch": 6.83, "learning_rate": 3.752160323588895e-05, "loss": 3.6732, "step": 44000 }, { "epoch": 6.99, "learning_rate": 3.568302996874426e-05, "loss": 3.6704, "step": 45000 }, { "epoch": 7.0, "eval_accuracy": 0.37029173128506865, "eval_loss": 3.7217955589294434, "eval_runtime": 2.0227, "eval_samples_per_second": 584.848, "eval_steps_per_second": 4.944, "step": 45073 }, { "epoch": 7.14, "learning_rate": 3.3844456701599556e-05, "loss": 3.6105, "step": 46000 }, { "epoch": 7.3, "learning_rate": 3.2005883434454865e-05, "loss": 3.6132, "step": 47000 }, { "epoch": 7.45, "learning_rate": 3.0169148740577315e-05, "loss": 3.6177, "step": 48000 }, { "epoch": 7.61, "learning_rate": 2.8330575473432618e-05, "loss": 3.6194, "step": 49000 }, { "epoch": 7.77, "learning_rate": 2.6493840779555067e-05, "loss": 3.6186, "step": 50000 }, { "epoch": 7.92, "learning_rate": 2.465526751241037e-05, "loss": 3.618, "step": 51000 }, { "epoch": 8.0, "eval_accuracy": 0.3727411292523346, "eval_loss": 3.6992549896240234, "eval_runtime": 2.0077, "eval_samples_per_second": 589.224, "eval_steps_per_second": 4.981, "step": 51512 }, { "epoch": 8.08, "learning_rate": 2.281853281853282e-05, "loss": 3.5905, "step": 52000 }, { "epoch": 8.23, "learning_rate": 2.0979959551388122e-05, "loss": 3.5663, "step": 53000 }, { "epoch": 8.39, "learning_rate": 1.914322485751057e-05, "loss": 3.5696, "step": 54000 }, { "epoch": 8.54, "learning_rate": 1.7304651590365877e-05, "loss": 3.5698, "step": 55000 }, { "epoch": 8.7, "learning_rate": 1.5467916896488326e-05, "loss": 3.5701, "step": 56000 }, { "epoch": 8.85, "learning_rate": 1.362934362934363e-05, "loss": 3.5713, "step": 57000 }, { "epoch": 9.0, "eval_accuracy": 0.3757496289295199, "eval_loss": 3.6823694705963135, "eval_runtime": 2.0391, "eval_samples_per_second": 580.153, "eval_steps_per_second": 4.904, "step": 57951 }, { "epoch": 9.01, "learning_rate": 1.1792608935466078e-05, "loss": 3.567, "step": 58000 }, { "epoch": 9.16, "learning_rate": 9.954035668321383e-06, "loss": 3.5291, "step": 59000 }, { "epoch": 9.32, "learning_rate": 8.117300974443832e-06, "loss": 3.5312, "step": 60000 }, { "epoch": 9.47, "learning_rate": 6.278727707299137e-06, "loss": 3.5291, "step": 61000 }, { "epoch": 9.63, "learning_rate": 4.441993013421585e-06, "loss": 3.5297, "step": 62000 }, { "epoch": 9.78, "learning_rate": 2.6034197462768893e-06, "loss": 3.531, "step": 63000 }, { "epoch": 9.94, "learning_rate": 7.666850523993381e-07, "loss": 3.5291, "step": 64000 }, { "epoch": 10.0, "eval_accuracy": 0.37667480913998175, "eval_loss": 3.674802303314209, "eval_runtime": 2.0038, "eval_samples_per_second": 590.388, "eval_steps_per_second": 4.991, "step": 64390 }, { "epoch": 10.0, "step": 64390, "total_flos": 5.3836218335232e+17, "train_loss": 4.0085866237023655, "train_runtime": 29974.7216, "train_samples_per_second": 274.95, "train_steps_per_second": 2.148 } ], "max_steps": 64390, "num_train_epochs": 10, "total_flos": 5.3836218335232e+17, "trial_name": null, "trial_params": null }