{
  "best_metric": 3.674802303314209,
  "best_model_checkpoint": "models/GPT2_natural_function_53/checkpoint-64390",
  "epoch": 10.0,
  "global_step": 64390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 1e-05,
      "loss": 7.5304,
      "step": 1000
    },
    {
      "epoch": 0.31,
      "learning_rate": 2e-05,
      "loss": 6.092,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 3e-05,
      "loss": 5.6571,
      "step": 3000
    },
    {
      "epoch": 0.62,
      "learning_rate": 4e-05,
      "loss": 5.3448,
      "step": 4000
    },
    {
      "epoch": 0.78,
      "learning_rate": 5e-05,
      "loss": 5.1037,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "learning_rate": 6e-05,
      "loss": 4.9154,
      "step": 6000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.2904666502486006,
      "eval_loss": 4.692026615142822,
      "eval_runtime": 2.0379,
      "eval_samples_per_second": 580.51,
      "eval_steps_per_second": 4.907,
      "step": 6439
    },
    {
      "epoch": 1.09,
      "learning_rate": 7e-05,
      "loss": 4.7488,
      "step": 7000
    },
    {
      "epoch": 1.24,
      "learning_rate": 8e-05,
      "loss": 4.6188,
      "step": 8000
    },
    {
      "epoch": 1.4,
      "learning_rate": 9e-05,
      "loss": 4.5168,
      "step": 9000
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.0001,
      "loss": 4.4318,
      "step": 10000
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.816326530612245e-05,
      "loss": 4.3561,
      "step": 11000
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.632469203897775e-05,
      "loss": 4.2909,
      "step": 12000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.32767353784918896,
      "eval_loss": 4.149644374847412,
      "eval_runtime": 2.021,
      "eval_samples_per_second": 585.348,
      "eval_steps_per_second": 4.948,
      "step": 12878
    },
    {
      "epoch": 2.02,
      "learning_rate": 9.448795734510021e-05,
      "loss": 4.2328,
      "step": 13000
    },
    {
      "epoch": 2.17,
      "learning_rate": 9.264938407795552e-05,
      "loss": 4.1551,
      "step": 14000
    },
    {
      "epoch": 2.33,
      "learning_rate": 9.081264938407796e-05,
      "loss": 4.1258,
      "step": 15000
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.897407611693326e-05,
      "loss": 4.0986,
      "step": 16000
    },
    {
      "epoch": 2.64,
      "learning_rate": 8.713734142305572e-05,
      "loss": 4.0723,
      "step": 17000
    },
    {
      "epoch": 2.8,
      "learning_rate": 8.529876815591101e-05,
      "loss": 4.0509,
      "step": 18000
    },
    {
      "epoch": 2.95,
      "learning_rate": 8.346203346203346e-05,
      "loss": 4.0302,
      "step": 19000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.34635019734959166,
      "eval_loss": 3.9564931392669678,
      "eval_runtime": 2.0173,
      "eval_samples_per_second": 586.417,
      "eval_steps_per_second": 4.957,
      "step": 19317
    },
    {
      "epoch": 3.11,
      "learning_rate": 8.162346019488876e-05,
      "loss": 3.965,
      "step": 20000
    },
    {
      "epoch": 3.26,
      "learning_rate": 7.978672550101122e-05,
      "loss": 3.9397,
      "step": 21000
    },
    {
      "epoch": 3.42,
      "learning_rate": 7.794815223386652e-05,
      "loss": 3.9311,
      "step": 22000
    },
    {
      "epoch": 3.57,
      "learning_rate": 7.611141753998897e-05,
      "loss": 3.9215,
      "step": 23000
    },
    {
      "epoch": 3.73,
      "learning_rate": 7.427284427284427e-05,
      "loss": 3.9134,
      "step": 24000
    },
    {
      "epoch": 3.88,
      "learning_rate": 7.243610957896673e-05,
      "loss": 3.9006,
      "step": 25000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.3559081742001185,
      "eval_loss": 3.8566830158233643,
      "eval_runtime": 2.0115,
      "eval_samples_per_second": 588.109,
      "eval_steps_per_second": 4.971,
      "step": 25756
    },
    {
      "epoch": 4.04,
      "learning_rate": 7.059753631182204e-05,
      "loss": 3.8738,
      "step": 26000
    },
    {
      "epoch": 4.19,
      "learning_rate": 6.876080161794448e-05,
      "loss": 3.8194,
      "step": 27000
    },
    {
      "epoch": 4.35,
      "learning_rate": 6.692222835079978e-05,
      "loss": 3.8212,
      "step": 28000
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.508549365692223e-05,
      "loss": 3.8184,
      "step": 29000
    },
    {
      "epoch": 4.66,
      "learning_rate": 6.324692038977753e-05,
      "loss": 3.8122,
      "step": 30000
    },
    {
      "epoch": 4.81,
      "learning_rate": 6.141018569589998e-05,
      "loss": 3.8097,
      "step": 31000
    },
    {
      "epoch": 4.97,
      "learning_rate": 5.957161242875529e-05,
      "loss": 3.8039,
      "step": 32000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.3622513162186088,
      "eval_loss": 3.794768810272217,
      "eval_runtime": 1.9964,
      "eval_samples_per_second": 592.559,
      "eval_steps_per_second": 5.009,
      "step": 32195
    },
    {
      "epoch": 5.13,
      "learning_rate": 5.7734877734877734e-05,
      "loss": 3.7419,
      "step": 33000
    },
    {
      "epoch": 5.28,
      "learning_rate": 5.589630446773304e-05,
      "loss": 3.7348,
      "step": 34000
    },
    {
      "epoch": 5.44,
      "learning_rate": 5.405956977385549e-05,
      "loss": 3.7377,
      "step": 35000
    },
    {
      "epoch": 5.59,
      "learning_rate": 5.222099650671079e-05,
      "loss": 3.7365,
      "step": 36000
    },
    {
      "epoch": 5.75,
      "learning_rate": 5.0384261812833245e-05,
      "loss": 3.7376,
      "step": 37000
    },
    {
      "epoch": 5.9,
      "learning_rate": 4.854568854568855e-05,
      "loss": 3.7327,
      "step": 38000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.36671081795248966,
      "eval_loss": 3.752819061279297,
      "eval_runtime": 2.015,
      "eval_samples_per_second": 587.092,
      "eval_steps_per_second": 4.963,
      "step": 38634
    },
    {
      "epoch": 6.06,
      "learning_rate": 4.6708953851811e-05,
      "loss": 3.7048,
      "step": 39000
    },
    {
      "epoch": 6.21,
      "learning_rate": 4.4870380584666296e-05,
      "loss": 3.6664,
      "step": 40000
    },
    {
      "epoch": 6.37,
      "learning_rate": 4.303364589078875e-05,
      "loss": 3.6704,
      "step": 41000
    },
    {
      "epoch": 6.52,
      "learning_rate": 4.119507262364405e-05,
      "loss": 3.6736,
      "step": 42000
    },
    {
      "epoch": 6.68,
      "learning_rate": 3.9358337929766504e-05,
      "loss": 3.6738,
      "step": 43000
    },
    {
      "epoch": 6.83,
      "learning_rate": 3.752160323588895e-05,
      "loss": 3.6732,
      "step": 44000
    },
    {
      "epoch": 6.99,
      "learning_rate": 3.568302996874426e-05,
      "loss": 3.6704,
      "step": 45000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.37029173128506865,
      "eval_loss": 3.7217955589294434,
      "eval_runtime": 2.0227,
      "eval_samples_per_second": 584.848,
      "eval_steps_per_second": 4.944,
      "step": 45073
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.3844456701599556e-05,
      "loss": 3.6105,
      "step": 46000
    },
    {
      "epoch": 7.3,
      "learning_rate": 3.2005883434454865e-05,
      "loss": 3.6132,
      "step": 47000
    },
    {
      "epoch": 7.45,
      "learning_rate": 3.0169148740577315e-05,
      "loss": 3.6177,
      "step": 48000
    },
    {
      "epoch": 7.61,
      "learning_rate": 2.8330575473432618e-05,
      "loss": 3.6194,
      "step": 49000
    },
    {
      "epoch": 7.77,
      "learning_rate": 2.6493840779555067e-05,
      "loss": 3.6186,
      "step": 50000
    },
    {
      "epoch": 7.92,
      "learning_rate": 2.465526751241037e-05,
      "loss": 3.618,
      "step": 51000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.3727411292523346,
      "eval_loss": 3.6992549896240234,
      "eval_runtime": 2.0077,
      "eval_samples_per_second": 589.224,
      "eval_steps_per_second": 4.981,
      "step": 51512
    },
    {
      "epoch": 8.08,
      "learning_rate": 2.281853281853282e-05,
      "loss": 3.5905,
      "step": 52000
    },
    {
      "epoch": 8.23,
      "learning_rate": 2.0979959551388122e-05,
      "loss": 3.5663,
      "step": 53000
    },
    {
      "epoch": 8.39,
      "learning_rate": 1.914322485751057e-05,
      "loss": 3.5696,
      "step": 54000
    },
    {
      "epoch": 8.54,
      "learning_rate": 1.7304651590365877e-05,
      "loss": 3.5698,
      "step": 55000
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.5467916896488326e-05,
      "loss": 3.5701,
      "step": 56000
    },
    {
      "epoch": 8.85,
      "learning_rate": 1.362934362934363e-05,
      "loss": 3.5713,
      "step": 57000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.3757496289295199,
      "eval_loss": 3.6823694705963135,
      "eval_runtime": 2.0391,
      "eval_samples_per_second": 580.153,
      "eval_steps_per_second": 4.904,
      "step": 57951
    },
    {
      "epoch": 9.01,
      "learning_rate": 1.1792608935466078e-05,
      "loss": 3.567,
      "step": 58000
    },
    {
      "epoch": 9.16,
      "learning_rate": 9.954035668321383e-06,
      "loss": 3.5291,
      "step": 59000
    },
    {
      "epoch": 9.32,
      "learning_rate": 8.117300974443832e-06,
      "loss": 3.5312,
      "step": 60000
    },
    {
      "epoch": 9.47,
      "learning_rate": 6.278727707299137e-06,
      "loss": 3.5291,
      "step": 61000
    },
    {
      "epoch": 9.63,
      "learning_rate": 4.441993013421585e-06,
      "loss": 3.5297,
      "step": 62000
    },
    {
      "epoch": 9.78,
      "learning_rate": 2.6034197462768893e-06,
      "loss": 3.531,
      "step": 63000
    },
    {
      "epoch": 9.94,
      "learning_rate": 7.666850523993381e-07,
      "loss": 3.5291,
      "step": 64000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.37667480913998175,
      "eval_loss": 3.674802303314209,
      "eval_runtime": 2.0038,
      "eval_samples_per_second": 590.388,
      "eval_steps_per_second": 4.991,
      "step": 64390
    },
    {
      "epoch": 10.0,
      "step": 64390,
      "total_flos": 5.3836218335232e+17,
      "train_loss": 4.0085866237023655,
      "train_runtime": 29974.7216,
      "train_samples_per_second": 274.95,
      "train_steps_per_second": 2.148
    }
  ],
  "max_steps": 64390,
  "num_train_epochs": 10,
  "total_flos": 5.3836218335232e+17,
  "trial_name": null,
  "trial_params": null
}