| { |
| "best_metric": 3.7814266681671143, |
| "best_model_checkpoint": "models/GPT2_within_boundary_42/checkpoint-64390", |
| "epoch": 10.0, |
| "global_step": 64390, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 7.6113, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 6.1925, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 5.7568, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 5.4549, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 5.2188, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6e-05, |
| "loss": 5.0339, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.27648910750061567, |
| "eval_loss": 4.81440544128418, |
| "eval_runtime": 2.0217, |
| "eval_samples_per_second": 585.141, |
| "eval_steps_per_second": 4.946, |
| "step": 6439 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 4.875, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 8e-05, |
| "loss": 4.7442, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 4.642, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0001, |
| "loss": 4.5539, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.816326530612245e-05, |
| "loss": 4.4768, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 9.632469203897775e-05, |
| "loss": 4.4123, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.3180024094621308, |
| "eval_loss": 4.2657365798950195, |
| "eval_runtime": 1.9844, |
| "eval_samples_per_second": 596.14, |
| "eval_steps_per_second": 5.039, |
| "step": 12878 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.448795734510021e-05, |
| "loss": 4.3514, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.264938407795552e-05, |
| "loss": 4.2724, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.081264938407796e-05, |
| "loss": 4.2442, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.897407611693326e-05, |
| "loss": 4.213, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 8.713734142305572e-05, |
| "loss": 4.1873, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 8.529876815591101e-05, |
| "loss": 4.1641, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.346203346203346e-05, |
| "loss": 4.1405, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.3367855645263277, |
| "eval_loss": 4.068915367126465, |
| "eval_runtime": 2.0079, |
| "eval_samples_per_second": 589.181, |
| "eval_steps_per_second": 4.98, |
| "step": 19317 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.162346019488876e-05, |
| "loss": 4.0802, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 7.978672550101122e-05, |
| "loss": 4.0519, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 7.794815223386652e-05, |
| "loss": 4.044, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 7.611141753998897e-05, |
| "loss": 4.0323, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 7.427284427284427e-05, |
| "loss": 4.0222, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 7.243610957896673e-05, |
| "loss": 4.011, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.34676952363203123, |
| "eval_loss": 3.9678547382354736, |
| "eval_runtime": 1.9888, |
| "eval_samples_per_second": 594.835, |
| "eval_steps_per_second": 5.028, |
| "step": 25756 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 7.059753631182204e-05, |
| "loss": 3.9845, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 6.876080161794448e-05, |
| "loss": 3.9294, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 6.692222835079978e-05, |
| "loss": 3.9276, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 6.508549365692223e-05, |
| "loss": 3.9279, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 6.324692038977753e-05, |
| "loss": 3.9227, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 6.141018569589998e-05, |
| "loss": 3.9178, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 5.957161242875529e-05, |
| "loss": 3.9116, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.35384482265160644, |
| "eval_loss": 3.906778335571289, |
| "eval_runtime": 1.989, |
| "eval_samples_per_second": 594.762, |
| "eval_steps_per_second": 5.028, |
| "step": 32195 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 5.7734877734877734e-05, |
| "loss": 3.8501, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 5.589630446773304e-05, |
| "loss": 3.8422, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 5.405956977385549e-05, |
| "loss": 3.8456, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 5.222099650671079e-05, |
| "loss": 3.8461, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 5.0384261812833245e-05, |
| "loss": 3.8422, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 4.854568854568855e-05, |
| "loss": 3.84, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.35970873463302294, |
| "eval_loss": 3.859607696533203, |
| "eval_runtime": 1.9868, |
| "eval_samples_per_second": 595.443, |
| "eval_steps_per_second": 5.033, |
| "step": 38634 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 4.6708953851811e-05, |
| "loss": 3.8083, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 4.4870380584666296e-05, |
| "loss": 3.7712, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 4.303364589078875e-05, |
| "loss": 3.7783, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 4.119507262364405e-05, |
| "loss": 3.7797, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 3.9358337929766504e-05, |
| "loss": 3.7792, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 3.751976466262181e-05, |
| "loss": 3.7777, |
| "step": 44000 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 3.568302996874426e-05, |
| "loss": 3.7769, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.3633562076929733, |
| "eval_loss": 3.8291213512420654, |
| "eval_runtime": 1.9804, |
| "eval_samples_per_second": 597.34, |
| "eval_steps_per_second": 5.049, |
| "step": 45073 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 3.3844456701599556e-05, |
| "loss": 3.715, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 3.200772200772201e-05, |
| "loss": 3.7167, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 3.0169148740577315e-05, |
| "loss": 3.7231, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.61, |
| "learning_rate": 2.8332414046699764e-05, |
| "loss": 3.7237, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 2.6493840779555067e-05, |
| "loss": 3.7234, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 2.4657106085677516e-05, |
| "loss": 3.7239, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.3666043223886955, |
| "eval_loss": 3.806194543838501, |
| "eval_runtime": 1.9824, |
| "eval_samples_per_second": 596.737, |
| "eval_steps_per_second": 5.044, |
| "step": 51512 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 2.281853281853282e-05, |
| "loss": 3.6947, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 2.0981798124655268e-05, |
| "loss": 3.668, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 1.914322485751057e-05, |
| "loss": 3.6744, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 1.730649016363302e-05, |
| "loss": 3.6748, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 1.5467916896488326e-05, |
| "loss": 3.6747, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 1.3631182202610776e-05, |
| "loss": 3.6741, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.36866767393720756, |
| "eval_loss": 3.7892143726348877, |
| "eval_runtime": 1.9796, |
| "eval_samples_per_second": 597.605, |
| "eval_steps_per_second": 5.052, |
| "step": 57951 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 1.1792608935466078e-05, |
| "loss": 3.6731, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 9.955874241588528e-06, |
| "loss": 3.6324, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 8.117300974443832e-06, |
| "loss": 3.6352, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 6.2805662805662815e-06, |
| "loss": 3.6333, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 4.441993013421585e-06, |
| "loss": 3.6333, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 2.605258319544034e-06, |
| "loss": 3.6329, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 7.666850523993381e-07, |
| "loss": 3.632, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.37035829101244, |
| "eval_loss": 3.7814266681671143, |
| "eval_runtime": 1.9839, |
| "eval_samples_per_second": 596.29, |
| "eval_steps_per_second": 5.04, |
| "step": 64390 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 64390, |
| "total_flos": 5.3836218335232e+17, |
| "train_loss": 4.117447375732839, |
| "train_runtime": 28248.9117, |
| "train_samples_per_second": 291.747, |
| "train_steps_per_second": 2.279 |
| } |
| ], |
| "max_steps": 64390, |
| "num_train_epochs": 10, |
| "total_flos": 5.3836218335232e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|