| { |
| "best_metric": 3.780226469039917, |
| "best_model_checkpoint": "models/GPT2_within_boundary_53/checkpoint-64390", |
| "epoch": 10.0, |
| "global_step": 64390, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 7.5787, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 6.1944, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 5.7613, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 5.4544, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 5.2199, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6e-05, |
| "loss": 5.0366, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.274592155270532, |
| "eval_loss": 4.82023811340332, |
| "eval_runtime": 1.9746, |
| "eval_samples_per_second": 599.107, |
| "eval_steps_per_second": 5.064, |
| "step": 6439 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 4.8755, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 8e-05, |
| "loss": 4.747, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 4.6445, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0001, |
| "loss": 4.5579, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.816326530612245e-05, |
| "loss": 4.481, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 9.632469203897775e-05, |
| "loss": 4.4144, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.3175963951251656, |
| "eval_loss": 4.273038864135742, |
| "eval_runtime": 2.0074, |
| "eval_samples_per_second": 589.311, |
| "eval_steps_per_second": 4.981, |
| "step": 12878 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.448795734510021e-05, |
| "loss": 4.3554, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.264938407795552e-05, |
| "loss": 4.2769, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.081264938407796e-05, |
| "loss": 4.2467, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.897407611693326e-05, |
| "loss": 4.2188, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 8.713734142305572e-05, |
| "loss": 4.1913, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 8.529876815591101e-05, |
| "loss": 4.1693, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.346203346203346e-05, |
| "loss": 4.1479, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.33655260548052796, |
| "eval_loss": 4.074155807495117, |
| "eval_runtime": 2.0077, |
| "eval_samples_per_second": 589.223, |
| "eval_steps_per_second": 4.981, |
| "step": 19317 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.162346019488876e-05, |
| "loss": 4.0822, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 7.978672550101122e-05, |
| "loss": 4.0563, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 7.794815223386652e-05, |
| "loss": 4.0468, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 7.611141753998897e-05, |
| "loss": 4.0367, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 7.427284427284427e-05, |
| "loss": 4.0278, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 7.243610957896673e-05, |
| "loss": 4.0152, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.34734193728742485, |
| "eval_loss": 3.9701826572418213, |
| "eval_runtime": 1.991, |
| "eval_samples_per_second": 594.178, |
| "eval_steps_per_second": 5.023, |
| "step": 25756 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 7.059753631182204e-05, |
| "loss": 3.987, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 6.876080161794448e-05, |
| "loss": 3.9326, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 6.692222835079978e-05, |
| "loss": 3.934, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 6.508549365692223e-05, |
| "loss": 3.9308, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 6.324692038977753e-05, |
| "loss": 3.9249, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 6.141018569589998e-05, |
| "loss": 3.9216, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 5.957161242875529e-05, |
| "loss": 3.9157, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.35449710797984574, |
| "eval_loss": 3.9046249389648438, |
| "eval_runtime": 1.9871, |
| "eval_samples_per_second": 595.334, |
| "eval_steps_per_second": 5.032, |
| "step": 32195 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 5.7734877734877734e-05, |
| "loss": 3.8528, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 5.589630446773304e-05, |
| "loss": 3.8457, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 5.405956977385549e-05, |
| "loss": 3.8486, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 5.222099650671079e-05, |
| "loss": 3.8467, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 5.0384261812833245e-05, |
| "loss": 3.8475, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 4.854568854568855e-05, |
| "loss": 3.8427, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.35875693053161256, |
| "eval_loss": 3.8616793155670166, |
| "eval_runtime": 1.9889, |
| "eval_samples_per_second": 594.807, |
| "eval_steps_per_second": 5.028, |
| "step": 38634 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 4.6708953851811e-05, |
| "loss": 3.8151, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 4.4870380584666296e-05, |
| "loss": 3.7762, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 4.303364589078875e-05, |
| "loss": 3.779, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 4.119507262364405e-05, |
| "loss": 3.7826, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 3.9358337929766504e-05, |
| "loss": 3.7827, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 3.751976466262181e-05, |
| "loss": 3.7814, |
| "step": 44000 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 3.568302996874426e-05, |
| "loss": 3.7786, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.363882029539207, |
| "eval_loss": 3.8290469646453857, |
| "eval_runtime": 1.9757, |
| "eval_samples_per_second": 598.776, |
| "eval_steps_per_second": 5.062, |
| "step": 45073 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 3.3844456701599556e-05, |
| "loss": 3.7187, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 3.200772200772201e-05, |
| "loss": 3.7211, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 3.0169148740577315e-05, |
| "loss": 3.726, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.61, |
| "learning_rate": 2.8332414046699764e-05, |
| "loss": 3.7267, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 2.6493840779555067e-05, |
| "loss": 3.7259, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 2.4657106085677516e-05, |
| "loss": 3.7255, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.366138404297096, |
| "eval_loss": 3.804433584213257, |
| "eval_runtime": 1.9904, |
| "eval_samples_per_second": 594.35, |
| "eval_steps_per_second": 5.024, |
| "step": 51512 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 2.281853281853282e-05, |
| "loss": 3.6986, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 2.0981798124655268e-05, |
| "loss": 3.674, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 1.914322485751057e-05, |
| "loss": 3.677, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 1.730649016363302e-05, |
| "loss": 3.6769, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 1.5467916896488326e-05, |
| "loss": 3.6768, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 1.3631182202610776e-05, |
| "loss": 3.6782, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.36906703230143567, |
| "eval_loss": 3.78792405128479, |
| "eval_runtime": 1.9815, |
| "eval_samples_per_second": 597.016, |
| "eval_steps_per_second": 5.047, |
| "step": 57951 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 1.1792608935466078e-05, |
| "loss": 3.6742, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 9.955874241588528e-06, |
| "loss": 3.6357, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 8.117300974443832e-06, |
| "loss": 3.6375, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 6.2805662805662815e-06, |
| "loss": 3.6355, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 4.441993013421585e-06, |
| "loss": 3.6365, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 2.605258319544034e-06, |
| "loss": 3.638, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 7.666850523993381e-07, |
| "loss": 3.6357, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.3704181947670742, |
| "eval_loss": 3.780226469039917, |
| "eval_runtime": 1.9875, |
| "eval_samples_per_second": 595.233, |
| "eval_steps_per_second": 5.032, |
| "step": 64390 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 64390, |
| "total_flos": 5.3836218335232e+17, |
| "train_loss": 4.120190780409783, |
| "train_runtime": 28241.4191, |
| "train_samples_per_second": 291.825, |
| "train_steps_per_second": 2.28 |
| } |
| ], |
| "max_steps": 64390, |
| "num_train_epochs": 10, |
| "total_flos": 5.3836218335232e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|