| { |
| "best_metric": 4.0208740234375, |
| "best_model_checkpoint": "models/GPT2_five_function_random_function_53/checkpoint-64070", |
| "epoch": 10.0, |
| "global_step": 64070, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 7.2283, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 6.0277, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 5.7267, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 5.4767, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 5.277, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 6e-05, |
| "loss": 5.114, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.19812708046468724, |
| "eval_loss": 4.937533378601074, |
| "eval_runtime": 2.0754, |
| "eval_samples_per_second": 567.592, |
| "eval_steps_per_second": 4.818, |
| "step": 6407 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 4.9697, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 8e-05, |
| "loss": 4.8582, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 4.7675, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001, |
| "loss": 4.6939, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 9.815239504346218e-05, |
| "loss": 4.6255, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.630294063251342e-05, |
| "loss": 4.569, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.229262195366496, |
| "eval_loss": 4.452399253845215, |
| "eval_runtime": 2.1016, |
| "eval_samples_per_second": 560.526, |
| "eval_steps_per_second": 4.758, |
| "step": 12814 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 9.445718513038653e-05, |
| "loss": 4.5155, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 9.260773071943777e-05, |
| "loss": 4.447, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 9.0758276308489e-05, |
| "loss": 4.4207, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.890882189754023e-05, |
| "loss": 4.3963, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 8.70612169410024e-05, |
| "loss": 4.3706, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 8.521176253005363e-05, |
| "loss": 4.3528, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 8.336415757351582e-05, |
| "loss": 4.3333, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.2426440116038127, |
| "eval_loss": 4.2776312828063965, |
| "eval_runtime": 2.0821, |
| "eval_samples_per_second": 565.785, |
| "eval_steps_per_second": 4.803, |
| "step": 19221 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 8.151655261697799e-05, |
| "loss": 4.2694, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 7.966709820602923e-05, |
| "loss": 4.2519, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 7.781764379508046e-05, |
| "loss": 4.2432, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 7.596818938413168e-05, |
| "loss": 4.2321, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 7.412058442759386e-05, |
| "loss": 4.2238, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 7.22711300166451e-05, |
| "loss": 4.2155, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.25107281793510955, |
| "eval_loss": 4.186640739440918, |
| "eval_runtime": 2.2141, |
| "eval_samples_per_second": 532.038, |
| "eval_steps_per_second": 4.516, |
| "step": 25628 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 7.042537451451821e-05, |
| "loss": 4.1794, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 6.857592010356945e-05, |
| "loss": 4.1418, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 6.672646569262068e-05, |
| "loss": 4.1399, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 6.487701128167191e-05, |
| "loss": 4.1392, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 6.302940632513408e-05, |
| "loss": 4.1333, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 6.117995191418531e-05, |
| "loss": 4.1287, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 5.93323469576475e-05, |
| "loss": 4.1267, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.25577851155702314, |
| "eval_loss": 4.129362106323242, |
| "eval_runtime": 2.2135, |
| "eval_samples_per_second": 532.185, |
| "eval_steps_per_second": 4.518, |
| "step": 32035 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 5.748289254669873e-05, |
| "loss": 4.0583, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 5.563343813574996e-05, |
| "loss": 4.0624, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 5.378398372480119e-05, |
| "loss": 4.063, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 5.193637876826336e-05, |
| "loss": 4.0642, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 5.008692435731459e-05, |
| "loss": 4.0627, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 4.823931940077677e-05, |
| "loss": 4.0604, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.25978904589388124, |
| "eval_loss": 4.091472625732422, |
| "eval_runtime": 2.2427, |
| "eval_samples_per_second": 525.259, |
| "eval_steps_per_second": 4.459, |
| "step": 38442 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 4.6389864989828e-05, |
| "loss": 4.0207, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 4.454226003329018e-05, |
| "loss": 3.9982, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 4.269280562234141e-05, |
| "loss": 4.0031, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 4.084520066580359e-05, |
| "loss": 4.0014, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 3.899574625485482e-05, |
| "loss": 4.0034, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 3.7148141298317e-05, |
| "loss": 4.0046, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.26226889295883854, |
| "eval_loss": 4.0633225440979, |
| "eval_runtime": 2.2236, |
| "eval_samples_per_second": 529.772, |
| "eval_steps_per_second": 4.497, |
| "step": 44849 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 3.530053634177918e-05, |
| "loss": 3.9915, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 3.345108193083041e-05, |
| "loss": 3.9436, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 3.1601627519881636e-05, |
| "loss": 3.9498, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 2.9752173108932868e-05, |
| "loss": 3.9523, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 2.7904568152395044e-05, |
| "loss": 3.9541, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 2.6055113741446275e-05, |
| "loss": 3.9528, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 2.4207508784908454e-05, |
| "loss": 3.9522, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.2649826878601126, |
| "eval_loss": 4.0420684814453125, |
| "eval_runtime": 2.2291, |
| "eval_samples_per_second": 528.453, |
| "eval_steps_per_second": 4.486, |
| "step": 51256 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 2.2359903828370634e-05, |
| "loss": 3.9114, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 2.051044941742186e-05, |
| "loss": 3.9048, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 1.8660995006473093e-05, |
| "loss": 3.9086, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 1.681154059552432e-05, |
| "loss": 3.9069, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 1.49639356389865e-05, |
| "loss": 3.9082, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 1.3114481228037728e-05, |
| "loss": 3.9091, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.2675226929401227, |
| "eval_loss": 4.02744722366333, |
| "eval_runtime": 2.0909, |
| "eval_samples_per_second": 563.403, |
| "eval_steps_per_second": 4.783, |
| "step": 57663 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 1.1268725725910857e-05, |
| "loss": 3.8934, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 9.419271314962087e-06, |
| "loss": 3.8677, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 7.569816904013316e-06, |
| "loss": 3.8703, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 5.720362493064546e-06, |
| "loss": 3.8714, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 3.872757536526725e-06, |
| "loss": 3.8706, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 2.0233031255779547e-06, |
| "loss": 3.8696, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 1.7569816904013318e-07, |
| "loss": 3.8682, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2680641150755986, |
| "eval_loss": 4.0208740234375, |
| "eval_runtime": 2.2018, |
| "eval_samples_per_second": 535.026, |
| "eval_steps_per_second": 4.542, |
| "step": 64070 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 64070, |
| "total_flos": 5.356356009984e+17, |
| "train_loss": 4.300366629619429, |
| "train_runtime": 29153.8108, |
| "train_samples_per_second": 281.26, |
| "train_steps_per_second": 2.198 |
| } |
| ], |
| "max_steps": 64070, |
| "num_train_epochs": 10, |
| "total_flos": 5.356356009984e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|