| { | |
| "best_metric": 4.725980758666992, | |
| "best_model_checkpoint": "models/GPT2_no_function_67/checkpoint-47020", | |
| "epoch": 10.0, | |
| "global_step": 47020, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1e-05, | |
| "loss": 8.0666, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2e-05, | |
| "loss": 7.0832, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3e-05, | |
| "loss": 6.6789, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4e-05, | |
| "loss": 6.3673, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.22100889236420504, | |
| "eval_loss": 6.009326934814453, | |
| "eval_runtime": 1.4336, | |
| "eval_samples_per_second": 604.067, | |
| "eval_steps_per_second": 4.883, | |
| "step": 4702 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 5e-05, | |
| "loss": 6.1194, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 6e-05, | |
| "loss": 5.9118, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 7e-05, | |
| "loss": 5.7482, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8e-05, | |
| "loss": 5.6171, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 9e-05, | |
| "loss": 5.5122, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.2530959611572803, | |
| "eval_loss": 5.362746238708496, | |
| "eval_runtime": 1.4445, | |
| "eval_samples_per_second": 599.499, | |
| "eval_steps_per_second": 4.846, | |
| "step": 9404 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.0001, | |
| "loss": 5.3993, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 9.730145867098865e-05, | |
| "loss": 5.3202, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 9.460021609940574e-05, | |
| "loss": 5.2575, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 9.190167477039439e-05, | |
| "loss": 5.2089, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 8.920043219881146e-05, | |
| "loss": 5.1637, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.2754268880362241, | |
| "eval_loss": 5.0802507400512695, | |
| "eval_runtime": 1.4537, | |
| "eval_samples_per_second": 595.734, | |
| "eval_steps_per_second": 4.815, | |
| "step": 14106 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 8.650189086980012e-05, | |
| "loss": 5.0658, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 8.380064829821718e-05, | |
| "loss": 5.0423, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 8.110210696920584e-05, | |
| "loss": 5.0206, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 7.84008643976229e-05, | |
| "loss": 5.0001, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.2882471677183539, | |
| "eval_loss": 4.943727970123291, | |
| "eval_runtime": 1.5715, | |
| "eval_samples_per_second": 551.081, | |
| "eval_steps_per_second": 4.454, | |
| "step": 18808 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 7.570232306861156e-05, | |
| "loss": 4.9618, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.300108049702864e-05, | |
| "loss": 4.8897, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 7.030253916801729e-05, | |
| "loss": 4.8867, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 6.760129659643436e-05, | |
| "loss": 4.8784, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 6.490275526742302e-05, | |
| "loss": 4.8699, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.29503009583386375, | |
| "eval_loss": 4.865215301513672, | |
| "eval_runtime": 1.4538, | |
| "eval_samples_per_second": 595.674, | |
| "eval_steps_per_second": 4.815, | |
| "step": 23510 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 6.22015126958401e-05, | |
| "loss": 4.8168, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 5.950297136682874e-05, | |
| "loss": 4.7782, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 5.680172879524581e-05, | |
| "loss": 4.7791, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 5.410318746623447e-05, | |
| "loss": 4.7784, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 5.140194489465154e-05, | |
| "loss": 4.7738, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.30020366969140405, | |
| "eval_loss": 4.812718391418457, | |
| "eval_runtime": 1.4498, | |
| "eval_samples_per_second": 597.338, | |
| "eval_steps_per_second": 4.828, | |
| "step": 28212 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 4.87034035656402e-05, | |
| "loss": 4.7033, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 4.600216099405727e-05, | |
| "loss": 4.6956, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 4.330361966504592e-05, | |
| "loss": 4.6959, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 4.0602377093463e-05, | |
| "loss": 4.6966, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.3044225418704879, | |
| "eval_loss": 4.7761945724487305, | |
| "eval_runtime": 1.4505, | |
| "eval_samples_per_second": 597.024, | |
| "eval_steps_per_second": 4.826, | |
| "step": 32914 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 3.790383576445165e-05, | |
| "loss": 4.6884, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 3.520259319286872e-05, | |
| "loss": 4.6182, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 3.250405186385738e-05, | |
| "loss": 4.6246, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 2.9802809292274448e-05, | |
| "loss": 4.6318, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 2.7104267963263103e-05, | |
| "loss": 4.6296, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.30685021185284866, | |
| "eval_loss": 4.7499237060546875, | |
| "eval_runtime": 1.4456, | |
| "eval_samples_per_second": 599.074, | |
| "eval_steps_per_second": 4.842, | |
| "step": 37616 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 2.4403025391680173e-05, | |
| "loss": 4.6027, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "learning_rate": 2.170448406266883e-05, | |
| "loss": 4.5656, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 1.9003241491085898e-05, | |
| "loss": 4.5703, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 1.6304700162074554e-05, | |
| "loss": 4.5718, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 1.3603457590491628e-05, | |
| "loss": 4.5712, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.30869596843119784, | |
| "eval_loss": 4.733290195465088, | |
| "eval_runtime": 1.4519, | |
| "eval_samples_per_second": 596.477, | |
| "eval_steps_per_second": 4.821, | |
| "step": 42318 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 1.0904916261480282e-05, | |
| "loss": 4.5367, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 8.203673689897353e-06, | |
| "loss": 4.5226, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 5.505132360886007e-06, | |
| "loss": 4.5228, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 2.8038897893030795e-06, | |
| "loss": 4.5214, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 1.0534846029173421e-07, | |
| "loss": 4.5227, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.31029623029222964, | |
| "eval_loss": 4.725980758666992, | |
| "eval_runtime": 1.4835, | |
| "eval_samples_per_second": 583.771, | |
| "eval_steps_per_second": 4.719, | |
| "step": 47020 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 47020, | |
| "total_flos": 3.930942652416e+17, | |
| "train_loss": 5.095248515795566, | |
| "train_runtime": 20611.3432, | |
| "train_samples_per_second": 291.961, | |
| "train_steps_per_second": 2.281 | |
| } | |
| ], | |
| "max_steps": 47020, | |
| "num_train_epochs": 10, | |
| "total_flos": 3.930942652416e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |