| { | |
| "best_metric": 3.675755739212036, | |
| "best_model_checkpoint": "models/GPT2_natural_function_67/checkpoint-64390", | |
| "epoch": 10.0, | |
| "global_step": 64390, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1e-05, | |
| "loss": 7.5832, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 2e-05, | |
| "loss": 6.0927, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3e-05, | |
| "loss": 5.6579, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4e-05, | |
| "loss": 5.3482, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 5e-05, | |
| "loss": 5.1066, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6e-05, | |
| "loss": 4.9145, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.29016047550269236, | |
| "eval_loss": 4.688190937042236, | |
| "eval_runtime": 1.9998, | |
| "eval_samples_per_second": 591.551, | |
| "eval_steps_per_second": 5.0, | |
| "step": 6439 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 7e-05, | |
| "loss": 4.7467, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8e-05, | |
| "loss": 4.6193, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9e-05, | |
| "loss": 4.5146, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.0001, | |
| "loss": 4.4312, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 9.816326530612245e-05, | |
| "loss": 4.3532, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 9.632469203897775e-05, | |
| "loss": 4.2877, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.32783993716761733, | |
| "eval_loss": 4.14790153503418, | |
| "eval_runtime": 2.0061, | |
| "eval_samples_per_second": 589.709, | |
| "eval_steps_per_second": 4.985, | |
| "step": 12878 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 9.448795734510021e-05, | |
| "loss": 4.2314, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 9.264938407795552e-05, | |
| "loss": 4.1519, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 9.081264938407796e-05, | |
| "loss": 4.1216, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.897407611693326e-05, | |
| "loss": 4.0984, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 8.713734142305572e-05, | |
| "loss": 4.0698, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 8.529876815591101e-05, | |
| "loss": 4.049, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.346203346203346e-05, | |
| "loss": 4.0279, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.3453651133844956, | |
| "eval_loss": 3.954080820083618, | |
| "eval_runtime": 2.0088, | |
| "eval_samples_per_second": 588.916, | |
| "eval_steps_per_second": 4.978, | |
| "step": 19317 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 8.162346019488876e-05, | |
| "loss": 3.9655, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 7.978672550101122e-05, | |
| "loss": 3.9369, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 7.794815223386652e-05, | |
| "loss": 3.9293, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 7.611325611325611e-05, | |
| "loss": 3.9213, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 7.427468284611142e-05, | |
| "loss": 3.9107, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 7.243610957896673e-05, | |
| "loss": 3.9003, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.3553157926265134, | |
| "eval_loss": 3.856142044067383, | |
| "eval_runtime": 2.0123, | |
| "eval_samples_per_second": 587.87, | |
| "eval_steps_per_second": 4.969, | |
| "step": 25756 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 7.059937488508918e-05, | |
| "loss": 3.8711, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 6.876080161794448e-05, | |
| "loss": 3.8186, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 6.692406692406693e-05, | |
| "loss": 3.8207, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 6.508549365692223e-05, | |
| "loss": 3.8165, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 6.324692038977753e-05, | |
| "loss": 3.8114, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 6.141018569589998e-05, | |
| "loss": 3.8074, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 5.957161242875529e-05, | |
| "loss": 3.8034, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.36155243908120954, | |
| "eval_loss": 3.7953288555145264, | |
| "eval_runtime": 1.9992, | |
| "eval_samples_per_second": 591.726, | |
| "eval_steps_per_second": 5.002, | |
| "step": 32195 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 5.7734877734877734e-05, | |
| "loss": 3.7432, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 5.589630446773304e-05, | |
| "loss": 3.7348, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 5.4057731200588346e-05, | |
| "loss": 3.7356, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 5.222099650671079e-05, | |
| "loss": 3.7344, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 5.03824232395661e-05, | |
| "loss": 3.735, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 4.854568854568855e-05, | |
| "loss": 3.731, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.36634473945194723, | |
| "eval_loss": 3.753897190093994, | |
| "eval_runtime": 2.0037, | |
| "eval_samples_per_second": 590.407, | |
| "eval_steps_per_second": 4.991, | |
| "step": 38634 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 4.670711527854385e-05, | |
| "loss": 3.7019, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 4.486854201139915e-05, | |
| "loss": 3.6644, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 4.3031807317521606e-05, | |
| "loss": 3.6718, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 4.119323405037691e-05, | |
| "loss": 3.6717, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 3.935466078323221e-05, | |
| "loss": 3.6718, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 3.751792608935466e-05, | |
| "loss": 3.6713, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 3.567935282220997e-05, | |
| "loss": 3.6709, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.3699988684846347, | |
| "eval_loss": 3.72359037399292, | |
| "eval_runtime": 2.0134, | |
| "eval_samples_per_second": 587.554, | |
| "eval_steps_per_second": 4.967, | |
| "step": 45073 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 3.384261812833241e-05, | |
| "loss": 3.6101, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 3.200404486118772e-05, | |
| "loss": 3.6119, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 3.0165471594043025e-05, | |
| "loss": 3.6167, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 2.8326898326898328e-05, | |
| "loss": 3.618, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 2.6490163633020777e-05, | |
| "loss": 3.6183, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 2.465159036587608e-05, | |
| "loss": 3.6162, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.3734133824987853, | |
| "eval_loss": 3.7009902000427246, | |
| "eval_runtime": 2.0145, | |
| "eval_samples_per_second": 587.246, | |
| "eval_steps_per_second": 4.964, | |
| "step": 51512 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 2.281485567199853e-05, | |
| "loss": 3.5874, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 2.0976282404853832e-05, | |
| "loss": 3.5646, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 1.913954771097628e-05, | |
| "loss": 3.5686, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 1.7300974443831588e-05, | |
| "loss": 3.5686, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 1.546240117668689e-05, | |
| "loss": 3.5705, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 1.3623827909542197e-05, | |
| "loss": 3.5698, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.3750973436012806, | |
| "eval_loss": 3.684567928314209, | |
| "eval_runtime": 2.0058, | |
| "eval_samples_per_second": 589.776, | |
| "eval_steps_per_second": 4.985, | |
| "step": 57951 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 1.1787093215664645e-05, | |
| "loss": 3.566, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 9.948519948519949e-06, | |
| "loss": 3.5272, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 8.111785254642398e-06, | |
| "loss": 3.5296, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 6.273211987497703e-06, | |
| "loss": 3.5308, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 4.436477293620151e-06, | |
| "loss": 3.5305, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 2.597904026475455e-06, | |
| "loss": 3.527, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 7.61169332597904e-07, | |
| "loss": 3.5258, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.3762155470211194, | |
| "eval_loss": 3.675755739212036, | |
| "eval_runtime": 2.0114, | |
| "eval_samples_per_second": 588.151, | |
| "eval_steps_per_second": 4.972, | |
| "step": 64390 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 64390, | |
| "total_flos": 5.3836218335232e+17, | |
| "train_loss": 4.008237893900936, | |
| "train_runtime": 29754.0363, | |
| "train_samples_per_second": 276.989, | |
| "train_steps_per_second": 2.164 | |
| } | |
| ], | |
| "max_steps": 64390, | |
| "num_train_epochs": 10, | |
| "total_flos": 5.3836218335232e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |