| { | |
| "best_metric": 0.9064748201438849, | |
| "best_model_checkpoint": "train_authorship/train_outputs/05-26-2024_17:09:07/checkpoint-300", | |
| "epoch": 10.0, | |
| "eval_steps": 15, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 5.808341979980469, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.0972, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_acc_product": 0.21928799282400072, | |
| "eval_bush_acc": 0.8201438848920863, | |
| "eval_loss": 0.9504616856575012, | |
| "eval_obama_acc": 0.45323741007194246, | |
| "eval_overall_acc": 0.6211031175059952, | |
| "eval_runtime": 2.8206, | |
| "eval_samples_per_second": 147.84, | |
| "eval_steps_per_second": 2.482, | |
| "eval_trump_acc": 0.5899280575539568, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 48.525230407714844, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8423, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_acc_product": 0.39412887680642716, | |
| "eval_bush_acc": 0.5755395683453237, | |
| "eval_loss": 0.648549497127533, | |
| "eval_obama_acc": 0.7266187050359713, | |
| "eval_overall_acc": 0.7482014388489209, | |
| "eval_runtime": 2.7763, | |
| "eval_samples_per_second": 150.202, | |
| "eval_steps_per_second": 2.521, | |
| "eval_trump_acc": 0.9424460431654677, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 12.380006790161133, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 0.5581, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_acc_product": 0.43508777678442107, | |
| "eval_bush_acc": 0.9640287769784173, | |
| "eval_loss": 0.6018202304840088, | |
| "eval_obama_acc": 0.5755395683453237, | |
| "eval_overall_acc": 0.7745803357314148, | |
| "eval_runtime": 2.7813, | |
| "eval_samples_per_second": 149.93, | |
| "eval_steps_per_second": 2.517, | |
| "eval_trump_acc": 0.7841726618705036, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.6562857627868652, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.4719, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_acc_product": 0.5783158370565594, | |
| "eval_bush_acc": 0.7482014388489209, | |
| "eval_loss": 0.4394747018814087, | |
| "eval_obama_acc": 0.8201438848920863, | |
| "eval_overall_acc": 0.8369304556354916, | |
| "eval_runtime": 2.7839, | |
| "eval_samples_per_second": 149.787, | |
| "eval_steps_per_second": 2.514, | |
| "eval_trump_acc": 0.9424460431654677, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 14.938467025756836, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.265, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_acc_product": 0.5558941905013333, | |
| "eval_bush_acc": 0.7913669064748201, | |
| "eval_loss": 0.4741947650909424, | |
| "eval_obama_acc": 0.8345323741007195, | |
| "eval_overall_acc": 0.8225419664268585, | |
| "eval_runtime": 2.7828, | |
| "eval_samples_per_second": 149.851, | |
| "eval_steps_per_second": 2.515, | |
| "eval_trump_acc": 0.841726618705036, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4905509948730469, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.1729, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_acc_product": 0.25067517023077357, | |
| "eval_bush_acc": 0.302158273381295, | |
| "eval_loss": 1.0451148748397827, | |
| "eval_obama_acc": 0.841726618705036, | |
| "eval_overall_acc": 0.709832134292566, | |
| "eval_runtime": 2.7825, | |
| "eval_samples_per_second": 149.863, | |
| "eval_steps_per_second": 2.516, | |
| "eval_trump_acc": 0.9856115107913669, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 32.64374542236328, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.2945, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_acc_product": 0.5622189893652079, | |
| "eval_bush_acc": 0.7338129496402878, | |
| "eval_loss": 0.5390242338180542, | |
| "eval_obama_acc": 0.8129496402877698, | |
| "eval_overall_acc": 0.829736211031175, | |
| "eval_runtime": 2.7882, | |
| "eval_samples_per_second": 149.56, | |
| "eval_steps_per_second": 2.511, | |
| "eval_trump_acc": 0.9424460431654677, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.29498815536499023, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1925, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_acc_product": 0.6885265557028006, | |
| "eval_bush_acc": 0.935251798561151, | |
| "eval_loss": 0.42252907156944275, | |
| "eval_obama_acc": 0.8057553956834532, | |
| "eval_overall_acc": 0.8848920863309353, | |
| "eval_runtime": 2.7847, | |
| "eval_samples_per_second": 149.745, | |
| "eval_steps_per_second": 2.514, | |
| "eval_trump_acc": 0.9136690647482014, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 5.147099018096924, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.0396, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "eval_acc_product": 0.628604429742268, | |
| "eval_bush_acc": 0.920863309352518, | |
| "eval_loss": 0.6601312160491943, | |
| "eval_obama_acc": 0.8705035971223022, | |
| "eval_overall_acc": 0.8585131894484412, | |
| "eval_runtime": 2.7927, | |
| "eval_samples_per_second": 149.316, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.7841726618705036, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.057328637689352036, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.0438, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_acc_product": 0.6582616521554249, | |
| "eval_bush_acc": 0.8633093525179856, | |
| "eval_loss": 0.6852461695671082, | |
| "eval_obama_acc": 0.9136690647482014, | |
| "eval_overall_acc": 0.8705035971223022, | |
| "eval_runtime": 2.7951, | |
| "eval_samples_per_second": 149.19, | |
| "eval_steps_per_second": 2.504, | |
| "eval_trump_acc": 0.8345323741007195, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 1.9609147310256958, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0264, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "eval_acc_product": 0.6410678506519354, | |
| "eval_bush_acc": 0.9064748201438849, | |
| "eval_loss": 0.7376511096954346, | |
| "eval_obama_acc": 0.8057553956834532, | |
| "eval_overall_acc": 0.8633093525179856, | |
| "eval_runtime": 2.7923, | |
| "eval_samples_per_second": 149.337, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.8776978417266187, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.07944349199533463, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0444, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_acc_product": 0.6344280406118664, | |
| "eval_bush_acc": 0.9280575539568345, | |
| "eval_loss": 0.775315523147583, | |
| "eval_obama_acc": 0.7482014388489209, | |
| "eval_overall_acc": 0.8633093525179856, | |
| "eval_runtime": 2.7918, | |
| "eval_samples_per_second": 149.364, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.9136690647482014, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 17.292938232421875, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0344, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "eval_acc_product": 0.6043388879807597, | |
| "eval_bush_acc": 0.9784172661870504, | |
| "eval_loss": 0.862628698348999, | |
| "eval_obama_acc": 0.7338129496402878, | |
| "eval_overall_acc": 0.8513189448441247, | |
| "eval_runtime": 2.7928, | |
| "eval_samples_per_second": 149.314, | |
| "eval_steps_per_second": 2.506, | |
| "eval_trump_acc": 0.841726618705036, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.018421683460474014, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0185, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_acc_product": 0.698349244624796, | |
| "eval_bush_acc": 0.8992805755395683, | |
| "eval_loss": 0.6285017132759094, | |
| "eval_obama_acc": 0.8920863309352518, | |
| "eval_overall_acc": 0.8872901678657075, | |
| "eval_runtime": 2.7927, | |
| "eval_samples_per_second": 149.321, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.8705035971223022, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 0.12802745401859283, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0114, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "eval_acc_product": 0.6621914724314953, | |
| "eval_bush_acc": 0.8129496402877698, | |
| "eval_loss": 0.7886965870857239, | |
| "eval_obama_acc": 0.9280575539568345, | |
| "eval_overall_acc": 0.8729016786570744, | |
| "eval_runtime": 2.7919, | |
| "eval_samples_per_second": 149.361, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.8776978417266187, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.008947780355811119, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.0025, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_acc_product": 0.7039360385817944, | |
| "eval_bush_acc": 0.9064748201438849, | |
| "eval_loss": 0.6208213567733765, | |
| "eval_obama_acc": 0.8920863309352518, | |
| "eval_overall_acc": 0.8896882494004796, | |
| "eval_runtime": 2.7916, | |
| "eval_samples_per_second": 149.375, | |
| "eval_steps_per_second": 2.508, | |
| "eval_trump_acc": 0.8705035971223022, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "grad_norm": 0.06868788599967957, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0069, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "eval_acc_product": 0.7387000911149348, | |
| "eval_bush_acc": 0.9136690647482014, | |
| "eval_loss": 0.6432462334632874, | |
| "eval_obama_acc": 0.9136690647482014, | |
| "eval_overall_acc": 0.9040767386091128, | |
| "eval_runtime": 2.796, | |
| "eval_samples_per_second": 149.139, | |
| "eval_steps_per_second": 2.504, | |
| "eval_trump_acc": 0.8848920863309353, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.00421088607981801, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.0019, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_acc_product": 0.7271128183111604, | |
| "eval_bush_acc": 0.8848920863309353, | |
| "eval_loss": 0.6422414779663086, | |
| "eval_obama_acc": 0.9064748201438849, | |
| "eval_overall_acc": 0.8992805755395683, | |
| "eval_runtime": 2.794, | |
| "eval_samples_per_second": 149.248, | |
| "eval_steps_per_second": 2.505, | |
| "eval_trump_acc": 0.9064748201438849, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "grad_norm": 0.2265014946460724, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0009, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "eval_acc_product": 0.7271128183111604, | |
| "eval_bush_acc": 0.8848920863309353, | |
| "eval_loss": 0.6394339799880981, | |
| "eval_obama_acc": 0.9064748201438849, | |
| "eval_overall_acc": 0.8992805755395683, | |
| "eval_runtime": 2.7933, | |
| "eval_samples_per_second": 149.288, | |
| "eval_steps_per_second": 2.506, | |
| "eval_trump_acc": 0.9064748201438849, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.013048585504293442, | |
| "learning_rate": 0.0, | |
| "loss": 0.0021, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_acc_product": 0.7448472772943594, | |
| "eval_bush_acc": 0.9064748201438849, | |
| "eval_loss": 0.6265907287597656, | |
| "eval_obama_acc": 0.9064748201438849, | |
| "eval_overall_acc": 0.9064748201438849, | |
| "eval_runtime": 2.7927, | |
| "eval_samples_per_second": 149.318, | |
| "eval_steps_per_second": 2.507, | |
| "eval_trump_acc": 0.9064748201438849, | |
| "step": 300 | |
| } | |
| ], | |
| "logging_steps": 15, | |
| "max_steps": 300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 15, | |
| "total_flos": 5170662082636590.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |