{
  "best_metric": 0.9596994535519126,
  "best_model_checkpoint": "train_authorship/train_outputs/05-25-2024_21:30:31/checkpoint-560",
  "epoch": 8.115942028985508,
  "eval_steps": 35,
  "global_step": 560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.51,
      "grad_norm": 9.12179946899414,
      "learning_rate": 2.5362318840579714e-05,
      "loss": 0.9599,
      "step": 35
    },
    {
      "epoch": 0.51,
      "eval_acc_product": 0.522295927646477,
      "eval_fitzgerald_acc": 0.6823770491803278,
      "eval_hemingway_acc": 0.9651639344262295,
      "eval_loss": 0.4738686978816986,
      "eval_overall_acc": 0.8135245901639344,
      "eval_runtime": 13.1203,
      "eval_samples_per_second": 111.583,
      "eval_steps_per_second": 1.753,
      "eval_woolf_acc": 0.7930327868852459,
      "step": 35
    },
    {
      "epoch": 1.01,
      "grad_norm": 18.4027099609375,
      "learning_rate": 4.99194847020934e-05,
      "loss": 0.3839,
      "step": 70
    },
    {
      "epoch": 1.01,
      "eval_acc_product": 0.7210372836134964,
      "eval_fitzgerald_acc": 0.764344262295082,
      "eval_hemingway_acc": 0.9815573770491803,
      "eval_loss": 0.24899350106716156,
      "eval_overall_acc": 0.9023224043715847,
      "eval_runtime": 13.1386,
      "eval_samples_per_second": 111.428,
      "eval_steps_per_second": 1.751,
      "eval_woolf_acc": 0.9610655737704918,
      "step": 70
    },
    {
      "epoch": 1.52,
      "grad_norm": 7.836585998535156,
      "learning_rate": 4.710144927536232e-05,
      "loss": 0.2387,
      "step": 105
    },
    {
      "epoch": 1.52,
      "eval_acc_product": 0.8025821475696203,
      "eval_fitzgerald_acc": 0.9631147540983607,
      "eval_hemingway_acc": 0.9221311475409836,
      "eval_loss": 0.20742054283618927,
      "eval_overall_acc": 0.9296448087431693,
      "eval_runtime": 13.1443,
      "eval_samples_per_second": 111.379,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9036885245901639,
      "step": 105
    },
    {
      "epoch": 2.03,
      "grad_norm": 10.438405990600586,
      "learning_rate": 4.428341384863124e-05,
      "loss": 0.1938,
      "step": 140
    },
    {
      "epoch": 2.03,
      "eval_acc_product": 0.8229529244050163,
      "eval_fitzgerald_acc": 0.9528688524590164,
      "eval_hemingway_acc": 0.9733606557377049,
      "eval_loss": 0.2009282410144806,
      "eval_overall_acc": 0.9378415300546448,
      "eval_runtime": 13.1419,
      "eval_samples_per_second": 111.399,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.8872950819672131,
      "step": 140
    },
    {
      "epoch": 2.54,
      "grad_norm": 11.882472038269043,
      "learning_rate": 4.146537842190016e-05,
      "loss": 0.1373,
      "step": 175
    },
    {
      "epoch": 2.54,
      "eval_acc_product": 0.8087266940845269,
      "eval_fitzgerald_acc": 0.9467213114754098,
      "eval_hemingway_acc": 0.8831967213114754,
      "eval_loss": 0.20678555965423584,
      "eval_overall_acc": 0.9323770491803278,
      "eval_runtime": 13.1478,
      "eval_samples_per_second": 111.35,
      "eval_steps_per_second": 1.749,
      "eval_woolf_acc": 0.9672131147540983,
      "step": 175
    },
    {
      "epoch": 3.04,
      "grad_norm": 3.76464581489563,
      "learning_rate": 3.864734299516908e-05,
      "loss": 0.0751,
      "step": 210
    },
    {
      "epoch": 3.04,
      "eval_acc_product": 0.8355915183980156,
      "eval_fitzgerald_acc": 0.8913934426229508,
      "eval_hemingway_acc": 0.9774590163934426,
      "eval_loss": 0.21373361349105835,
      "eval_overall_acc": 0.9426229508196722,
      "eval_runtime": 13.1464,
      "eval_samples_per_second": 111.361,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9590163934426229,
      "step": 210
    },
    {
      "epoch": 3.55,
      "grad_norm": 7.145068168640137,
      "learning_rate": 3.5829307568438e-05,
      "loss": 0.0799,
      "step": 245
    },
    {
      "epoch": 3.55,
      "eval_acc_product": 0.8595802071539027,
      "eval_fitzgerald_acc": 0.9467213114754098,
      "eval_hemingway_acc": 0.9508196721311475,
      "eval_loss": 0.19885893166065216,
      "eval_overall_acc": 0.9508196721311475,
      "eval_runtime": 13.143,
      "eval_samples_per_second": 111.39,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9549180327868853,
      "step": 245
    },
    {
      "epoch": 4.06,
      "grad_norm": 19.213457107543945,
      "learning_rate": 3.301127214170693e-05,
      "loss": 0.0689,
      "step": 280
    },
    {
      "epoch": 4.06,
      "eval_acc_product": 0.720976809113428,
      "eval_fitzgerald_acc": 0.985655737704918,
      "eval_hemingway_acc": 0.9036885245901639,
      "eval_loss": 0.44272100925445557,
      "eval_overall_acc": 0.8995901639344263,
      "eval_runtime": 13.1467,
      "eval_samples_per_second": 111.359,
      "eval_steps_per_second": 1.749,
      "eval_woolf_acc": 0.8094262295081968,
      "step": 280
    },
    {
      "epoch": 4.57,
      "grad_norm": 18.969886779785156,
      "learning_rate": 3.0193236714975848e-05,
      "loss": 0.0514,
      "step": 315
    },
    {
      "epoch": 4.57,
      "eval_acc_product": 0.8326357712760099,
      "eval_fitzgerald_acc": 0.9139344262295082,
      "eval_hemingway_acc": 0.9836065573770492,
      "eval_loss": 0.3214350938796997,
      "eval_overall_acc": 0.9412568306010929,
      "eval_runtime": 13.1454,
      "eval_samples_per_second": 111.37,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9262295081967213,
      "step": 315
    },
    {
      "epoch": 5.07,
      "grad_norm": 3.789048433303833,
      "learning_rate": 2.7375201288244768e-05,
      "loss": 0.0247,
      "step": 350
    },
    {
      "epoch": 5.07,
      "eval_acc_product": 0.8625536629442553,
      "eval_fitzgerald_acc": 0.9200819672131147,
      "eval_hemingway_acc": 0.9651639344262295,
      "eval_loss": 0.282697856426239,
      "eval_overall_acc": 0.9521857923497268,
      "eval_runtime": 13.1482,
      "eval_samples_per_second": 111.346,
      "eval_steps_per_second": 1.749,
      "eval_woolf_acc": 0.9713114754098361,
      "step": 350
    },
    {
      "epoch": 5.58,
      "grad_norm": 0.13263003528118134,
      "learning_rate": 2.455716586151369e-05,
      "loss": 0.0061,
      "step": 385
    },
    {
      "epoch": 5.58,
      "eval_acc_product": 0.870603844594922,
      "eval_fitzgerald_acc": 0.9508196721311475,
      "eval_hemingway_acc": 0.9692622950819673,
      "eval_loss": 0.28144514560699463,
      "eval_overall_acc": 0.9549180327868853,
      "eval_runtime": 13.1424,
      "eval_samples_per_second": 111.395,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.944672131147541,
      "step": 385
    },
    {
      "epoch": 6.09,
      "grad_norm": 0.01614902913570404,
      "learning_rate": 2.173913043478261e-05,
      "loss": 0.017,
      "step": 420
    },
    {
      "epoch": 6.09,
      "eval_acc_product": 0.8501492828694912,
      "eval_fitzgerald_acc": 0.9528688524590164,
      "eval_hemingway_acc": 0.930327868852459,
      "eval_loss": 0.34684956073760986,
      "eval_overall_acc": 0.9474043715846995,
      "eval_runtime": 13.1477,
      "eval_samples_per_second": 111.35,
      "eval_steps_per_second": 1.749,
      "eval_woolf_acc": 0.9590163934426229,
      "step": 420
    },
    {
      "epoch": 6.59,
      "grad_norm": 0.007705519441515207,
      "learning_rate": 1.892109500805153e-05,
      "loss": 0.0108,
      "step": 455
    },
    {
      "epoch": 6.59,
      "eval_acc_product": 0.8705621113386143,
      "eval_fitzgerald_acc": 0.9385245901639344,
      "eval_hemingway_acc": 0.9651639344262295,
      "eval_loss": 0.26387327909469604,
      "eval_overall_acc": 0.9549180327868853,
      "eval_runtime": 13.1463,
      "eval_samples_per_second": 111.362,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9610655737704918,
      "step": 455
    },
    {
      "epoch": 7.1,
      "grad_norm": 0.04329540580511093,
      "learning_rate": 1.610305958132045e-05,
      "loss": 0.0237,
      "step": 490
    },
    {
      "epoch": 7.1,
      "eval_acc_product": 0.8607751722611143,
      "eval_fitzgerald_acc": 0.9426229508196722,
      "eval_hemingway_acc": 0.9815573770491803,
      "eval_loss": 0.32457903027534485,
      "eval_overall_acc": 0.9515027322404371,
      "eval_runtime": 13.1416,
      "eval_samples_per_second": 111.402,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.930327868852459,
      "step": 490
    },
    {
      "epoch": 7.61,
      "grad_norm": 18.448688507080078,
      "learning_rate": 1.3285024154589374e-05,
      "loss": 0.0109,
      "step": 525
    },
    {
      "epoch": 7.61,
      "eval_acc_product": 0.8574545818262321,
      "eval_fitzgerald_acc": 0.9569672131147541,
      "eval_hemingway_acc": 0.9631147540983607,
      "eval_loss": 0.32199960947036743,
      "eval_overall_acc": 0.950136612021858,
      "eval_runtime": 13.141,
      "eval_samples_per_second": 111.407,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.930327868852459,
      "step": 525
    },
    {
      "epoch": 8.12,
      "grad_norm": 0.07037464529275894,
      "learning_rate": 1.0466988727858294e-05,
      "loss": 0.0108,
      "step": 560
    },
    {
      "epoch": 8.12,
      "eval_acc_product": 0.8835792216639279,
      "eval_fitzgerald_acc": 0.9385245901639344,
      "eval_hemingway_acc": 0.9692622950819673,
      "eval_loss": 0.27515432238578796,
      "eval_overall_acc": 0.9596994535519126,
      "eval_runtime": 13.1404,
      "eval_samples_per_second": 111.412,
      "eval_steps_per_second": 1.75,
      "eval_woolf_acc": 0.9713114754098361,
      "step": 560
    }
  ],
  "logging_steps": 35,
  "max_steps": 690,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 35,
  "total_flos": 1.4725988675974908e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}