| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "global_step": 3180, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.3832186408159307e-05, | |
| "loss": 2.8049, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5764944934955999, | |
| "eval_loss": 2.5287580490112305, | |
| "eval_runtime": 22.0895, | |
| "eval_samples_per_second": 80.128, | |
| "eval_steps_per_second": 0.272, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5888124272106204e-05, | |
| "loss": 2.4814, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5855989573905803, | |
| "eval_loss": 2.429943561553955, | |
| "eval_runtime": 20.8189, | |
| "eval_samples_per_second": 85.019, | |
| "eval_steps_per_second": 0.288, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.7090770826327895e-05, | |
| "loss": 2.3778, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5945957840073934, | |
| "eval_loss": 2.348792314529419, | |
| "eval_runtime": 20.8031, | |
| "eval_samples_per_second": 85.084, | |
| "eval_steps_per_second": 0.288, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7944062136053104e-05, | |
| "loss": 2.307, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6146158043850404, | |
| "eval_loss": 2.1809699535369873, | |
| "eval_runtime": 20.6813, | |
| "eval_samples_per_second": 85.585, | |
| "eval_steps_per_second": 0.29, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.860592629580032e-05, | |
| "loss": 2.0807, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7058374199856394, | |
| "eval_loss": 1.4772239923477173, | |
| "eval_runtime": 20.7293, | |
| "eval_samples_per_second": 85.386, | |
| "eval_steps_per_second": 0.289, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.9146708690274792e-05, | |
| "loss": 1.6063, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7498181077384244, | |
| "eval_loss": 1.183250069618225, | |
| "eval_runtime": 20.8379, | |
| "eval_samples_per_second": 84.941, | |
| "eval_steps_per_second": 0.288, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.9603933689955228e-05, | |
| "loss": 1.3122, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7694247060500784, | |
| "eval_loss": 1.0655418634414673, | |
| "eval_runtime": 20.7257, | |
| "eval_samples_per_second": 85.401, | |
| "eval_steps_per_second": 0.289, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1911, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7711616016924874, | |
| "eval_loss": 1.059058427810669, | |
| "eval_runtime": 20.8383, | |
| "eval_samples_per_second": 84.94, | |
| "eval_steps_per_second": 0.288, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1273, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7756633909375628, | |
| "eval_loss": 1.020793080329895, | |
| "eval_runtime": 20.7787, | |
| "eval_samples_per_second": 85.183, | |
| "eval_steps_per_second": 0.289, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0954, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7866386428464195, | |
| "eval_loss": 0.9602928161621094, | |
| "eval_runtime": 20.6664, | |
| "eval_samples_per_second": 85.646, | |
| "eval_steps_per_second": 0.29, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0565, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7900944085581504, | |
| "eval_loss": 0.9395522475242615, | |
| "eval_runtime": 20.7654, | |
| "eval_samples_per_second": 85.238, | |
| "eval_steps_per_second": 0.289, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0351, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7892073832790445, | |
| "eval_loss": 0.9436615705490112, | |
| "eval_runtime": 20.1273, | |
| "eval_samples_per_second": 87.94, | |
| "eval_steps_per_second": 0.298, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0136, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7930297660705511, | |
| "eval_loss": 0.9114692211151123, | |
| "eval_runtime": 20.7577, | |
| "eval_samples_per_second": 85.269, | |
| "eval_steps_per_second": 0.289, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.996, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7986084994358782, | |
| "eval_loss": 0.9008192420005798, | |
| "eval_runtime": 20.8068, | |
| "eval_samples_per_second": 85.068, | |
| "eval_steps_per_second": 0.288, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.985, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8006165209970277, | |
| "eval_loss": 0.8874076008796692, | |
| "eval_runtime": 20.7048, | |
| "eval_samples_per_second": 85.487, | |
| "eval_steps_per_second": 0.29, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9654, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8016079609686441, | |
| "eval_loss": 0.8684276938438416, | |
| "eval_runtime": 20.7798, | |
| "eval_samples_per_second": 85.179, | |
| "eval_steps_per_second": 0.289, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9529, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7997229383352594, | |
| "eval_loss": 0.8870733976364136, | |
| "eval_runtime": 20.7541, | |
| "eval_samples_per_second": 85.284, | |
| "eval_steps_per_second": 0.289, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9442, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8027364602986559, | |
| "eval_loss": 0.8661928772926331, | |
| "eval_runtime": 20.7884, | |
| "eval_samples_per_second": 85.144, | |
| "eval_steps_per_second": 0.289, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9327, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.8069067033935976, | |
| "eval_loss": 0.8454752564430237, | |
| "eval_runtime": 20.7342, | |
| "eval_samples_per_second": 85.366, | |
| "eval_steps_per_second": 0.289, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9204, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8092386081748497, | |
| "eval_loss": 0.8449112772941589, | |
| "eval_runtime": 20.6952, | |
| "eval_samples_per_second": 85.527, | |
| "eval_steps_per_second": 0.29, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9097, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.809981731823164, | |
| "eval_loss": 0.8322621583938599, | |
| "eval_runtime": 20.7947, | |
| "eval_samples_per_second": 85.118, | |
| "eval_steps_per_second": 0.289, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9024, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.8133925596890617, | |
| "eval_loss": 0.8194364905357361, | |
| "eval_runtime": 20.8718, | |
| "eval_samples_per_second": 84.803, | |
| "eval_steps_per_second": 0.287, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.895, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.8129690134121053, | |
| "eval_loss": 0.8122667670249939, | |
| "eval_runtime": 20.8197, | |
| "eval_samples_per_second": 85.016, | |
| "eval_steps_per_second": 0.288, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.891, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.8167250838730983, | |
| "eval_loss": 0.7972639799118042, | |
| "eval_runtime": 20.743, | |
| "eval_samples_per_second": 85.33, | |
| "eval_steps_per_second": 0.289, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8784, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.8133820647746538, | |
| "eval_loss": 0.8083846569061279, | |
| "eval_runtime": 20.8823, | |
| "eval_samples_per_second": 84.761, | |
| "eval_steps_per_second": 0.287, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8771, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.8162191337201458, | |
| "eval_loss": 0.8039098381996155, | |
| "eval_runtime": 20.8754, | |
| "eval_samples_per_second": 84.789, | |
| "eval_steps_per_second": 0.287, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8713, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.8169723253335294, | |
| "eval_loss": 0.7862613797187805, | |
| "eval_runtime": 20.8372, | |
| "eval_samples_per_second": 84.944, | |
| "eval_steps_per_second": 0.288, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8707, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.8173417825447652, | |
| "eval_loss": 0.7960302829742432, | |
| "eval_runtime": 20.7882, | |
| "eval_samples_per_second": 85.145, | |
| "eval_steps_per_second": 0.289, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8571, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.8181885887496183, | |
| "eval_loss": 0.7875123023986816, | |
| "eval_runtime": 20.7114, | |
| "eval_samples_per_second": 85.46, | |
| "eval_steps_per_second": 0.29, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8593, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.8199450484519026, | |
| "eval_loss": 0.7765340805053711, | |
| "eval_runtime": 20.9094, | |
| "eval_samples_per_second": 84.651, | |
| "eval_steps_per_second": 0.287, | |
| "step": 3180 | |
| } | |
| ], | |
| "max_steps": 4240, | |
| "num_train_epochs": 40, | |
| "total_flos": 1006048479543296.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |