| { | |
| "best_metric": 0.8655030800821355, | |
| "best_model_checkpoint": "AnimeCharacterClassifierMark1/checkpoint-258", | |
| "epoch": 16.0, | |
| "eval_steps": 500, | |
| "global_step": 276, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 5.0145, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.009240246406570842, | |
| "eval_loss": 4.930349826812744, | |
| "eval_runtime": 9.5929, | |
| "eval_samples_per_second": 101.533, | |
| "eval_steps_per_second": 0.834, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 4.932, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 4.8416, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.028747433264887063, | |
| "eval_loss": 4.748697757720947, | |
| "eval_runtime": 8.8137, | |
| "eval_samples_per_second": 110.51, | |
| "eval_steps_per_second": 0.908, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 4.6652, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 3.472222222222222e-05, | |
| "loss": 4.4383, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_accuracy": 0.11704312114989733, | |
| "eval_loss": 4.359685897827148, | |
| "eval_runtime": 10.311, | |
| "eval_samples_per_second": 94.462, | |
| "eval_steps_per_second": 0.776, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 4.0762, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.32238193018480493, | |
| "eval_loss": 3.641871213912964, | |
| "eval_runtime": 9.02, | |
| "eval_samples_per_second": 107.982, | |
| "eval_steps_per_second": 0.887, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 4.8611111111111115e-05, | |
| "loss": 3.6518, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 4.937694704049845e-05, | |
| "loss": 3.108, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.5246406570841889, | |
| "eval_loss": 2.857390880584717, | |
| "eval_runtime": 8.9304, | |
| "eval_samples_per_second": 109.065, | |
| "eval_steps_per_second": 0.896, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 4.85981308411215e-05, | |
| "loss": 2.606, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 4.781931464174455e-05, | |
| "loss": 2.1571, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "eval_accuracy": 0.6652977412731006, | |
| "eval_loss": 2.2128942012786865, | |
| "eval_runtime": 8.8437, | |
| "eval_samples_per_second": 110.136, | |
| "eval_steps_per_second": 0.905, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 4.7040498442367604e-05, | |
| "loss": 1.7668, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 4.6261682242990654e-05, | |
| "loss": 1.4685, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "eval_accuracy": 0.7494866529774127, | |
| "eval_loss": 1.7289572954177856, | |
| "eval_runtime": 9.9463, | |
| "eval_samples_per_second": 97.926, | |
| "eval_steps_per_second": 0.804, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 4.548286604361371e-05, | |
| "loss": 1.1649, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.797741273100616, | |
| "eval_loss": 1.3861801624298096, | |
| "eval_runtime": 8.8226, | |
| "eval_samples_per_second": 110.398, | |
| "eval_steps_per_second": 0.907, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 4.470404984423676e-05, | |
| "loss": 0.9897, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 4.392523364485982e-05, | |
| "loss": 0.7905, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.8213552361396304, | |
| "eval_loss": 1.1588941812515259, | |
| "eval_runtime": 8.795, | |
| "eval_samples_per_second": 110.745, | |
| "eval_steps_per_second": 0.91, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 4.314641744548287e-05, | |
| "loss": 0.6727, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 4.236760124610592e-05, | |
| "loss": 0.5549, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "eval_accuracy": 0.8295687885010267, | |
| "eval_loss": 1.0262539386749268, | |
| "eval_runtime": 8.8584, | |
| "eval_samples_per_second": 109.953, | |
| "eval_steps_per_second": 0.903, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 4.1588785046728974e-05, | |
| "loss": 0.4577, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "eval_accuracy": 0.8367556468172485, | |
| "eval_loss": 0.8994325995445251, | |
| "eval_runtime": 8.7654, | |
| "eval_samples_per_second": 111.119, | |
| "eval_steps_per_second": 0.913, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "learning_rate": 4.0809968847352024e-05, | |
| "loss": 0.3757, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 4.003115264797508e-05, | |
| "loss": 0.2964, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8552361396303901, | |
| "eval_loss": 0.808638870716095, | |
| "eval_runtime": 11.2618, | |
| "eval_samples_per_second": 86.487, | |
| "eval_steps_per_second": 0.71, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 3.925233644859813e-05, | |
| "loss": 0.2592, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "learning_rate": 3.847352024922119e-05, | |
| "loss": 0.194, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.8583162217659137, | |
| "eval_loss": 0.744567334651947, | |
| "eval_runtime": 8.8124, | |
| "eval_samples_per_second": 110.526, | |
| "eval_steps_per_second": 0.908, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 3.769470404984424e-05, | |
| "loss": 0.1626, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "learning_rate": 3.691588785046729e-05, | |
| "loss": 0.1358, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "eval_accuracy": 0.8572895277207392, | |
| "eval_loss": 0.7063936591148376, | |
| "eval_runtime": 8.8917, | |
| "eval_samples_per_second": 109.54, | |
| "eval_steps_per_second": 0.9, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "learning_rate": 3.6137071651090344e-05, | |
| "loss": 0.1116, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "eval_accuracy": 0.8655030800821355, | |
| "eval_loss": 0.67196124792099, | |
| "eval_runtime": 9.0077, | |
| "eval_samples_per_second": 108.129, | |
| "eval_steps_per_second": 0.888, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 15.07, | |
| "learning_rate": 3.5358255451713394e-05, | |
| "loss": 0.0974, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 3.457943925233645e-05, | |
| "loss": 0.0811, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.864476386036961, | |
| "eval_loss": 0.6515084505081177, | |
| "eval_runtime": 9.3868, | |
| "eval_samples_per_second": 103.762, | |
| "eval_steps_per_second": 0.852, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "step": 276, | |
| "total_flos": 1.087746145977493e+19, | |
| "train_loss": 1.7795628476617993, | |
| "train_runtime": 3092.1212, | |
| "train_samples_per_second": 119.027, | |
| "train_steps_per_second": 0.231 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 714, | |
| "num_train_epochs": 42, | |
| "save_steps": 500, | |
| "total_flos": 1.087746145977493e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |