{
  "best_metric": 0.65,
  "best_model_checkpoint": "vit-base-patch16-224-isic248/checkpoint-600",
  "epoch": 29.662921348314608,
  "eval_steps": 500,
  "global_step": 660,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9887640449438202,
      "grad_norm": 12.167838096618652,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.4757,
      "step": 22
    },
    {
      "epoch": 0.9887640449438202,
      "eval_accuracy": 0.15,
      "eval_loss": 2.201775312423706,
      "eval_runtime": 1.4445,
      "eval_samples_per_second": 13.846,
      "eval_steps_per_second": 6.923,
      "step": 22
    },
    {
      "epoch": 1.9775280898876404,
      "grad_norm": 9.576192855834961,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.0035,
      "step": 44
    },
    {
      "epoch": 1.9775280898876404,
      "eval_accuracy": 0.35,
      "eval_loss": 1.9165420532226562,
      "eval_runtime": 1.4545,
      "eval_samples_per_second": 13.75,
      "eval_steps_per_second": 6.875,
      "step": 44
    },
    {
      "epoch": 2.966292134831461,
      "grad_norm": 9.708001136779785,
      "learning_rate": 5e-05,
      "loss": 1.5959,
      "step": 66
    },
    {
      "epoch": 2.966292134831461,
      "eval_accuracy": 0.35,
      "eval_loss": 1.6903650760650635,
      "eval_runtime": 1.4059,
      "eval_samples_per_second": 14.226,
      "eval_steps_per_second": 7.113,
      "step": 66
    },
    {
      "epoch": 4.0,
      "grad_norm": 12.472097396850586,
      "learning_rate": 4.806397306397307e-05,
      "loss": 1.1695,
      "step": 89
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.45,
      "eval_loss": 1.5546133518218994,
      "eval_runtime": 1.4757,
      "eval_samples_per_second": 13.553,
      "eval_steps_per_second": 6.776,
      "step": 89
    },
    {
      "epoch": 4.98876404494382,
      "grad_norm": 12.761962890625,
      "learning_rate": 4.621212121212121e-05,
      "loss": 0.8869,
      "step": 111
    },
    {
      "epoch": 4.98876404494382,
      "eval_accuracy": 0.45,
      "eval_loss": 1.5020616054534912,
      "eval_runtime": 1.4294,
      "eval_samples_per_second": 13.992,
      "eval_steps_per_second": 6.996,
      "step": 111
    },
    {
      "epoch": 5.97752808988764,
      "grad_norm": 7.685186386108398,
      "learning_rate": 4.436026936026936e-05,
      "loss": 0.6398,
      "step": 133
    },
    {
      "epoch": 5.97752808988764,
      "eval_accuracy": 0.5,
      "eval_loss": 1.5211018323898315,
      "eval_runtime": 1.4329,
      "eval_samples_per_second": 13.958,
      "eval_steps_per_second": 6.979,
      "step": 133
    },
    {
      "epoch": 6.966292134831461,
      "grad_norm": 8.553827285766602,
      "learning_rate": 4.250841750841751e-05,
      "loss": 0.5405,
      "step": 155
    },
    {
      "epoch": 6.966292134831461,
      "eval_accuracy": 0.55,
      "eval_loss": 1.4225242137908936,
      "eval_runtime": 1.4838,
      "eval_samples_per_second": 13.479,
      "eval_steps_per_second": 6.74,
      "step": 155
    },
    {
      "epoch": 8.0,
      "grad_norm": 4.784798622131348,
      "learning_rate": 4.0572390572390575e-05,
      "loss": 0.3626,
      "step": 178
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.45,
      "eval_loss": 1.3762075901031494,
      "eval_runtime": 1.4684,
      "eval_samples_per_second": 13.62,
      "eval_steps_per_second": 6.81,
      "step": 178
    },
    {
      "epoch": 8.98876404494382,
      "grad_norm": 1.4554903507232666,
      "learning_rate": 3.872053872053872e-05,
      "loss": 0.2971,
      "step": 200
    },
    {
      "epoch": 8.98876404494382,
      "eval_accuracy": 0.55,
      "eval_loss": 1.5557674169540405,
      "eval_runtime": 1.4444,
      "eval_samples_per_second": 13.847,
      "eval_steps_per_second": 6.923,
      "step": 200
    },
    {
      "epoch": 9.97752808988764,
      "grad_norm": 7.8950629234313965,
      "learning_rate": 3.686868686868687e-05,
      "loss": 0.2324,
      "step": 222
    },
    {
      "epoch": 9.97752808988764,
      "eval_accuracy": 0.55,
      "eval_loss": 1.4037200212478638,
      "eval_runtime": 1.4411,
      "eval_samples_per_second": 13.878,
      "eval_steps_per_second": 6.939,
      "step": 222
    },
    {
      "epoch": 10.96629213483146,
      "grad_norm": 11.152678489685059,
      "learning_rate": 3.501683501683502e-05,
      "loss": 0.169,
      "step": 244
    },
    {
      "epoch": 10.96629213483146,
      "eval_accuracy": 0.55,
      "eval_loss": 1.4981868267059326,
      "eval_runtime": 1.4462,
      "eval_samples_per_second": 13.829,
      "eval_steps_per_second": 6.915,
      "step": 244
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.168487310409546,
      "learning_rate": 3.308080808080809e-05,
      "loss": 0.1625,
      "step": 267
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.55,
      "eval_loss": 1.3436851501464844,
      "eval_runtime": 1.463,
      "eval_samples_per_second": 13.671,
      "eval_steps_per_second": 6.835,
      "step": 267
    },
    {
      "epoch": 12.98876404494382,
      "grad_norm": 3.2774059772491455,
      "learning_rate": 3.122895622895623e-05,
      "loss": 0.1088,
      "step": 289
    },
    {
      "epoch": 12.98876404494382,
      "eval_accuracy": 0.55,
      "eval_loss": 1.296514630317688,
      "eval_runtime": 1.5097,
      "eval_samples_per_second": 13.247,
      "eval_steps_per_second": 6.624,
      "step": 289
    },
    {
      "epoch": 13.97752808988764,
      "grad_norm": 0.407360315322876,
      "learning_rate": 2.9377104377104382e-05,
      "loss": 0.1924,
      "step": 311
    },
    {
      "epoch": 13.97752808988764,
      "eval_accuracy": 0.5,
      "eval_loss": 1.255786657333374,
      "eval_runtime": 1.4427,
      "eval_samples_per_second": 13.863,
      "eval_steps_per_second": 6.932,
      "step": 311
    },
    {
      "epoch": 14.96629213483146,
      "grad_norm": 0.5581790208816528,
      "learning_rate": 2.7525252525252528e-05,
      "loss": 0.124,
      "step": 333
    },
    {
      "epoch": 14.96629213483146,
      "eval_accuracy": 0.5,
      "eval_loss": 1.4686721563339233,
      "eval_runtime": 1.5037,
      "eval_samples_per_second": 13.3,
      "eval_steps_per_second": 6.65,
      "step": 333
    },
    {
      "epoch": 16.0,
      "grad_norm": 12.646324157714844,
      "learning_rate": 2.5589225589225592e-05,
      "loss": 0.0901,
      "step": 356
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6,
      "eval_loss": 1.5054057836532593,
      "eval_runtime": 1.4476,
      "eval_samples_per_second": 13.816,
      "eval_steps_per_second": 6.908,
      "step": 356
    },
    {
      "epoch": 16.98876404494382,
      "grad_norm": 0.19395825266838074,
      "learning_rate": 2.3737373737373738e-05,
      "loss": 0.0641,
      "step": 378
    },
    {
      "epoch": 16.98876404494382,
      "eval_accuracy": 0.5,
      "eval_loss": 1.4898236989974976,
      "eval_runtime": 1.4446,
      "eval_samples_per_second": 13.844,
      "eval_steps_per_second": 6.922,
      "step": 378
    },
    {
      "epoch": 17.97752808988764,
      "grad_norm": 4.937531471252441,
      "learning_rate": 2.1885521885521887e-05,
      "loss": 0.1093,
      "step": 400
    },
    {
      "epoch": 17.97752808988764,
      "eval_accuracy": 0.55,
      "eval_loss": 1.3880454301834106,
      "eval_runtime": 1.4563,
      "eval_samples_per_second": 13.733,
      "eval_steps_per_second": 6.867,
      "step": 400
    },
    {
      "epoch": 18.96629213483146,
      "grad_norm": 0.08288563787937164,
      "learning_rate": 2.0033670033670036e-05,
      "loss": 0.1307,
      "step": 422
    },
    {
      "epoch": 18.96629213483146,
      "eval_accuracy": 0.45,
      "eval_loss": 1.460898995399475,
      "eval_runtime": 1.4485,
      "eval_samples_per_second": 13.807,
      "eval_steps_per_second": 6.903,
      "step": 422
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.22473017871379852,
      "learning_rate": 1.80976430976431e-05,
      "loss": 0.1268,
      "step": 445
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.6,
      "eval_loss": 1.5604647397994995,
      "eval_runtime": 1.4452,
      "eval_samples_per_second": 13.839,
      "eval_steps_per_second": 6.919,
      "step": 445
    },
    {
      "epoch": 20.98876404494382,
      "grad_norm": 0.07075084000825882,
      "learning_rate": 1.6245791245791246e-05,
      "loss": 0.0416,
      "step": 467
    },
    {
      "epoch": 20.98876404494382,
      "eval_accuracy": 0.55,
      "eval_loss": 1.5643846988677979,
      "eval_runtime": 1.4571,
      "eval_samples_per_second": 13.725,
      "eval_steps_per_second": 6.863,
      "step": 467
    },
    {
      "epoch": 21.97752808988764,
      "grad_norm": 0.09144988656044006,
      "learning_rate": 1.4393939393939396e-05,
      "loss": 0.0538,
      "step": 489
    },
    {
      "epoch": 21.97752808988764,
      "eval_accuracy": 0.6,
      "eval_loss": 1.5291801691055298,
      "eval_runtime": 1.4841,
      "eval_samples_per_second": 13.476,
      "eval_steps_per_second": 6.738,
      "step": 489
    },
    {
      "epoch": 22.96629213483146,
      "grad_norm": 3.7509801387786865,
      "learning_rate": 1.2542087542087543e-05,
      "loss": 0.109,
      "step": 511
    },
    {
      "epoch": 22.96629213483146,
      "eval_accuracy": 0.6,
      "eval_loss": 1.4048659801483154,
      "eval_runtime": 1.4957,
      "eval_samples_per_second": 13.372,
      "eval_steps_per_second": 6.686,
      "step": 511
    },
    {
      "epoch": 24.0,
      "grad_norm": 0.06139756739139557,
      "learning_rate": 1.0606060606060607e-05,
      "loss": 0.0633,
      "step": 534
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.55,
      "eval_loss": 1.5016696453094482,
      "eval_runtime": 1.4463,
      "eval_samples_per_second": 13.828,
      "eval_steps_per_second": 6.914,
      "step": 534
    },
    {
      "epoch": 24.98876404494382,
      "grad_norm": 0.3579421639442444,
      "learning_rate": 8.754208754208755e-06,
      "loss": 0.031,
      "step": 556
    },
    {
      "epoch": 24.98876404494382,
      "eval_accuracy": 0.55,
      "eval_loss": 1.483483076095581,
      "eval_runtime": 1.4688,
      "eval_samples_per_second": 13.617,
      "eval_steps_per_second": 6.808,
      "step": 556
    },
    {
      "epoch": 25.97752808988764,
      "grad_norm": 4.043008804321289,
      "learning_rate": 6.902356902356903e-06,
      "loss": 0.0926,
      "step": 578
    },
    {
      "epoch": 25.97752808988764,
      "eval_accuracy": 0.6,
      "eval_loss": 1.4584197998046875,
      "eval_runtime": 1.398,
      "eval_samples_per_second": 14.306,
      "eval_steps_per_second": 7.153,
      "step": 578
    },
    {
      "epoch": 26.96629213483146,
      "grad_norm": 0.06617555022239685,
      "learning_rate": 5.050505050505051e-06,
      "loss": 0.067,
      "step": 600
    },
    {
      "epoch": 26.96629213483146,
      "eval_accuracy": 0.65,
      "eval_loss": 1.5056991577148438,
      "eval_runtime": 1.444,
      "eval_samples_per_second": 13.85,
      "eval_steps_per_second": 6.925,
      "step": 600
    },
    {
      "epoch": 28.0,
      "grad_norm": 0.05678021162748337,
      "learning_rate": 3.1144781144781145e-06,
      "loss": 0.0344,
      "step": 623
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.6,
      "eval_loss": 1.4680153131484985,
      "eval_runtime": 1.4795,
      "eval_samples_per_second": 13.518,
      "eval_steps_per_second": 6.759,
      "step": 623
    },
    {
      "epoch": 28.98876404494382,
      "grad_norm": 8.12785816192627,
      "learning_rate": 1.2626262626262627e-06,
      "loss": 0.0806,
      "step": 645
    },
    {
      "epoch": 28.98876404494382,
      "eval_accuracy": 0.6,
      "eval_loss": 1.4399160146713257,
      "eval_runtime": 1.4391,
      "eval_samples_per_second": 13.898,
      "eval_steps_per_second": 6.949,
      "step": 645
    },
    {
      "epoch": 29.662921348314608,
      "grad_norm": 0.07079606503248215,
      "learning_rate": 0.0,
      "loss": 0.0329,
      "step": 660
    },
    {
      "epoch": 29.662921348314608,
      "eval_accuracy": 0.6,
      "eval_loss": 1.4424515962600708,
      "eval_runtime": 1.4569,
      "eval_samples_per_second": 13.728,
      "eval_steps_per_second": 6.864,
      "step": 660
    },
    {
      "epoch": 29.662921348314608,
      "step": 660,
      "total_flos": 4.091833752227021e+17,
      "train_loss": 0.40561601510553646,
      "train_runtime": 567.9414,
      "train_samples_per_second": 9.402,
      "train_steps_per_second": 1.162
    }
  ],
  "logging_steps": 10,
  "max_steps": 660,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.091833752227021e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}