{ "best_metric": 0.65, "best_model_checkpoint": "vit-base-patch16-224-isic248/checkpoint-600", "epoch": 29.662921348314608, "eval_steps": 500, "global_step": 660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9887640449438202, "grad_norm": 12.167838096618652, "learning_rate": 1.6666666666666667e-05, "loss": 2.4757, "step": 22 }, { "epoch": 0.9887640449438202, "eval_accuracy": 0.15, "eval_loss": 2.201775312423706, "eval_runtime": 1.4445, "eval_samples_per_second": 13.846, "eval_steps_per_second": 6.923, "step": 22 }, { "epoch": 1.9775280898876404, "grad_norm": 9.576192855834961, "learning_rate": 3.3333333333333335e-05, "loss": 2.0035, "step": 44 }, { "epoch": 1.9775280898876404, "eval_accuracy": 0.35, "eval_loss": 1.9165420532226562, "eval_runtime": 1.4545, "eval_samples_per_second": 13.75, "eval_steps_per_second": 6.875, "step": 44 }, { "epoch": 2.966292134831461, "grad_norm": 9.708001136779785, "learning_rate": 5e-05, "loss": 1.5959, "step": 66 }, { "epoch": 2.966292134831461, "eval_accuracy": 0.35, "eval_loss": 1.6903650760650635, "eval_runtime": 1.4059, "eval_samples_per_second": 14.226, "eval_steps_per_second": 7.113, "step": 66 }, { "epoch": 4.0, "grad_norm": 12.472097396850586, "learning_rate": 4.806397306397307e-05, "loss": 1.1695, "step": 89 }, { "epoch": 4.0, "eval_accuracy": 0.45, "eval_loss": 1.5546133518218994, "eval_runtime": 1.4757, "eval_samples_per_second": 13.553, "eval_steps_per_second": 6.776, "step": 89 }, { "epoch": 4.98876404494382, "grad_norm": 12.761962890625, "learning_rate": 4.621212121212121e-05, "loss": 0.8869, "step": 111 }, { "epoch": 4.98876404494382, "eval_accuracy": 0.45, "eval_loss": 1.5020616054534912, "eval_runtime": 1.4294, "eval_samples_per_second": 13.992, "eval_steps_per_second": 6.996, "step": 111 }, { "epoch": 5.97752808988764, "grad_norm": 7.685186386108398, "learning_rate": 4.436026936026936e-05, "loss": 0.6398, "step": 133 }, { "epoch": 5.97752808988764, "eval_accuracy": 0.5, "eval_loss": 1.5211018323898315, "eval_runtime": 1.4329, "eval_samples_per_second": 13.958, "eval_steps_per_second": 6.979, "step": 133 }, { "epoch": 6.966292134831461, "grad_norm": 8.553827285766602, "learning_rate": 4.250841750841751e-05, "loss": 0.5405, "step": 155 }, { "epoch": 6.966292134831461, "eval_accuracy": 0.55, "eval_loss": 1.4225242137908936, "eval_runtime": 1.4838, "eval_samples_per_second": 13.479, "eval_steps_per_second": 6.74, "step": 155 }, { "epoch": 8.0, "grad_norm": 4.784798622131348, "learning_rate": 4.0572390572390575e-05, "loss": 0.3626, "step": 178 }, { "epoch": 8.0, "eval_accuracy": 0.45, "eval_loss": 1.3762075901031494, "eval_runtime": 1.4684, "eval_samples_per_second": 13.62, "eval_steps_per_second": 6.81, "step": 178 }, { "epoch": 8.98876404494382, "grad_norm": 1.4554903507232666, "learning_rate": 3.872053872053872e-05, "loss": 0.2971, "step": 200 }, { "epoch": 8.98876404494382, "eval_accuracy": 0.55, "eval_loss": 1.5557674169540405, "eval_runtime": 1.4444, "eval_samples_per_second": 13.847, "eval_steps_per_second": 6.923, "step": 200 }, { "epoch": 9.97752808988764, "grad_norm": 7.8950629234313965, "learning_rate": 3.686868686868687e-05, "loss": 0.2324, "step": 222 }, { "epoch": 9.97752808988764, "eval_accuracy": 0.55, "eval_loss": 1.4037200212478638, "eval_runtime": 1.4411, "eval_samples_per_second": 13.878, "eval_steps_per_second": 6.939, "step": 222 }, { "epoch": 10.96629213483146, "grad_norm": 11.152678489685059, "learning_rate": 3.501683501683502e-05, "loss": 0.169, "step": 244 }, { "epoch": 10.96629213483146, "eval_accuracy": 0.55, "eval_loss": 1.4981868267059326, "eval_runtime": 1.4462, "eval_samples_per_second": 13.829, "eval_steps_per_second": 6.915, "step": 244 }, { "epoch": 12.0, "grad_norm": 3.168487310409546, "learning_rate": 3.308080808080809e-05, "loss": 0.1625, "step": 267 }, { "epoch": 12.0, "eval_accuracy": 0.55, "eval_loss": 1.3436851501464844, "eval_runtime": 1.463, "eval_samples_per_second": 13.671, "eval_steps_per_second": 6.835, "step": 267 }, { "epoch": 12.98876404494382, "grad_norm": 3.2774059772491455, "learning_rate": 3.122895622895623e-05, "loss": 0.1088, "step": 289 }, { "epoch": 12.98876404494382, "eval_accuracy": 0.55, "eval_loss": 1.296514630317688, "eval_runtime": 1.5097, "eval_samples_per_second": 13.247, "eval_steps_per_second": 6.624, "step": 289 }, { "epoch": 13.97752808988764, "grad_norm": 0.407360315322876, "learning_rate": 2.9377104377104382e-05, "loss": 0.1924, "step": 311 }, { "epoch": 13.97752808988764, "eval_accuracy": 0.5, "eval_loss": 1.255786657333374, "eval_runtime": 1.4427, "eval_samples_per_second": 13.863, "eval_steps_per_second": 6.932, "step": 311 }, { "epoch": 14.96629213483146, "grad_norm": 0.5581790208816528, "learning_rate": 2.7525252525252528e-05, "loss": 0.124, "step": 333 }, { "epoch": 14.96629213483146, "eval_accuracy": 0.5, "eval_loss": 1.4686721563339233, "eval_runtime": 1.5037, "eval_samples_per_second": 13.3, "eval_steps_per_second": 6.65, "step": 333 }, { "epoch": 16.0, "grad_norm": 12.646324157714844, "learning_rate": 2.5589225589225592e-05, "loss": 0.0901, "step": 356 }, { "epoch": 16.0, "eval_accuracy": 0.6, "eval_loss": 1.5054057836532593, "eval_runtime": 1.4476, "eval_samples_per_second": 13.816, "eval_steps_per_second": 6.908, "step": 356 }, { "epoch": 16.98876404494382, "grad_norm": 0.19395825266838074, "learning_rate": 2.3737373737373738e-05, "loss": 0.0641, "step": 378 }, { "epoch": 16.98876404494382, "eval_accuracy": 0.5, "eval_loss": 1.4898236989974976, "eval_runtime": 1.4446, "eval_samples_per_second": 13.844, "eval_steps_per_second": 6.922, "step": 378 }, { "epoch": 17.97752808988764, "grad_norm": 4.937531471252441, "learning_rate": 2.1885521885521887e-05, "loss": 0.1093, "step": 400 }, { "epoch": 17.97752808988764, "eval_accuracy": 0.55, "eval_loss": 1.3880454301834106, "eval_runtime": 1.4563, "eval_samples_per_second": 13.733, "eval_steps_per_second": 6.867, "step": 400 }, { "epoch": 18.96629213483146, "grad_norm": 0.08288563787937164, "learning_rate": 2.0033670033670036e-05, "loss": 0.1307, "step": 422 }, { "epoch": 18.96629213483146, "eval_accuracy": 0.45, "eval_loss": 1.460898995399475, "eval_runtime": 1.4485, "eval_samples_per_second": 13.807, "eval_steps_per_second": 6.903, "step": 422 }, { "epoch": 20.0, "grad_norm": 0.22473017871379852, "learning_rate": 1.80976430976431e-05, "loss": 0.1268, "step": 445 }, { "epoch": 20.0, "eval_accuracy": 0.6, "eval_loss": 1.5604647397994995, "eval_runtime": 1.4452, "eval_samples_per_second": 13.839, "eval_steps_per_second": 6.919, "step": 445 }, { "epoch": 20.98876404494382, "grad_norm": 0.07075084000825882, "learning_rate": 1.6245791245791246e-05, "loss": 0.0416, "step": 467 }, { "epoch": 20.98876404494382, "eval_accuracy": 0.55, "eval_loss": 1.5643846988677979, "eval_runtime": 1.4571, "eval_samples_per_second": 13.725, "eval_steps_per_second": 6.863, "step": 467 }, { "epoch": 21.97752808988764, "grad_norm": 0.09144988656044006, "learning_rate": 1.4393939393939396e-05, "loss": 0.0538, "step": 489 }, { "epoch": 21.97752808988764, "eval_accuracy": 0.6, "eval_loss": 1.5291801691055298, "eval_runtime": 1.4841, "eval_samples_per_second": 13.476, "eval_steps_per_second": 6.738, "step": 489 }, { "epoch": 22.96629213483146, "grad_norm": 3.7509801387786865, "learning_rate": 1.2542087542087543e-05, "loss": 0.109, "step": 511 }, { "epoch": 22.96629213483146, "eval_accuracy": 0.6, "eval_loss": 1.4048659801483154, "eval_runtime": 1.4957, "eval_samples_per_second": 13.372, "eval_steps_per_second": 6.686, "step": 511 }, { "epoch": 24.0, "grad_norm": 0.06139756739139557, "learning_rate": 1.0606060606060607e-05, "loss": 0.0633, "step": 534 }, { "epoch": 24.0, "eval_accuracy": 0.55, "eval_loss": 1.5016696453094482, "eval_runtime": 1.4463, "eval_samples_per_second": 13.828, "eval_steps_per_second": 6.914, "step": 534 }, { "epoch": 24.98876404494382, "grad_norm": 0.3579421639442444, "learning_rate": 8.754208754208755e-06, "loss": 0.031, "step": 556 }, { "epoch": 24.98876404494382, "eval_accuracy": 0.55, "eval_loss": 1.483483076095581, "eval_runtime": 1.4688, "eval_samples_per_second": 13.617, "eval_steps_per_second": 6.808, "step": 556 }, { "epoch": 25.97752808988764, "grad_norm": 4.043008804321289, "learning_rate": 6.902356902356903e-06, "loss": 0.0926, "step": 578 }, { "epoch": 25.97752808988764, "eval_accuracy": 0.6, "eval_loss": 1.4584197998046875, "eval_runtime": 1.398, "eval_samples_per_second": 14.306, "eval_steps_per_second": 7.153, "step": 578 }, { "epoch": 26.96629213483146, "grad_norm": 0.06617555022239685, "learning_rate": 5.050505050505051e-06, "loss": 0.067, "step": 600 }, { "epoch": 26.96629213483146, "eval_accuracy": 0.65, "eval_loss": 1.5056991577148438, "eval_runtime": 1.444, "eval_samples_per_second": 13.85, "eval_steps_per_second": 6.925, "step": 600 }, { "epoch": 28.0, "grad_norm": 0.05678021162748337, "learning_rate": 3.1144781144781145e-06, "loss": 0.0344, "step": 623 }, { "epoch": 28.0, "eval_accuracy": 0.6, "eval_loss": 1.4680153131484985, "eval_runtime": 1.4795, "eval_samples_per_second": 13.518, "eval_steps_per_second": 6.759, "step": 623 }, { "epoch": 28.98876404494382, "grad_norm": 8.12785816192627, "learning_rate": 1.2626262626262627e-06, "loss": 0.0806, "step": 645 }, { "epoch": 28.98876404494382, "eval_accuracy": 0.6, "eval_loss": 1.4399160146713257, "eval_runtime": 1.4391, "eval_samples_per_second": 13.898, "eval_steps_per_second": 6.949, "step": 645 }, { "epoch": 29.662921348314608, "grad_norm": 0.07079606503248215, "learning_rate": 0.0, "loss": 0.0329, "step": 660 }, { "epoch": 29.662921348314608, "eval_accuracy": 0.6, "eval_loss": 1.4424515962600708, "eval_runtime": 1.4569, "eval_samples_per_second": 13.728, "eval_steps_per_second": 6.864, "step": 660 }, { "epoch": 29.662921348314608, "step": 660, "total_flos": 4.091833752227021e+17, "train_loss": 0.40561601510553646, "train_runtime": 567.9414, "train_samples_per_second": 9.402, "train_steps_per_second": 1.162 } ], "logging_steps": 10, "max_steps": 660, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.091833752227021e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }