vit-base-patch16-224-isic248 / trainer_state.json
vananhle's picture
End of training
fda89f4 verified
{
"best_metric": 0.65,
"best_model_checkpoint": "vit-base-patch16-224-isic248/checkpoint-600",
"epoch": 29.662921348314608,
"eval_steps": 500,
"global_step": 660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9887640449438202,
"grad_norm": 12.167838096618652,
"learning_rate": 1.6666666666666667e-05,
"loss": 2.4757,
"step": 22
},
{
"epoch": 0.9887640449438202,
"eval_accuracy": 0.15,
"eval_loss": 2.201775312423706,
"eval_runtime": 1.4445,
"eval_samples_per_second": 13.846,
"eval_steps_per_second": 6.923,
"step": 22
},
{
"epoch": 1.9775280898876404,
"grad_norm": 9.576192855834961,
"learning_rate": 3.3333333333333335e-05,
"loss": 2.0035,
"step": 44
},
{
"epoch": 1.9775280898876404,
"eval_accuracy": 0.35,
"eval_loss": 1.9165420532226562,
"eval_runtime": 1.4545,
"eval_samples_per_second": 13.75,
"eval_steps_per_second": 6.875,
"step": 44
},
{
"epoch": 2.966292134831461,
"grad_norm": 9.708001136779785,
"learning_rate": 5e-05,
"loss": 1.5959,
"step": 66
},
{
"epoch": 2.966292134831461,
"eval_accuracy": 0.35,
"eval_loss": 1.6903650760650635,
"eval_runtime": 1.4059,
"eval_samples_per_second": 14.226,
"eval_steps_per_second": 7.113,
"step": 66
},
{
"epoch": 4.0,
"grad_norm": 12.472097396850586,
"learning_rate": 4.806397306397307e-05,
"loss": 1.1695,
"step": 89
},
{
"epoch": 4.0,
"eval_accuracy": 0.45,
"eval_loss": 1.5546133518218994,
"eval_runtime": 1.4757,
"eval_samples_per_second": 13.553,
"eval_steps_per_second": 6.776,
"step": 89
},
{
"epoch": 4.98876404494382,
"grad_norm": 12.761962890625,
"learning_rate": 4.621212121212121e-05,
"loss": 0.8869,
"step": 111
},
{
"epoch": 4.98876404494382,
"eval_accuracy": 0.45,
"eval_loss": 1.5020616054534912,
"eval_runtime": 1.4294,
"eval_samples_per_second": 13.992,
"eval_steps_per_second": 6.996,
"step": 111
},
{
"epoch": 5.97752808988764,
"grad_norm": 7.685186386108398,
"learning_rate": 4.436026936026936e-05,
"loss": 0.6398,
"step": 133
},
{
"epoch": 5.97752808988764,
"eval_accuracy": 0.5,
"eval_loss": 1.5211018323898315,
"eval_runtime": 1.4329,
"eval_samples_per_second": 13.958,
"eval_steps_per_second": 6.979,
"step": 133
},
{
"epoch": 6.966292134831461,
"grad_norm": 8.553827285766602,
"learning_rate": 4.250841750841751e-05,
"loss": 0.5405,
"step": 155
},
{
"epoch": 6.966292134831461,
"eval_accuracy": 0.55,
"eval_loss": 1.4225242137908936,
"eval_runtime": 1.4838,
"eval_samples_per_second": 13.479,
"eval_steps_per_second": 6.74,
"step": 155
},
{
"epoch": 8.0,
"grad_norm": 4.784798622131348,
"learning_rate": 4.0572390572390575e-05,
"loss": 0.3626,
"step": 178
},
{
"epoch": 8.0,
"eval_accuracy": 0.45,
"eval_loss": 1.3762075901031494,
"eval_runtime": 1.4684,
"eval_samples_per_second": 13.62,
"eval_steps_per_second": 6.81,
"step": 178
},
{
"epoch": 8.98876404494382,
"grad_norm": 1.4554903507232666,
"learning_rate": 3.872053872053872e-05,
"loss": 0.2971,
"step": 200
},
{
"epoch": 8.98876404494382,
"eval_accuracy": 0.55,
"eval_loss": 1.5557674169540405,
"eval_runtime": 1.4444,
"eval_samples_per_second": 13.847,
"eval_steps_per_second": 6.923,
"step": 200
},
{
"epoch": 9.97752808988764,
"grad_norm": 7.8950629234313965,
"learning_rate": 3.686868686868687e-05,
"loss": 0.2324,
"step": 222
},
{
"epoch": 9.97752808988764,
"eval_accuracy": 0.55,
"eval_loss": 1.4037200212478638,
"eval_runtime": 1.4411,
"eval_samples_per_second": 13.878,
"eval_steps_per_second": 6.939,
"step": 222
},
{
"epoch": 10.96629213483146,
"grad_norm": 11.152678489685059,
"learning_rate": 3.501683501683502e-05,
"loss": 0.169,
"step": 244
},
{
"epoch": 10.96629213483146,
"eval_accuracy": 0.55,
"eval_loss": 1.4981868267059326,
"eval_runtime": 1.4462,
"eval_samples_per_second": 13.829,
"eval_steps_per_second": 6.915,
"step": 244
},
{
"epoch": 12.0,
"grad_norm": 3.168487310409546,
"learning_rate": 3.308080808080809e-05,
"loss": 0.1625,
"step": 267
},
{
"epoch": 12.0,
"eval_accuracy": 0.55,
"eval_loss": 1.3436851501464844,
"eval_runtime": 1.463,
"eval_samples_per_second": 13.671,
"eval_steps_per_second": 6.835,
"step": 267
},
{
"epoch": 12.98876404494382,
"grad_norm": 3.2774059772491455,
"learning_rate": 3.122895622895623e-05,
"loss": 0.1088,
"step": 289
},
{
"epoch": 12.98876404494382,
"eval_accuracy": 0.55,
"eval_loss": 1.296514630317688,
"eval_runtime": 1.5097,
"eval_samples_per_second": 13.247,
"eval_steps_per_second": 6.624,
"step": 289
},
{
"epoch": 13.97752808988764,
"grad_norm": 0.407360315322876,
"learning_rate": 2.9377104377104382e-05,
"loss": 0.1924,
"step": 311
},
{
"epoch": 13.97752808988764,
"eval_accuracy": 0.5,
"eval_loss": 1.255786657333374,
"eval_runtime": 1.4427,
"eval_samples_per_second": 13.863,
"eval_steps_per_second": 6.932,
"step": 311
},
{
"epoch": 14.96629213483146,
"grad_norm": 0.5581790208816528,
"learning_rate": 2.7525252525252528e-05,
"loss": 0.124,
"step": 333
},
{
"epoch": 14.96629213483146,
"eval_accuracy": 0.5,
"eval_loss": 1.4686721563339233,
"eval_runtime": 1.5037,
"eval_samples_per_second": 13.3,
"eval_steps_per_second": 6.65,
"step": 333
},
{
"epoch": 16.0,
"grad_norm": 12.646324157714844,
"learning_rate": 2.5589225589225592e-05,
"loss": 0.0901,
"step": 356
},
{
"epoch": 16.0,
"eval_accuracy": 0.6,
"eval_loss": 1.5054057836532593,
"eval_runtime": 1.4476,
"eval_samples_per_second": 13.816,
"eval_steps_per_second": 6.908,
"step": 356
},
{
"epoch": 16.98876404494382,
"grad_norm": 0.19395825266838074,
"learning_rate": 2.3737373737373738e-05,
"loss": 0.0641,
"step": 378
},
{
"epoch": 16.98876404494382,
"eval_accuracy": 0.5,
"eval_loss": 1.4898236989974976,
"eval_runtime": 1.4446,
"eval_samples_per_second": 13.844,
"eval_steps_per_second": 6.922,
"step": 378
},
{
"epoch": 17.97752808988764,
"grad_norm": 4.937531471252441,
"learning_rate": 2.1885521885521887e-05,
"loss": 0.1093,
"step": 400
},
{
"epoch": 17.97752808988764,
"eval_accuracy": 0.55,
"eval_loss": 1.3880454301834106,
"eval_runtime": 1.4563,
"eval_samples_per_second": 13.733,
"eval_steps_per_second": 6.867,
"step": 400
},
{
"epoch": 18.96629213483146,
"grad_norm": 0.08288563787937164,
"learning_rate": 2.0033670033670036e-05,
"loss": 0.1307,
"step": 422
},
{
"epoch": 18.96629213483146,
"eval_accuracy": 0.45,
"eval_loss": 1.460898995399475,
"eval_runtime": 1.4485,
"eval_samples_per_second": 13.807,
"eval_steps_per_second": 6.903,
"step": 422
},
{
"epoch": 20.0,
"grad_norm": 0.22473017871379852,
"learning_rate": 1.80976430976431e-05,
"loss": 0.1268,
"step": 445
},
{
"epoch": 20.0,
"eval_accuracy": 0.6,
"eval_loss": 1.5604647397994995,
"eval_runtime": 1.4452,
"eval_samples_per_second": 13.839,
"eval_steps_per_second": 6.919,
"step": 445
},
{
"epoch": 20.98876404494382,
"grad_norm": 0.07075084000825882,
"learning_rate": 1.6245791245791246e-05,
"loss": 0.0416,
"step": 467
},
{
"epoch": 20.98876404494382,
"eval_accuracy": 0.55,
"eval_loss": 1.5643846988677979,
"eval_runtime": 1.4571,
"eval_samples_per_second": 13.725,
"eval_steps_per_second": 6.863,
"step": 467
},
{
"epoch": 21.97752808988764,
"grad_norm": 0.09144988656044006,
"learning_rate": 1.4393939393939396e-05,
"loss": 0.0538,
"step": 489
},
{
"epoch": 21.97752808988764,
"eval_accuracy": 0.6,
"eval_loss": 1.5291801691055298,
"eval_runtime": 1.4841,
"eval_samples_per_second": 13.476,
"eval_steps_per_second": 6.738,
"step": 489
},
{
"epoch": 22.96629213483146,
"grad_norm": 3.7509801387786865,
"learning_rate": 1.2542087542087543e-05,
"loss": 0.109,
"step": 511
},
{
"epoch": 22.96629213483146,
"eval_accuracy": 0.6,
"eval_loss": 1.4048659801483154,
"eval_runtime": 1.4957,
"eval_samples_per_second": 13.372,
"eval_steps_per_second": 6.686,
"step": 511
},
{
"epoch": 24.0,
"grad_norm": 0.06139756739139557,
"learning_rate": 1.0606060606060607e-05,
"loss": 0.0633,
"step": 534
},
{
"epoch": 24.0,
"eval_accuracy": 0.55,
"eval_loss": 1.5016696453094482,
"eval_runtime": 1.4463,
"eval_samples_per_second": 13.828,
"eval_steps_per_second": 6.914,
"step": 534
},
{
"epoch": 24.98876404494382,
"grad_norm": 0.3579421639442444,
"learning_rate": 8.754208754208755e-06,
"loss": 0.031,
"step": 556
},
{
"epoch": 24.98876404494382,
"eval_accuracy": 0.55,
"eval_loss": 1.483483076095581,
"eval_runtime": 1.4688,
"eval_samples_per_second": 13.617,
"eval_steps_per_second": 6.808,
"step": 556
},
{
"epoch": 25.97752808988764,
"grad_norm": 4.043008804321289,
"learning_rate": 6.902356902356903e-06,
"loss": 0.0926,
"step": 578
},
{
"epoch": 25.97752808988764,
"eval_accuracy": 0.6,
"eval_loss": 1.4584197998046875,
"eval_runtime": 1.398,
"eval_samples_per_second": 14.306,
"eval_steps_per_second": 7.153,
"step": 578
},
{
"epoch": 26.96629213483146,
"grad_norm": 0.06617555022239685,
"learning_rate": 5.050505050505051e-06,
"loss": 0.067,
"step": 600
},
{
"epoch": 26.96629213483146,
"eval_accuracy": 0.65,
"eval_loss": 1.5056991577148438,
"eval_runtime": 1.444,
"eval_samples_per_second": 13.85,
"eval_steps_per_second": 6.925,
"step": 600
},
{
"epoch": 28.0,
"grad_norm": 0.05678021162748337,
"learning_rate": 3.1144781144781145e-06,
"loss": 0.0344,
"step": 623
},
{
"epoch": 28.0,
"eval_accuracy": 0.6,
"eval_loss": 1.4680153131484985,
"eval_runtime": 1.4795,
"eval_samples_per_second": 13.518,
"eval_steps_per_second": 6.759,
"step": 623
},
{
"epoch": 28.98876404494382,
"grad_norm": 8.12785816192627,
"learning_rate": 1.2626262626262627e-06,
"loss": 0.0806,
"step": 645
},
{
"epoch": 28.98876404494382,
"eval_accuracy": 0.6,
"eval_loss": 1.4399160146713257,
"eval_runtime": 1.4391,
"eval_samples_per_second": 13.898,
"eval_steps_per_second": 6.949,
"step": 645
},
{
"epoch": 29.662921348314608,
"grad_norm": 0.07079606503248215,
"learning_rate": 0.0,
"loss": 0.0329,
"step": 660
},
{
"epoch": 29.662921348314608,
"eval_accuracy": 0.6,
"eval_loss": 1.4424515962600708,
"eval_runtime": 1.4569,
"eval_samples_per_second": 13.728,
"eval_steps_per_second": 6.864,
"step": 660
},
{
"epoch": 29.662921348314608,
"step": 660,
"total_flos": 4.091833752227021e+17,
"train_loss": 0.40561601510553646,
"train_runtime": 567.9414,
"train_samples_per_second": 9.402,
"train_steps_per_second": 1.162
}
],
"logging_steps": 10,
"max_steps": 660,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.091833752227021e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}