beit-base-patch16-224-RH / trainer_state.json
Augusto777's picture
End of training
a6c8b8e verified
{
"best_metric": 0.8037383177570093,
"best_model_checkpoint": "beit-base-patch16-224-RH\\checkpoint-232",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.7926577925682068,
"eval_runtime": 2.0185,
"eval_samples_per_second": 53.011,
"eval_steps_per_second": 3.468,
"step": 8
},
{
"epoch": 1.25,
"learning_rate": 1.5625e-05,
"loss": 0.8183,
"step": 10
},
{
"epoch": 2.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.7411813735961914,
"eval_runtime": 1.969,
"eval_samples_per_second": 54.343,
"eval_steps_per_second": 3.555,
"step": 16
},
{
"epoch": 2.5,
"learning_rate": 3.125e-05,
"loss": 0.7414,
"step": 20
},
{
"epoch": 3.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6850691437721252,
"eval_runtime": 2.004,
"eval_samples_per_second": 53.394,
"eval_steps_per_second": 3.493,
"step": 24
},
{
"epoch": 3.75,
"learning_rate": 4.6875e-05,
"loss": 0.6837,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": 0.5887850467289719,
"eval_loss": 0.6638379693031311,
"eval_runtime": 2.1135,
"eval_samples_per_second": 50.627,
"eval_steps_per_second": 3.312,
"step": 32
},
{
"epoch": 5.0,
"learning_rate": 4.8611111111111115e-05,
"loss": 0.6621,
"step": 40
},
{
"epoch": 5.0,
"eval_accuracy": 0.5981308411214953,
"eval_loss": 0.6619369387626648,
"eval_runtime": 1.9694,
"eval_samples_per_second": 54.331,
"eval_steps_per_second": 3.554,
"step": 40
},
{
"epoch": 6.0,
"eval_accuracy": 0.6261682242990654,
"eval_loss": 0.6446465253829956,
"eval_runtime": 2.0965,
"eval_samples_per_second": 51.037,
"eval_steps_per_second": 3.339,
"step": 48
},
{
"epoch": 6.25,
"learning_rate": 4.6875e-05,
"loss": 0.6538,
"step": 50
},
{
"epoch": 7.0,
"eval_accuracy": 0.6728971962616822,
"eval_loss": 0.6369854807853699,
"eval_runtime": 1.9695,
"eval_samples_per_second": 54.33,
"eval_steps_per_second": 3.554,
"step": 56
},
{
"epoch": 7.5,
"learning_rate": 4.5138888888888894e-05,
"loss": 0.641,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.6635514018691588,
"eval_loss": 0.6484658122062683,
"eval_runtime": 2.082,
"eval_samples_per_second": 51.394,
"eval_steps_per_second": 3.362,
"step": 64
},
{
"epoch": 8.75,
"learning_rate": 4.340277777777778e-05,
"loss": 0.628,
"step": 70
},
{
"epoch": 9.0,
"eval_accuracy": 0.6448598130841121,
"eval_loss": 0.6392771601676941,
"eval_runtime": 1.9785,
"eval_samples_per_second": 54.083,
"eval_steps_per_second": 3.538,
"step": 72
},
{
"epoch": 10.0,
"learning_rate": 4.166666666666667e-05,
"loss": 0.6187,
"step": 80
},
{
"epoch": 10.0,
"eval_accuracy": 0.5794392523364486,
"eval_loss": 0.640946626663208,
"eval_runtime": 1.972,
"eval_samples_per_second": 54.261,
"eval_steps_per_second": 3.55,
"step": 80
},
{
"epoch": 11.0,
"eval_accuracy": 0.5794392523364486,
"eval_loss": 0.6359996795654297,
"eval_runtime": 1.9835,
"eval_samples_per_second": 53.946,
"eval_steps_per_second": 3.529,
"step": 88
},
{
"epoch": 11.25,
"learning_rate": 3.993055555555556e-05,
"loss": 0.6075,
"step": 90
},
{
"epoch": 12.0,
"eval_accuracy": 0.6355140186915887,
"eval_loss": 0.6208956837654114,
"eval_runtime": 1.9903,
"eval_samples_per_second": 53.761,
"eval_steps_per_second": 3.517,
"step": 96
},
{
"epoch": 12.5,
"learning_rate": 3.8194444444444444e-05,
"loss": 0.6081,
"step": 100
},
{
"epoch": 13.0,
"eval_accuracy": 0.6448598130841121,
"eval_loss": 0.6377462148666382,
"eval_runtime": 1.992,
"eval_samples_per_second": 53.716,
"eval_steps_per_second": 3.514,
"step": 104
},
{
"epoch": 13.75,
"learning_rate": 3.6458333333333336e-05,
"loss": 0.5886,
"step": 110
},
{
"epoch": 14.0,
"eval_accuracy": 0.6728971962616822,
"eval_loss": 0.5931380987167358,
"eval_runtime": 1.9725,
"eval_samples_per_second": 54.247,
"eval_steps_per_second": 3.549,
"step": 112
},
{
"epoch": 15.0,
"learning_rate": 3.472222222222222e-05,
"loss": 0.5945,
"step": 120
},
{
"epoch": 15.0,
"eval_accuracy": 0.6635514018691588,
"eval_loss": 0.6107914447784424,
"eval_runtime": 1.983,
"eval_samples_per_second": 53.959,
"eval_steps_per_second": 3.53,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.7009345794392523,
"eval_loss": 0.584554135799408,
"eval_runtime": 1.992,
"eval_samples_per_second": 53.716,
"eval_steps_per_second": 3.514,
"step": 128
},
{
"epoch": 16.25,
"learning_rate": 3.2986111111111115e-05,
"loss": 0.5808,
"step": 130
},
{
"epoch": 17.0,
"eval_accuracy": 0.6822429906542056,
"eval_loss": 0.5945072770118713,
"eval_runtime": 1.98,
"eval_samples_per_second": 54.041,
"eval_steps_per_second": 3.535,
"step": 136
},
{
"epoch": 17.5,
"learning_rate": 3.125e-05,
"loss": 0.5636,
"step": 140
},
{
"epoch": 18.0,
"eval_accuracy": 0.6635514018691588,
"eval_loss": 0.7401838302612305,
"eval_runtime": 1.9965,
"eval_samples_per_second": 53.594,
"eval_steps_per_second": 3.506,
"step": 144
},
{
"epoch": 18.75,
"learning_rate": 2.951388888888889e-05,
"loss": 0.5839,
"step": 150
},
{
"epoch": 19.0,
"eval_accuracy": 0.6915887850467289,
"eval_loss": 0.5661436319351196,
"eval_runtime": 2.014,
"eval_samples_per_second": 53.129,
"eval_steps_per_second": 3.476,
"step": 152
},
{
"epoch": 20.0,
"learning_rate": 2.777777777777778e-05,
"loss": 0.5166,
"step": 160
},
{
"epoch": 20.0,
"eval_accuracy": 0.6635514018691588,
"eval_loss": 0.5359623432159424,
"eval_runtime": 1.98,
"eval_samples_per_second": 54.041,
"eval_steps_per_second": 3.535,
"step": 160
},
{
"epoch": 21.0,
"eval_accuracy": 0.6728971962616822,
"eval_loss": 0.5621436238288879,
"eval_runtime": 2.0255,
"eval_samples_per_second": 52.827,
"eval_steps_per_second": 3.456,
"step": 168
},
{
"epoch": 21.25,
"learning_rate": 2.604166666666667e-05,
"loss": 0.5165,
"step": 170
},
{
"epoch": 22.0,
"eval_accuracy": 0.719626168224299,
"eval_loss": 0.5508888363838196,
"eval_runtime": 1.9745,
"eval_samples_per_second": 54.192,
"eval_steps_per_second": 3.545,
"step": 176
},
{
"epoch": 22.5,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.5308,
"step": 180
},
{
"epoch": 23.0,
"eval_accuracy": 0.7570093457943925,
"eval_loss": 0.5601618885993958,
"eval_runtime": 1.9885,
"eval_samples_per_second": 53.81,
"eval_steps_per_second": 3.52,
"step": 184
},
{
"epoch": 23.75,
"learning_rate": 2.2569444444444447e-05,
"loss": 0.4595,
"step": 190
},
{
"epoch": 24.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.47350895404815674,
"eval_runtime": 1.969,
"eval_samples_per_second": 54.343,
"eval_steps_per_second": 3.555,
"step": 192
},
{
"epoch": 25.0,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.4553,
"step": 200
},
{
"epoch": 25.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.4696148931980133,
"eval_runtime": 1.974,
"eval_samples_per_second": 54.206,
"eval_steps_per_second": 3.546,
"step": 200
},
{
"epoch": 26.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.5305755734443665,
"eval_runtime": 1.97,
"eval_samples_per_second": 54.316,
"eval_steps_per_second": 3.553,
"step": 208
},
{
"epoch": 26.25,
"learning_rate": 1.9097222222222222e-05,
"loss": 0.4004,
"step": 210
},
{
"epoch": 27.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.48186224699020386,
"eval_runtime": 1.972,
"eval_samples_per_second": 54.26,
"eval_steps_per_second": 3.55,
"step": 216
},
{
"epoch": 27.5,
"learning_rate": 1.736111111111111e-05,
"loss": 0.3954,
"step": 220
},
{
"epoch": 28.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.483099102973938,
"eval_runtime": 2.0135,
"eval_samples_per_second": 53.142,
"eval_steps_per_second": 3.477,
"step": 224
},
{
"epoch": 28.75,
"learning_rate": 1.5625e-05,
"loss": 0.3521,
"step": 230
},
{
"epoch": 29.0,
"eval_accuracy": 0.8037383177570093,
"eval_loss": 0.43397656083106995,
"eval_runtime": 1.9725,
"eval_samples_per_second": 54.246,
"eval_steps_per_second": 3.549,
"step": 232
},
{
"epoch": 30.0,
"learning_rate": 1.388888888888889e-05,
"loss": 0.3436,
"step": 240
},
{
"epoch": 30.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 0.4789830446243286,
"eval_runtime": 2.015,
"eval_samples_per_second": 53.102,
"eval_steps_per_second": 3.474,
"step": 240
},
{
"epoch": 31.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 0.4719656705856323,
"eval_runtime": 1.9775,
"eval_samples_per_second": 54.11,
"eval_steps_per_second": 3.54,
"step": 248
},
{
"epoch": 31.25,
"learning_rate": 1.2152777777777779e-05,
"loss": 0.34,
"step": 250
},
{
"epoch": 32.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.5282993912696838,
"eval_runtime": 1.992,
"eval_samples_per_second": 53.716,
"eval_steps_per_second": 3.514,
"step": 256
},
{
"epoch": 32.5,
"learning_rate": 1.0416666666666668e-05,
"loss": 0.2995,
"step": 260
},
{
"epoch": 33.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.43829986453056335,
"eval_runtime": 1.968,
"eval_samples_per_second": 54.371,
"eval_steps_per_second": 3.557,
"step": 264
},
{
"epoch": 33.75,
"learning_rate": 8.680555555555556e-06,
"loss": 0.2951,
"step": 270
},
{
"epoch": 34.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.4739982783794403,
"eval_runtime": 1.9943,
"eval_samples_per_second": 53.653,
"eval_steps_per_second": 3.51,
"step": 272
},
{
"epoch": 35.0,
"learning_rate": 6.944444444444445e-06,
"loss": 0.3094,
"step": 280
},
{
"epoch": 35.0,
"eval_accuracy": 0.7663551401869159,
"eval_loss": 0.5863447785377502,
"eval_runtime": 1.9985,
"eval_samples_per_second": 53.541,
"eval_steps_per_second": 3.503,
"step": 280
},
{
"epoch": 36.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.4483156204223633,
"eval_runtime": 1.98,
"eval_samples_per_second": 54.041,
"eval_steps_per_second": 3.535,
"step": 288
},
{
"epoch": 36.25,
"learning_rate": 5.208333333333334e-06,
"loss": 0.2963,
"step": 290
},
{
"epoch": 37.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.4758929908275604,
"eval_runtime": 1.989,
"eval_samples_per_second": 53.797,
"eval_steps_per_second": 3.519,
"step": 296
},
{
"epoch": 37.5,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.3045,
"step": 300
},
{
"epoch": 38.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.446876585483551,
"eval_runtime": 2.006,
"eval_samples_per_second": 53.341,
"eval_steps_per_second": 3.49,
"step": 304
},
{
"epoch": 38.75,
"learning_rate": 1.7361111111111112e-06,
"loss": 0.2739,
"step": 310
},
{
"epoch": 39.0,
"eval_accuracy": 0.7850467289719626,
"eval_loss": 0.4517090618610382,
"eval_runtime": 1.971,
"eval_samples_per_second": 54.288,
"eval_steps_per_second": 3.552,
"step": 312
},
{
"epoch": 40.0,
"learning_rate": 0.0,
"loss": 0.2717,
"step": 320
},
{
"epoch": 40.0,
"eval_accuracy": 0.794392523364486,
"eval_loss": 0.4654483497142792,
"eval_runtime": 2.08,
"eval_samples_per_second": 51.442,
"eval_steps_per_second": 3.365,
"step": 320
},
{
"epoch": 40.0,
"step": 320,
"total_flos": 1.5429806632629043e+18,
"train_loss": 0.4979598492383957,
"train_runtime": 690.9933,
"train_samples_per_second": 28.828,
"train_steps_per_second": 0.463
}
],
"logging_steps": 10,
"max_steps": 320,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"total_flos": 1.5429806632629043e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}