RobertoSonic's picture
End of training
7176992 verified
{
"best_metric": 0.958904109589041,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV3/checkpoint-525",
"epoch": 28.585365853658537,
"eval_steps": 500,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4878048780487805,
"grad_norm": 12.429728507995605,
"learning_rate": 9.999999999999999e-06,
"loss": 4.7086,
"step": 10
},
{
"epoch": 0.975609756097561,
"grad_norm": 15.98269271850586,
"learning_rate": 1.9999999999999998e-05,
"loss": 3.9845,
"step": 20
},
{
"epoch": 1.0,
"eval_accuracy": 0.3424657534246575,
"eval_loss": 1.6832486391067505,
"eval_runtime": 0.6873,
"eval_samples_per_second": 106.209,
"eval_steps_per_second": 7.275,
"step": 21
},
{
"epoch": 1.4390243902439024,
"grad_norm": 19.85782814025879,
"learning_rate": 3e-05,
"loss": 2.9173,
"step": 30
},
{
"epoch": 1.9268292682926829,
"grad_norm": 23.9186954498291,
"learning_rate": 2.9473684210526314e-05,
"loss": 2.4369,
"step": 40
},
{
"epoch": 2.0,
"eval_accuracy": 0.4383561643835616,
"eval_loss": 1.1981052160263062,
"eval_runtime": 0.7433,
"eval_samples_per_second": 98.212,
"eval_steps_per_second": 6.727,
"step": 42
},
{
"epoch": 2.3902439024390243,
"grad_norm": 39.31980895996094,
"learning_rate": 2.8947368421052634e-05,
"loss": 1.9976,
"step": 50
},
{
"epoch": 2.8780487804878048,
"grad_norm": 34.83562469482422,
"learning_rate": 2.8421052631578946e-05,
"loss": 1.7752,
"step": 60
},
{
"epoch": 3.0,
"eval_accuracy": 0.6301369863013698,
"eval_loss": 0.8411616086959839,
"eval_runtime": 0.7016,
"eval_samples_per_second": 104.055,
"eval_steps_per_second": 7.127,
"step": 63
},
{
"epoch": 3.341463414634146,
"grad_norm": 24.540483474731445,
"learning_rate": 2.7894736842105263e-05,
"loss": 1.5326,
"step": 70
},
{
"epoch": 3.8292682926829267,
"grad_norm": 33.162715911865234,
"learning_rate": 2.736842105263158e-05,
"loss": 1.3772,
"step": 80
},
{
"epoch": 4.0,
"eval_accuracy": 0.7123287671232876,
"eval_loss": 0.7895165681838989,
"eval_runtime": 0.6266,
"eval_samples_per_second": 116.511,
"eval_steps_per_second": 7.98,
"step": 84
},
{
"epoch": 4.2926829268292686,
"grad_norm": 21.98711585998535,
"learning_rate": 2.6842105263157896e-05,
"loss": 1.414,
"step": 90
},
{
"epoch": 4.780487804878049,
"grad_norm": 27.87204360961914,
"learning_rate": 2.631578947368421e-05,
"loss": 1.1556,
"step": 100
},
{
"epoch": 5.0,
"eval_accuracy": 0.7808219178082192,
"eval_loss": 0.7384896874427795,
"eval_runtime": 0.6327,
"eval_samples_per_second": 115.385,
"eval_steps_per_second": 7.903,
"step": 105
},
{
"epoch": 5.2439024390243905,
"grad_norm": 24.45848274230957,
"learning_rate": 2.578947368421053e-05,
"loss": 1.0818,
"step": 110
},
{
"epoch": 5.7317073170731705,
"grad_norm": 16.963436126708984,
"learning_rate": 2.526315789473684e-05,
"loss": 1.0059,
"step": 120
},
{
"epoch": 6.0,
"eval_accuracy": 0.8082191780821918,
"eval_loss": 0.6626368165016174,
"eval_runtime": 0.6356,
"eval_samples_per_second": 114.844,
"eval_steps_per_second": 7.866,
"step": 126
},
{
"epoch": 6.195121951219512,
"grad_norm": 45.89384460449219,
"learning_rate": 2.4736842105263158e-05,
"loss": 0.9054,
"step": 130
},
{
"epoch": 6.682926829268292,
"grad_norm": 27.633718490600586,
"learning_rate": 2.4210526315789474e-05,
"loss": 0.8598,
"step": 140
},
{
"epoch": 7.0,
"eval_accuracy": 0.7808219178082192,
"eval_loss": 0.5402742624282837,
"eval_runtime": 0.6334,
"eval_samples_per_second": 115.247,
"eval_steps_per_second": 7.894,
"step": 147
},
{
"epoch": 7.146341463414634,
"grad_norm": 25.943758010864258,
"learning_rate": 2.368421052631579e-05,
"loss": 0.8268,
"step": 150
},
{
"epoch": 7.634146341463414,
"grad_norm": 37.037078857421875,
"learning_rate": 2.3157894736842103e-05,
"loss": 0.8724,
"step": 160
},
{
"epoch": 8.0,
"eval_accuracy": 0.821917808219178,
"eval_loss": 0.5519894361495972,
"eval_runtime": 0.6379,
"eval_samples_per_second": 114.432,
"eval_steps_per_second": 7.838,
"step": 168
},
{
"epoch": 8.097560975609756,
"grad_norm": 22.22051429748535,
"learning_rate": 2.2631578947368423e-05,
"loss": 0.7427,
"step": 170
},
{
"epoch": 8.585365853658537,
"grad_norm": 33.7209587097168,
"learning_rate": 2.2105263157894736e-05,
"loss": 0.7096,
"step": 180
},
{
"epoch": 9.0,
"eval_accuracy": 0.8356164383561644,
"eval_loss": 0.5182141661643982,
"eval_runtime": 0.7436,
"eval_samples_per_second": 98.169,
"eval_steps_per_second": 6.724,
"step": 189
},
{
"epoch": 9.048780487804878,
"grad_norm": 17.677778244018555,
"learning_rate": 2.1578947368421053e-05,
"loss": 0.5608,
"step": 190
},
{
"epoch": 9.536585365853659,
"grad_norm": 34.34571075439453,
"learning_rate": 2.105263157894737e-05,
"loss": 0.5748,
"step": 200
},
{
"epoch": 10.0,
"grad_norm": 9.038191795349121,
"learning_rate": 2.0526315789473685e-05,
"loss": 0.5038,
"step": 210
},
{
"epoch": 10.0,
"eval_accuracy": 0.8493150684931506,
"eval_loss": 0.4132954180240631,
"eval_runtime": 0.6251,
"eval_samples_per_second": 116.788,
"eval_steps_per_second": 7.999,
"step": 210
},
{
"epoch": 10.487804878048781,
"grad_norm": 31.543596267700195,
"learning_rate": 1.9999999999999998e-05,
"loss": 0.5488,
"step": 220
},
{
"epoch": 10.975609756097562,
"grad_norm": 23.798019409179688,
"learning_rate": 1.9473684210526318e-05,
"loss": 0.4951,
"step": 230
},
{
"epoch": 11.0,
"eval_accuracy": 0.8767123287671232,
"eval_loss": 0.3548040986061096,
"eval_runtime": 0.6389,
"eval_samples_per_second": 114.257,
"eval_steps_per_second": 7.826,
"step": 231
},
{
"epoch": 11.439024390243903,
"grad_norm": 16.509632110595703,
"learning_rate": 1.894736842105263e-05,
"loss": 0.5018,
"step": 240
},
{
"epoch": 11.926829268292684,
"grad_norm": 17.9029598236084,
"learning_rate": 1.8421052631578947e-05,
"loss": 0.4692,
"step": 250
},
{
"epoch": 12.0,
"eval_accuracy": 0.8493150684931506,
"eval_loss": 0.38450247049331665,
"eval_runtime": 0.6355,
"eval_samples_per_second": 114.878,
"eval_steps_per_second": 7.868,
"step": 252
},
{
"epoch": 12.390243902439025,
"grad_norm": 29.795612335205078,
"learning_rate": 1.7894736842105264e-05,
"loss": 0.5316,
"step": 260
},
{
"epoch": 12.878048780487806,
"grad_norm": 14.258842468261719,
"learning_rate": 1.736842105263158e-05,
"loss": 0.5339,
"step": 270
},
{
"epoch": 13.0,
"eval_accuracy": 0.8904109589041096,
"eval_loss": 0.3178386390209198,
"eval_runtime": 0.6456,
"eval_samples_per_second": 113.071,
"eval_steps_per_second": 7.745,
"step": 273
},
{
"epoch": 13.341463414634147,
"grad_norm": 28.944801330566406,
"learning_rate": 1.6842105263157893e-05,
"loss": 0.5021,
"step": 280
},
{
"epoch": 13.829268292682928,
"grad_norm": 19.169776916503906,
"learning_rate": 1.6315789473684213e-05,
"loss": 0.4536,
"step": 290
},
{
"epoch": 14.0,
"eval_accuracy": 0.8904109589041096,
"eval_loss": 0.3252336084842682,
"eval_runtime": 0.6543,
"eval_samples_per_second": 111.574,
"eval_steps_per_second": 7.642,
"step": 294
},
{
"epoch": 14.292682926829269,
"grad_norm": 19.354726791381836,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.3055,
"step": 300
},
{
"epoch": 14.78048780487805,
"grad_norm": 18.675071716308594,
"learning_rate": 1.5263157894736842e-05,
"loss": 0.4369,
"step": 310
},
{
"epoch": 15.0,
"eval_accuracy": 0.8904109589041096,
"eval_loss": 0.27849265933036804,
"eval_runtime": 0.6484,
"eval_samples_per_second": 112.585,
"eval_steps_per_second": 7.711,
"step": 315
},
{
"epoch": 15.24390243902439,
"grad_norm": 29.19399642944336,
"learning_rate": 1.4736842105263157e-05,
"loss": 0.447,
"step": 320
},
{
"epoch": 15.731707317073171,
"grad_norm": 5.669158935546875,
"learning_rate": 1.4210526315789473e-05,
"loss": 0.3941,
"step": 330
},
{
"epoch": 16.0,
"eval_accuracy": 0.9041095890410958,
"eval_loss": 0.28995245695114136,
"eval_runtime": 0.6348,
"eval_samples_per_second": 114.996,
"eval_steps_per_second": 7.876,
"step": 336
},
{
"epoch": 16.195121951219512,
"grad_norm": 21.81118392944336,
"learning_rate": 1.368421052631579e-05,
"loss": 0.3228,
"step": 340
},
{
"epoch": 16.682926829268293,
"grad_norm": 34.80079650878906,
"learning_rate": 1.3157894736842104e-05,
"loss": 0.4363,
"step": 350
},
{
"epoch": 17.0,
"eval_accuracy": 0.863013698630137,
"eval_loss": 0.3426441252231598,
"eval_runtime": 0.6573,
"eval_samples_per_second": 111.053,
"eval_steps_per_second": 7.606,
"step": 357
},
{
"epoch": 17.146341463414632,
"grad_norm": 25.49156379699707,
"learning_rate": 1.263157894736842e-05,
"loss": 0.3729,
"step": 360
},
{
"epoch": 17.634146341463413,
"grad_norm": 25.156068801879883,
"learning_rate": 1.2105263157894737e-05,
"loss": 0.2819,
"step": 370
},
{
"epoch": 18.0,
"eval_accuracy": 0.9041095890410958,
"eval_loss": 0.283920019865036,
"eval_runtime": 0.7572,
"eval_samples_per_second": 96.41,
"eval_steps_per_second": 6.603,
"step": 378
},
{
"epoch": 18.097560975609756,
"grad_norm": 36.39301300048828,
"learning_rate": 1.1578947368421052e-05,
"loss": 0.3253,
"step": 380
},
{
"epoch": 18.585365853658537,
"grad_norm": 24.839868545532227,
"learning_rate": 1.1052631578947368e-05,
"loss": 0.361,
"step": 390
},
{
"epoch": 19.0,
"eval_accuracy": 0.9041095890410958,
"eval_loss": 0.22234712541103363,
"eval_runtime": 0.7512,
"eval_samples_per_second": 97.175,
"eval_steps_per_second": 6.656,
"step": 399
},
{
"epoch": 19.048780487804876,
"grad_norm": 22.352935791015625,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.3204,
"step": 400
},
{
"epoch": 19.536585365853657,
"grad_norm": 12.527485847473145,
"learning_rate": 9.999999999999999e-06,
"loss": 0.2965,
"step": 410
},
{
"epoch": 20.0,
"grad_norm": 16.140165328979492,
"learning_rate": 9.473684210526315e-06,
"loss": 0.1857,
"step": 420
},
{
"epoch": 20.0,
"eval_accuracy": 0.9178082191780822,
"eval_loss": 0.25217577815055847,
"eval_runtime": 0.6292,
"eval_samples_per_second": 116.026,
"eval_steps_per_second": 7.947,
"step": 420
},
{
"epoch": 20.48780487804878,
"grad_norm": 12.794975280761719,
"learning_rate": 8.947368421052632e-06,
"loss": 0.1904,
"step": 430
},
{
"epoch": 20.975609756097562,
"grad_norm": 32.452125549316406,
"learning_rate": 8.421052631578947e-06,
"loss": 0.3161,
"step": 440
},
{
"epoch": 21.0,
"eval_accuracy": 0.9178082191780822,
"eval_loss": 0.21637919545173645,
"eval_runtime": 0.6487,
"eval_samples_per_second": 112.528,
"eval_steps_per_second": 7.707,
"step": 441
},
{
"epoch": 21.4390243902439,
"grad_norm": 17.289514541625977,
"learning_rate": 7.894736842105263e-06,
"loss": 0.2523,
"step": 450
},
{
"epoch": 21.926829268292682,
"grad_norm": 42.501861572265625,
"learning_rate": 7.3684210526315784e-06,
"loss": 0.3273,
"step": 460
},
{
"epoch": 22.0,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.22238127887248993,
"eval_runtime": 0.6384,
"eval_samples_per_second": 114.34,
"eval_steps_per_second": 7.831,
"step": 462
},
{
"epoch": 22.390243902439025,
"grad_norm": 32.69398498535156,
"learning_rate": 6.842105263157895e-06,
"loss": 0.1996,
"step": 470
},
{
"epoch": 22.878048780487806,
"grad_norm": 21.498504638671875,
"learning_rate": 6.31578947368421e-06,
"loss": 0.3458,
"step": 480
},
{
"epoch": 23.0,
"eval_accuracy": 0.9452054794520548,
"eval_loss": 0.21988777816295624,
"eval_runtime": 0.7497,
"eval_samples_per_second": 97.368,
"eval_steps_per_second": 6.669,
"step": 483
},
{
"epoch": 23.341463414634145,
"grad_norm": 8.661595344543457,
"learning_rate": 5.789473684210526e-06,
"loss": 0.2248,
"step": 490
},
{
"epoch": 23.829268292682926,
"grad_norm": 23.505203247070312,
"learning_rate": 5.263157894736842e-06,
"loss": 0.337,
"step": 500
},
{
"epoch": 24.0,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.23766528069972992,
"eval_runtime": 0.768,
"eval_samples_per_second": 95.058,
"eval_steps_per_second": 6.511,
"step": 504
},
{
"epoch": 24.29268292682927,
"grad_norm": 13.873770713806152,
"learning_rate": 4.736842105263158e-06,
"loss": 0.201,
"step": 510
},
{
"epoch": 24.78048780487805,
"grad_norm": 16.176956176757812,
"learning_rate": 4.210526315789473e-06,
"loss": 0.1801,
"step": 520
},
{
"epoch": 25.0,
"eval_accuracy": 0.958904109589041,
"eval_loss": 0.20672719180583954,
"eval_runtime": 0.6363,
"eval_samples_per_second": 114.732,
"eval_steps_per_second": 7.858,
"step": 525
},
{
"epoch": 25.24390243902439,
"grad_norm": 39.25809097290039,
"learning_rate": 3.6842105263157892e-06,
"loss": 0.2077,
"step": 530
},
{
"epoch": 25.73170731707317,
"grad_norm": 46.6181755065918,
"learning_rate": 3.157894736842105e-06,
"loss": 0.3283,
"step": 540
},
{
"epoch": 26.0,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.24006159603595734,
"eval_runtime": 0.6339,
"eval_samples_per_second": 115.158,
"eval_steps_per_second": 7.888,
"step": 546
},
{
"epoch": 26.195121951219512,
"grad_norm": 10.685463905334473,
"learning_rate": 2.631578947368421e-06,
"loss": 0.3054,
"step": 550
},
{
"epoch": 26.682926829268293,
"grad_norm": 28.955364227294922,
"learning_rate": 2.1052631578947366e-06,
"loss": 0.2211,
"step": 560
},
{
"epoch": 27.0,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.21667610108852386,
"eval_runtime": 0.6327,
"eval_samples_per_second": 115.37,
"eval_steps_per_second": 7.902,
"step": 567
},
{
"epoch": 27.146341463414632,
"grad_norm": 20.726945877075195,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.2556,
"step": 570
},
{
"epoch": 27.634146341463413,
"grad_norm": 6.582526206970215,
"learning_rate": 1.0526315789473683e-06,
"loss": 0.1783,
"step": 580
},
{
"epoch": 28.0,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.21801576018333435,
"eval_runtime": 0.6499,
"eval_samples_per_second": 112.334,
"eval_steps_per_second": 7.694,
"step": 588
},
{
"epoch": 28.097560975609756,
"grad_norm": 16.22028923034668,
"learning_rate": 5.263157894736842e-07,
"loss": 0.1982,
"step": 590
},
{
"epoch": 28.585365853658537,
"grad_norm": 15.770591735839844,
"learning_rate": 0.0,
"loss": 0.2783,
"step": 600
},
{
"epoch": 28.585365853658537,
"eval_accuracy": 0.9315068493150684,
"eval_loss": 0.22231744229793549,
"eval_runtime": 0.8149,
"eval_samples_per_second": 89.581,
"eval_steps_per_second": 6.136,
"step": 600
},
{
"epoch": 28.585365853658537,
"step": 600,
"total_flos": 6.102198151010058e+17,
"train_loss": 0.7502146526177724,
"train_runtime": 483.2384,
"train_samples_per_second": 40.725,
"train_steps_per_second": 1.242
}
],
"logging_steps": 10,
"max_steps": 600,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.102198151010058e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}