Augusto777's picture
End of training
9ce8871 verified
{
"best_metric": 0.7608695652173914,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da2-colab/checkpoint-180",
"epoch": 39.108695652173914,
"eval_steps": 500,
"global_step": 440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8695652173913043,
"grad_norm": 22.860248565673828,
"learning_rate": 9.090909090909091e-06,
"loss": 1.4149,
"step": 10
},
{
"epoch": 0.9565217391304348,
"eval_accuracy": 0.21739130434782608,
"eval_loss": 1.3904842138290405,
"eval_runtime": 0.7623,
"eval_samples_per_second": 60.346,
"eval_steps_per_second": 3.936,
"step": 11
},
{
"epoch": 1.7608695652173914,
"grad_norm": 21.9100341796875,
"learning_rate": 1.8181818181818182e-05,
"loss": 1.3431,
"step": 20
},
{
"epoch": 1.9347826086956523,
"eval_accuracy": 0.30434782608695654,
"eval_loss": 1.3828414678573608,
"eval_runtime": 0.7435,
"eval_samples_per_second": 61.87,
"eval_steps_per_second": 4.035,
"step": 22
},
{
"epoch": 2.6521739130434785,
"grad_norm": 25.524051666259766,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.2396,
"step": 30
},
{
"epoch": 2.9130434782608696,
"eval_accuracy": 0.43478260869565216,
"eval_loss": 1.2675390243530273,
"eval_runtime": 0.7435,
"eval_samples_per_second": 61.871,
"eval_steps_per_second": 4.035,
"step": 33
},
{
"epoch": 3.5434782608695654,
"grad_norm": 39.9882926940918,
"learning_rate": 3.6363636363636364e-05,
"loss": 1.1377,
"step": 40
},
{
"epoch": 3.9782608695652173,
"eval_accuracy": 0.34782608695652173,
"eval_loss": 1.2067060470581055,
"eval_runtime": 1.0097,
"eval_samples_per_second": 45.56,
"eval_steps_per_second": 2.971,
"step": 45
},
{
"epoch": 4.434782608695652,
"grad_norm": 32.264034271240234,
"learning_rate": 3.93939393939394e-05,
"loss": 1.0144,
"step": 50
},
{
"epoch": 4.956521739130435,
"eval_accuracy": 0.6086956521739131,
"eval_loss": 0.9060260057449341,
"eval_runtime": 0.7487,
"eval_samples_per_second": 61.443,
"eval_steps_per_second": 4.007,
"step": 56
},
{
"epoch": 5.326086956521739,
"grad_norm": 38.01457977294922,
"learning_rate": 3.838383838383839e-05,
"loss": 0.9016,
"step": 60
},
{
"epoch": 5.934782608695652,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.8024644255638123,
"eval_runtime": 0.7779,
"eval_samples_per_second": 59.132,
"eval_steps_per_second": 3.856,
"step": 67
},
{
"epoch": 6.217391304347826,
"grad_norm": 36.355648040771484,
"learning_rate": 3.7373737373737376e-05,
"loss": 0.7941,
"step": 70
},
{
"epoch": 6.913043478260869,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.7811539769172668,
"eval_runtime": 0.8396,
"eval_samples_per_second": 54.787,
"eval_steps_per_second": 3.573,
"step": 78
},
{
"epoch": 7.108695652173913,
"grad_norm": 41.1641845703125,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.7031,
"step": 80
},
{
"epoch": 7.978260869565218,
"grad_norm": 51.79063415527344,
"learning_rate": 3.535353535353536e-05,
"loss": 0.6986,
"step": 90
},
{
"epoch": 7.978260869565218,
"eval_accuracy": 0.5869565217391305,
"eval_loss": 0.9441068768501282,
"eval_runtime": 0.7666,
"eval_samples_per_second": 60.006,
"eval_steps_per_second": 3.913,
"step": 90
},
{
"epoch": 8.869565217391305,
"grad_norm": 71.17655181884766,
"learning_rate": 3.434343434343435e-05,
"loss": 0.6245,
"step": 100
},
{
"epoch": 8.956521739130435,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.864142894744873,
"eval_runtime": 0.7557,
"eval_samples_per_second": 60.867,
"eval_steps_per_second": 3.97,
"step": 101
},
{
"epoch": 9.76086956521739,
"grad_norm": 41.29151916503906,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.6044,
"step": 110
},
{
"epoch": 9.934782608695652,
"eval_accuracy": 0.6086956521739131,
"eval_loss": 0.8647807836532593,
"eval_runtime": 0.7668,
"eval_samples_per_second": 59.987,
"eval_steps_per_second": 3.912,
"step": 112
},
{
"epoch": 10.652173913043478,
"grad_norm": 35.93144989013672,
"learning_rate": 3.232323232323232e-05,
"loss": 0.536,
"step": 120
},
{
"epoch": 10.91304347826087,
"eval_accuracy": 0.5869565217391305,
"eval_loss": 0.8800004124641418,
"eval_runtime": 0.7619,
"eval_samples_per_second": 60.377,
"eval_steps_per_second": 3.938,
"step": 123
},
{
"epoch": 11.543478260869565,
"grad_norm": 42.50069808959961,
"learning_rate": 3.131313131313132e-05,
"loss": 0.4825,
"step": 130
},
{
"epoch": 11.978260869565217,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.8387995958328247,
"eval_runtime": 0.7698,
"eval_samples_per_second": 59.754,
"eval_steps_per_second": 3.897,
"step": 135
},
{
"epoch": 12.434782608695652,
"grad_norm": 82.49532318115234,
"learning_rate": 3.0303030303030306e-05,
"loss": 0.4972,
"step": 140
},
{
"epoch": 12.956521739130435,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8763275146484375,
"eval_runtime": 0.7588,
"eval_samples_per_second": 60.62,
"eval_steps_per_second": 3.953,
"step": 146
},
{
"epoch": 13.326086956521738,
"grad_norm": 44.642826080322266,
"learning_rate": 2.9292929292929297e-05,
"loss": 0.4284,
"step": 150
},
{
"epoch": 13.934782608695652,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.8228161931037903,
"eval_runtime": 1.0261,
"eval_samples_per_second": 44.832,
"eval_steps_per_second": 2.924,
"step": 157
},
{
"epoch": 14.217391304347826,
"grad_norm": 57.17488479614258,
"learning_rate": 2.8282828282828285e-05,
"loss": 0.3961,
"step": 160
},
{
"epoch": 14.91304347826087,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8260169625282288,
"eval_runtime": 0.8274,
"eval_samples_per_second": 55.597,
"eval_steps_per_second": 3.626,
"step": 168
},
{
"epoch": 15.108695652173912,
"grad_norm": 31.25395393371582,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.4208,
"step": 170
},
{
"epoch": 15.978260869565217,
"grad_norm": 42.941837310791016,
"learning_rate": 2.6262626262626265e-05,
"loss": 0.3877,
"step": 180
},
{
"epoch": 15.978260869565217,
"eval_accuracy": 0.7608695652173914,
"eval_loss": 0.9367595911026001,
"eval_runtime": 0.7841,
"eval_samples_per_second": 58.667,
"eval_steps_per_second": 3.826,
"step": 180
},
{
"epoch": 16.869565217391305,
"grad_norm": 72.04235076904297,
"learning_rate": 2.5252525252525253e-05,
"loss": 0.3744,
"step": 190
},
{
"epoch": 16.956521739130434,
"eval_accuracy": 0.6304347826086957,
"eval_loss": 1.1220568418502808,
"eval_runtime": 1.0796,
"eval_samples_per_second": 42.606,
"eval_steps_per_second": 2.779,
"step": 191
},
{
"epoch": 17.76086956521739,
"grad_norm": 43.762760162353516,
"learning_rate": 2.4242424242424244e-05,
"loss": 0.3266,
"step": 200
},
{
"epoch": 17.934782608695652,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 1.0177233219146729,
"eval_runtime": 0.7517,
"eval_samples_per_second": 61.193,
"eval_steps_per_second": 3.991,
"step": 202
},
{
"epoch": 18.652173913043477,
"grad_norm": 76.85489654541016,
"learning_rate": 2.3232323232323232e-05,
"loss": 0.3257,
"step": 210
},
{
"epoch": 18.91304347826087,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.0300357341766357,
"eval_runtime": 0.7634,
"eval_samples_per_second": 60.254,
"eval_steps_per_second": 3.93,
"step": 213
},
{
"epoch": 19.543478260869566,
"grad_norm": 40.13884353637695,
"learning_rate": 2.2222222222222227e-05,
"loss": 0.3164,
"step": 220
},
{
"epoch": 19.97826086956522,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.134386658668518,
"eval_runtime": 0.8709,
"eval_samples_per_second": 52.817,
"eval_steps_per_second": 3.445,
"step": 225
},
{
"epoch": 20.434782608695652,
"grad_norm": 50.51103591918945,
"learning_rate": 2.121212121212121e-05,
"loss": 0.2965,
"step": 230
},
{
"epoch": 20.956521739130434,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.9283356070518494,
"eval_runtime": 0.7697,
"eval_samples_per_second": 59.76,
"eval_steps_per_second": 3.897,
"step": 236
},
{
"epoch": 21.32608695652174,
"grad_norm": 40.85714340209961,
"learning_rate": 2.0202020202020206e-05,
"loss": 0.293,
"step": 240
},
{
"epoch": 21.934782608695652,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.012825608253479,
"eval_runtime": 0.7852,
"eval_samples_per_second": 58.582,
"eval_steps_per_second": 3.821,
"step": 247
},
{
"epoch": 22.217391304347824,
"grad_norm": 31.803544998168945,
"learning_rate": 1.9191919191919194e-05,
"loss": 0.2929,
"step": 250
},
{
"epoch": 22.91304347826087,
"eval_accuracy": 0.7608695652173914,
"eval_loss": 1.0449855327606201,
"eval_runtime": 1.4131,
"eval_samples_per_second": 32.553,
"eval_steps_per_second": 2.123,
"step": 258
},
{
"epoch": 23.108695652173914,
"grad_norm": 44.02550506591797,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.2661,
"step": 260
},
{
"epoch": 23.97826086956522,
"grad_norm": 62.81007766723633,
"learning_rate": 1.7171717171717173e-05,
"loss": 0.2878,
"step": 270
},
{
"epoch": 23.97826086956522,
"eval_accuracy": 0.717391304347826,
"eval_loss": 1.1481518745422363,
"eval_runtime": 0.7494,
"eval_samples_per_second": 61.383,
"eval_steps_per_second": 4.003,
"step": 270
},
{
"epoch": 24.869565217391305,
"grad_norm": 46.08440017700195,
"learning_rate": 1.616161616161616e-05,
"loss": 0.2447,
"step": 280
},
{
"epoch": 24.956521739130434,
"eval_accuracy": 0.717391304347826,
"eval_loss": 1.0715558528900146,
"eval_runtime": 0.7605,
"eval_samples_per_second": 60.483,
"eval_steps_per_second": 3.945,
"step": 281
},
{
"epoch": 25.76086956521739,
"grad_norm": 65.74066162109375,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.2601,
"step": 290
},
{
"epoch": 25.934782608695652,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.0769968032836914,
"eval_runtime": 0.9896,
"eval_samples_per_second": 46.483,
"eval_steps_per_second": 3.032,
"step": 292
},
{
"epoch": 26.652173913043477,
"grad_norm": 39.677207946777344,
"learning_rate": 1.4141414141414143e-05,
"loss": 0.2299,
"step": 300
},
{
"epoch": 26.91304347826087,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 1.176945686340332,
"eval_runtime": 1.0303,
"eval_samples_per_second": 44.647,
"eval_steps_per_second": 2.912,
"step": 303
},
{
"epoch": 27.543478260869566,
"grad_norm": 51.47335433959961,
"learning_rate": 1.3131313131313132e-05,
"loss": 0.2401,
"step": 310
},
{
"epoch": 27.97826086956522,
"eval_accuracy": 0.717391304347826,
"eval_loss": 1.140651822090149,
"eval_runtime": 0.7578,
"eval_samples_per_second": 60.699,
"eval_steps_per_second": 3.959,
"step": 315
},
{
"epoch": 28.434782608695652,
"grad_norm": 35.28546905517578,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.2347,
"step": 320
},
{
"epoch": 28.956521739130434,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1928998231887817,
"eval_runtime": 0.7598,
"eval_samples_per_second": 60.544,
"eval_steps_per_second": 3.949,
"step": 326
},
{
"epoch": 29.32608695652174,
"grad_norm": 34.29880905151367,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.2584,
"step": 330
},
{
"epoch": 29.934782608695652,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 1.095699667930603,
"eval_runtime": 0.7593,
"eval_samples_per_second": 60.584,
"eval_steps_per_second": 3.951,
"step": 337
},
{
"epoch": 30.217391304347824,
"grad_norm": 35.58168411254883,
"learning_rate": 1.0101010101010103e-05,
"loss": 0.2204,
"step": 340
},
{
"epoch": 30.91304347826087,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 1.172129511833191,
"eval_runtime": 1.009,
"eval_samples_per_second": 45.59,
"eval_steps_per_second": 2.973,
"step": 348
},
{
"epoch": 31.108695652173914,
"grad_norm": 40.2222900390625,
"learning_rate": 9.090909090909091e-06,
"loss": 0.2057,
"step": 350
},
{
"epoch": 31.97826086956522,
"grad_norm": 61.76468276977539,
"learning_rate": 8.08080808080808e-06,
"loss": 0.2031,
"step": 360
},
{
"epoch": 31.97826086956522,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 1.0842841863632202,
"eval_runtime": 0.7718,
"eval_samples_per_second": 59.603,
"eval_steps_per_second": 3.887,
"step": 360
},
{
"epoch": 32.869565217391305,
"grad_norm": 50.13585662841797,
"learning_rate": 7.070707070707071e-06,
"loss": 0.2241,
"step": 370
},
{
"epoch": 32.95652173913044,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1350224018096924,
"eval_runtime": 0.7479,
"eval_samples_per_second": 61.506,
"eval_steps_per_second": 4.011,
"step": 371
},
{
"epoch": 33.76086956521739,
"grad_norm": 45.946346282958984,
"learning_rate": 6.060606060606061e-06,
"loss": 0.1798,
"step": 380
},
{
"epoch": 33.93478260869565,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.2418912649154663,
"eval_runtime": 0.8117,
"eval_samples_per_second": 56.668,
"eval_steps_per_second": 3.696,
"step": 382
},
{
"epoch": 34.65217391304348,
"grad_norm": 53.28645324707031,
"learning_rate": 5.0505050505050515e-06,
"loss": 0.2435,
"step": 390
},
{
"epoch": 34.91304347826087,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1522233486175537,
"eval_runtime": 1.0835,
"eval_samples_per_second": 42.454,
"eval_steps_per_second": 2.769,
"step": 393
},
{
"epoch": 35.54347826086956,
"grad_norm": 119.8594970703125,
"learning_rate": 4.04040404040404e-06,
"loss": 0.1857,
"step": 400
},
{
"epoch": 35.97826086956522,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1206859350204468,
"eval_runtime": 0.8139,
"eval_samples_per_second": 56.519,
"eval_steps_per_second": 3.686,
"step": 405
},
{
"epoch": 36.43478260869565,
"grad_norm": 62.90666198730469,
"learning_rate": 3.0303030303030305e-06,
"loss": 0.1889,
"step": 410
},
{
"epoch": 36.95652173913044,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1711089611053467,
"eval_runtime": 0.8068,
"eval_samples_per_second": 57.018,
"eval_steps_per_second": 3.719,
"step": 416
},
{
"epoch": 37.32608695652174,
"grad_norm": 36.55961608886719,
"learning_rate": 2.02020202020202e-06,
"loss": 0.2043,
"step": 420
},
{
"epoch": 37.93478260869565,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 1.1978377103805542,
"eval_runtime": 0.8416,
"eval_samples_per_second": 54.66,
"eval_steps_per_second": 3.565,
"step": 427
},
{
"epoch": 38.21739130434783,
"grad_norm": 41.95515441894531,
"learning_rate": 1.01010101010101e-06,
"loss": 0.1951,
"step": 430
},
{
"epoch": 38.91304347826087,
"eval_accuracy": 0.717391304347826,
"eval_loss": 1.2106621265411377,
"eval_runtime": 1.1122,
"eval_samples_per_second": 41.361,
"eval_steps_per_second": 2.697,
"step": 438
},
{
"epoch": 39.108695652173914,
"grad_norm": 97.34293365478516,
"learning_rate": 0.0,
"loss": 0.1901,
"step": 440
},
{
"epoch": 39.108695652173914,
"eval_accuracy": 0.717391304347826,
"eval_loss": 1.2108197212219238,
"eval_runtime": 0.8571,
"eval_samples_per_second": 53.667,
"eval_steps_per_second": 3.5,
"step": 440
},
{
"epoch": 39.108695652173914,
"step": 440,
"total_flos": 9.162177814462464e+17,
"train_loss": 0.45710954666137693,
"train_runtime": 931.6489,
"train_samples_per_second": 30.956,
"train_steps_per_second": 0.472
}
],
"logging_steps": 10,
"max_steps": 440,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.162177814462464e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}