RobertoSonic's picture
End of training
e64367c verified
{
"best_metric": 0.7840909090909091,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV45/checkpoint-140",
"epoch": 39.935064935064936,
"eval_steps": 500,
"global_step": 798,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6233766233766234,
"grad_norm": 18.71070671081543,
"learning_rate": 7.5e-06,
"loss": 6.1945,
"step": 12
},
{
"epoch": 1.0,
"eval_accuracy": 0.45454545454545453,
"eval_loss": 1.2588036060333252,
"eval_runtime": 4.3451,
"eval_samples_per_second": 20.253,
"eval_steps_per_second": 0.69,
"step": 20
},
{
"epoch": 1.2077922077922079,
"grad_norm": 14.680241584777832,
"learning_rate": 1.5e-05,
"loss": 4.9168,
"step": 24
},
{
"epoch": 1.8311688311688312,
"grad_norm": 19.30341148376465,
"learning_rate": 2.25e-05,
"loss": 4.5836,
"step": 36
},
{
"epoch": 2.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 0.9657976031303406,
"eval_runtime": 2.9181,
"eval_samples_per_second": 30.157,
"eval_steps_per_second": 1.028,
"step": 40
},
{
"epoch": 2.4155844155844157,
"grad_norm": 25.428447723388672,
"learning_rate": 3e-05,
"loss": 3.527,
"step": 48
},
{
"epoch": 3.0,
"grad_norm": 19.649690628051758,
"learning_rate": 3.7500000000000003e-05,
"loss": 2.9056,
"step": 60
},
{
"epoch": 3.0,
"eval_accuracy": 0.7272727272727273,
"eval_loss": 0.7736997008323669,
"eval_runtime": 1.6707,
"eval_samples_per_second": 52.672,
"eval_steps_per_second": 1.796,
"step": 60
},
{
"epoch": 3.6233766233766236,
"grad_norm": 30.83364486694336,
"learning_rate": 4.5e-05,
"loss": 2.8061,
"step": 72
},
{
"epoch": 4.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 0.673829972743988,
"eval_runtime": 1.6507,
"eval_samples_per_second": 53.31,
"eval_steps_per_second": 1.817,
"step": 80
},
{
"epoch": 4.207792207792208,
"grad_norm": 31.61625099182129,
"learning_rate": 4.999617113753456e-05,
"loss": 2.0484,
"step": 84
},
{
"epoch": 4.8311688311688314,
"grad_norm": 37.459957122802734,
"learning_rate": 4.99387616539795e-05,
"loss": 1.9405,
"step": 96
},
{
"epoch": 5.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 0.626100480556488,
"eval_runtime": 1.6495,
"eval_samples_per_second": 53.351,
"eval_steps_per_second": 1.819,
"step": 100
},
{
"epoch": 5.415584415584416,
"grad_norm": 29.627758026123047,
"learning_rate": 4.981261550534304e-05,
"loss": 1.6502,
"step": 108
},
{
"epoch": 6.0,
"grad_norm": 15.884918212890625,
"learning_rate": 4.9618080377917326e-05,
"loss": 1.4425,
"step": 120
},
{
"epoch": 6.0,
"eval_accuracy": 0.75,
"eval_loss": 0.8126522302627563,
"eval_runtime": 1.6403,
"eval_samples_per_second": 53.65,
"eval_steps_per_second": 1.829,
"step": 120
},
{
"epoch": 6.623376623376624,
"grad_norm": 30.541797637939453,
"learning_rate": 4.935569245293244e-05,
"loss": 1.3554,
"step": 132
},
{
"epoch": 7.0,
"eval_accuracy": 0.7840909090909091,
"eval_loss": 0.7812299728393555,
"eval_runtime": 1.6388,
"eval_samples_per_second": 53.696,
"eval_steps_per_second": 1.831,
"step": 140
},
{
"epoch": 7.207792207792208,
"grad_norm": 37.37839126586914,
"learning_rate": 4.902617492872402e-05,
"loss": 1.1744,
"step": 144
},
{
"epoch": 7.8311688311688314,
"grad_norm": 54.830101013183594,
"learning_rate": 4.863043602744095e-05,
"loss": 1.2975,
"step": 156
},
{
"epoch": 8.0,
"eval_accuracy": 0.75,
"eval_loss": 0.8404767513275146,
"eval_runtime": 2.1899,
"eval_samples_per_second": 40.185,
"eval_steps_per_second": 1.37,
"step": 160
},
{
"epoch": 8.415584415584416,
"grad_norm": 39.1025390625,
"learning_rate": 4.81695664917871e-05,
"loss": 1.0383,
"step": 168
},
{
"epoch": 9.0,
"grad_norm": 33.006813049316406,
"learning_rate": 4.764483657869654e-05,
"loss": 0.812,
"step": 180
},
{
"epoch": 9.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 1.0776864290237427,
"eval_runtime": 1.6116,
"eval_samples_per_second": 54.605,
"eval_steps_per_second": 1.862,
"step": 180
},
{
"epoch": 9.623376623376624,
"grad_norm": 22.014991760253906,
"learning_rate": 4.705769255822849e-05,
"loss": 0.7984,
"step": 192
},
{
"epoch": 10.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 0.9403623342514038,
"eval_runtime": 1.6439,
"eval_samples_per_second": 53.532,
"eval_steps_per_second": 1.825,
"step": 200
},
{
"epoch": 10.207792207792208,
"grad_norm": 37.21240234375,
"learning_rate": 4.640975272733168e-05,
"loss": 0.7945,
"step": 204
},
{
"epoch": 10.831168831168831,
"grad_norm": 25.584922790527344,
"learning_rate": 4.570280294946506e-05,
"loss": 0.7895,
"step": 216
},
{
"epoch": 11.0,
"eval_accuracy": 0.7045454545454546,
"eval_loss": 1.0901598930358887,
"eval_runtime": 1.6182,
"eval_samples_per_second": 54.381,
"eval_steps_per_second": 1.854,
"step": 220
},
{
"epoch": 11.415584415584416,
"grad_norm": 42.140926361083984,
"learning_rate": 4.493879173236869e-05,
"loss": 0.6275,
"step": 228
},
{
"epoch": 12.0,
"grad_norm": 0.09648128598928452,
"learning_rate": 4.411982485755156e-05,
"loss": 0.7333,
"step": 240
},
{
"epoch": 12.0,
"eval_accuracy": 0.75,
"eval_loss": 1.099798321723938,
"eval_runtime": 1.619,
"eval_samples_per_second": 54.354,
"eval_steps_per_second": 1.853,
"step": 240
},
{
"epoch": 12.623376623376624,
"grad_norm": 25.823577880859375,
"learning_rate": 4.3248159576298576e-05,
"loss": 0.6073,
"step": 252
},
{
"epoch": 13.0,
"eval_accuracy": 0.7386363636363636,
"eval_loss": 1.2733994722366333,
"eval_runtime": 1.6351,
"eval_samples_per_second": 53.82,
"eval_steps_per_second": 1.835,
"step": 260
},
{
"epoch": 13.207792207792208,
"grad_norm": 19.644241333007812,
"learning_rate": 4.232619838819377e-05,
"loss": 0.5408,
"step": 264
},
{
"epoch": 13.831168831168831,
"grad_norm": 36.02125549316406,
"learning_rate": 4.135648241930766e-05,
"loss": 0.6548,
"step": 276
},
{
"epoch": 14.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 1.303389549255371,
"eval_runtime": 2.1993,
"eval_samples_per_second": 40.012,
"eval_steps_per_second": 1.364,
"step": 280
},
{
"epoch": 14.415584415584416,
"grad_norm": 27.597558975219727,
"learning_rate": 4.034168441829963e-05,
"loss": 0.5159,
"step": 288
},
{
"epoch": 15.0,
"grad_norm": 5.691001892089844,
"learning_rate": 3.928460138973984e-05,
"loss": 0.5538,
"step": 300
},
{
"epoch": 15.0,
"eval_accuracy": 0.75,
"eval_loss": 1.189049243927002,
"eval_runtime": 1.6811,
"eval_samples_per_second": 52.348,
"eval_steps_per_second": 1.785,
"step": 300
},
{
"epoch": 15.623376623376624,
"grad_norm": 21.679243087768555,
"learning_rate": 3.818814688495475e-05,
"loss": 0.556,
"step": 312
},
{
"epoch": 16.0,
"eval_accuracy": 0.75,
"eval_loss": 1.3661515712738037,
"eval_runtime": 2.2932,
"eval_samples_per_second": 38.374,
"eval_steps_per_second": 1.308,
"step": 320
},
{
"epoch": 16.207792207792206,
"grad_norm": 29.273984909057617,
"learning_rate": 3.705534297164438e-05,
"loss": 0.4476,
"step": 324
},
{
"epoch": 16.83116883116883,
"grad_norm": 23.221296310424805,
"learning_rate": 3.5889311904404676e-05,
"loss": 0.5273,
"step": 336
},
{
"epoch": 17.0,
"eval_accuracy": 0.7272727272727273,
"eval_loss": 1.2832838296890259,
"eval_runtime": 2.0423,
"eval_samples_per_second": 43.089,
"eval_steps_per_second": 1.469,
"step": 340
},
{
"epoch": 17.415584415584416,
"grad_norm": 23.35724449157715,
"learning_rate": 3.469326751911314e-05,
"loss": 0.4096,
"step": 348
},
{
"epoch": 18.0,
"grad_norm": 0.7785102128982544,
"learning_rate": 3.347050637489627e-05,
"loss": 0.3863,
"step": 360
},
{
"epoch": 18.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 1.2975611686706543,
"eval_runtime": 2.2746,
"eval_samples_per_second": 38.689,
"eval_steps_per_second": 1.319,
"step": 360
},
{
"epoch": 18.623376623376622,
"grad_norm": 17.82123565673828,
"learning_rate": 3.222439866809383e-05,
"loss": 0.5185,
"step": 372
},
{
"epoch": 19.0,
"eval_accuracy": 0.7386363636363636,
"eval_loss": 1.2460757493972778,
"eval_runtime": 1.6484,
"eval_samples_per_second": 53.384,
"eval_steps_per_second": 1.82,
"step": 380
},
{
"epoch": 19.207792207792206,
"grad_norm": 20.32648468017578,
"learning_rate": 3.095837894326287e-05,
"loss": 0.4961,
"step": 384
},
{
"epoch": 19.83116883116883,
"grad_norm": 23.85995101928711,
"learning_rate": 2.967593662682395e-05,
"loss": 0.475,
"step": 396
},
{
"epoch": 20.0,
"eval_accuracy": 0.7386363636363636,
"eval_loss": 1.2543120384216309,
"eval_runtime": 1.6377,
"eval_samples_per_second": 53.735,
"eval_steps_per_second": 1.832,
"step": 400
},
{
"epoch": 20.415584415584416,
"grad_norm": 7.2037811279296875,
"learning_rate": 2.838060640944115e-05,
"loss": 0.32,
"step": 408
},
{
"epoch": 21.0,
"grad_norm": 0.6710865497589111,
"learning_rate": 2.7075958503643745e-05,
"loss": 0.3021,
"step": 420
},
{
"epoch": 21.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 1.31428062915802,
"eval_runtime": 2.2517,
"eval_samples_per_second": 39.081,
"eval_steps_per_second": 1.332,
"step": 420
},
{
"epoch": 21.623376623376622,
"grad_norm": 12.076529502868652,
"learning_rate": 2.576558880354205e-05,
"loss": 0.3334,
"step": 432
},
{
"epoch": 22.0,
"eval_accuracy": 0.75,
"eval_loss": 1.287263035774231,
"eval_runtime": 1.6412,
"eval_samples_per_second": 53.62,
"eval_steps_per_second": 1.828,
"step": 440
},
{
"epoch": 22.207792207792206,
"grad_norm": 21.53441047668457,
"learning_rate": 2.4453108973759122e-05,
"loss": 0.3226,
"step": 444
},
{
"epoch": 22.83116883116883,
"grad_norm": 17.34014892578125,
"learning_rate": 2.3142136494895552e-05,
"loss": 0.3773,
"step": 456
},
{
"epoch": 23.0,
"eval_accuracy": 0.7386363636363636,
"eval_loss": 1.3992358446121216,
"eval_runtime": 3.8978,
"eval_samples_per_second": 22.577,
"eval_steps_per_second": 0.77,
"step": 460
},
{
"epoch": 23.415584415584416,
"grad_norm": 28.858182907104492,
"learning_rate": 2.183628469296411e-05,
"loss": 0.3067,
"step": 468
},
{
"epoch": 24.0,
"grad_norm": 10.506282806396484,
"learning_rate": 2.0539152780275357e-05,
"loss": 0.2606,
"step": 480
},
{
"epoch": 24.0,
"eval_accuracy": 0.7159090909090909,
"eval_loss": 1.5181022882461548,
"eval_runtime": 1.6351,
"eval_samples_per_second": 53.819,
"eval_steps_per_second": 1.835,
"step": 480
},
{
"epoch": 24.623376623376622,
"grad_norm": 14.785501480102539,
"learning_rate": 1.9254315935223725e-05,
"loss": 0.3344,
"step": 492
},
{
"epoch": 25.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.432998538017273,
"eval_runtime": 1.6323,
"eval_samples_per_second": 53.911,
"eval_steps_per_second": 1.838,
"step": 500
},
{
"epoch": 25.207792207792206,
"grad_norm": 25.188377380371094,
"learning_rate": 1.7985315448316243e-05,
"loss": 0.3907,
"step": 504
},
{
"epoch": 25.83116883116883,
"grad_norm": 25.56688117980957,
"learning_rate": 1.673564896160374e-05,
"loss": 0.3349,
"step": 516
},
{
"epoch": 26.0,
"eval_accuracy": 0.7840909090909091,
"eval_loss": 1.416541576385498,
"eval_runtime": 1.6809,
"eval_samples_per_second": 52.353,
"eval_steps_per_second": 1.785,
"step": 520
},
{
"epoch": 26.415584415584416,
"grad_norm": 15.779555320739746,
"learning_rate": 1.550876082841669e-05,
"loss": 0.3379,
"step": 528
},
{
"epoch": 27.0,
"grad_norm": 1.8928312063217163,
"learning_rate": 1.430803261997638e-05,
"loss": 0.3246,
"step": 540
},
{
"epoch": 27.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.3633875846862793,
"eval_runtime": 1.6308,
"eval_samples_per_second": 53.962,
"eval_steps_per_second": 1.84,
"step": 540
},
{
"epoch": 27.623376623376622,
"grad_norm": 20.64362335205078,
"learning_rate": 1.3136773805047203e-05,
"loss": 0.3395,
"step": 552
},
{
"epoch": 28.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.3985376358032227,
"eval_runtime": 1.6495,
"eval_samples_per_second": 53.349,
"eval_steps_per_second": 1.819,
"step": 560
},
{
"epoch": 28.207792207792206,
"grad_norm": 12.24038314819336,
"learning_rate": 1.1998212628319214e-05,
"loss": 0.2321,
"step": 564
},
{
"epoch": 28.83116883116883,
"grad_norm": 21.42084312438965,
"learning_rate": 1.0895487212661759e-05,
"loss": 0.2606,
"step": 576
},
{
"epoch": 29.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.386614203453064,
"eval_runtime": 1.661,
"eval_samples_per_second": 52.981,
"eval_steps_per_second": 1.806,
"step": 580
},
{
"epoch": 29.415584415584416,
"grad_norm": 17.57806396484375,
"learning_rate": 9.831636909772579e-06,
"loss": 0.254,
"step": 588
},
{
"epoch": 30.0,
"grad_norm": 15.217145919799805,
"learning_rate": 8.809593923061812e-06,
"loss": 0.2212,
"step": 600
},
{
"epoch": 30.0,
"eval_accuracy": 0.75,
"eval_loss": 1.484897494316101,
"eval_runtime": 1.6678,
"eval_samples_per_second": 52.763,
"eval_steps_per_second": 1.799,
"step": 600
},
{
"epoch": 30.623376623376622,
"grad_norm": 19.78946304321289,
"learning_rate": 7.832175225860012e-06,
"loss": 0.2266,
"step": 612
},
{
"epoch": 31.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 1.4229885339736938,
"eval_runtime": 1.6298,
"eval_samples_per_second": 53.994,
"eval_steps_per_second": 1.841,
"step": 620
},
{
"epoch": 31.207792207792206,
"grad_norm": 18.53005027770996,
"learning_rate": 6.902074797225408e-06,
"loss": 0.1926,
"step": 624
},
{
"epoch": 31.83116883116883,
"grad_norm": 36.56352615356445,
"learning_rate": 6.021856196750178e-06,
"loss": 0.2525,
"step": 636
},
{
"epoch": 32.0,
"eval_accuracy": 0.7727272727272727,
"eval_loss": 1.4287593364715576,
"eval_runtime": 1.6702,
"eval_samples_per_second": 52.688,
"eval_steps_per_second": 1.796,
"step": 640
},
{
"epoch": 32.41558441558441,
"grad_norm": 22.782241821289062,
"learning_rate": 5.1939454988312206e-06,
"loss": 0.2129,
"step": 648
},
{
"epoch": 33.0,
"grad_norm": 3.140561580657959,
"learning_rate": 4.420624605880136e-06,
"loss": 0.2241,
"step": 660
},
{
"epoch": 33.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.4496763944625854,
"eval_runtime": 1.6312,
"eval_samples_per_second": 53.947,
"eval_steps_per_second": 1.839,
"step": 660
},
{
"epoch": 33.62337662337662,
"grad_norm": 10.7069730758667,
"learning_rate": 3.7040249589025523e-06,
"loss": 0.1816,
"step": 672
},
{
"epoch": 34.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.4346766471862793,
"eval_runtime": 1.6494,
"eval_samples_per_second": 53.352,
"eval_steps_per_second": 1.819,
"step": 680
},
{
"epoch": 34.20779220779221,
"grad_norm": 6.652499198913574,
"learning_rate": 3.0461216627820032e-06,
"loss": 0.1774,
"step": 684
},
{
"epoch": 34.83116883116883,
"grad_norm": 15.814424514770508,
"learning_rate": 2.448728042460141e-06,
"loss": 0.2529,
"step": 696
},
{
"epoch": 35.0,
"eval_accuracy": 0.75,
"eval_loss": 1.427807331085205,
"eval_runtime": 2.282,
"eval_samples_per_second": 38.562,
"eval_steps_per_second": 1.315,
"step": 700
},
{
"epoch": 35.41558441558441,
"grad_norm": 27.73215103149414,
"learning_rate": 1.913490645017846e-06,
"loss": 0.2284,
"step": 708
},
{
"epoch": 36.0,
"grad_norm": 1.1545226573944092,
"learning_rate": 1.4418847014323944e-06,
"loss": 0.189,
"step": 720
},
{
"epoch": 36.0,
"eval_accuracy": 0.75,
"eval_loss": 1.428978681564331,
"eval_runtime": 1.6065,
"eval_samples_per_second": 54.776,
"eval_steps_per_second": 1.867,
"step": 720
},
{
"epoch": 36.62337662337662,
"grad_norm": 11.000960350036621,
"learning_rate": 1.0352100605192315e-06,
"loss": 0.2491,
"step": 732
},
{
"epoch": 37.0,
"eval_accuracy": 0.7613636363636364,
"eval_loss": 1.4449158906936646,
"eval_runtime": 1.636,
"eval_samples_per_second": 53.788,
"eval_steps_per_second": 1.834,
"step": 740
},
{
"epoch": 37.20779220779221,
"grad_norm": 10.077706336975098,
"learning_rate": 6.945876062651985e-07,
"loss": 0.2124,
"step": 744
},
{
"epoch": 37.83116883116883,
"grad_norm": 12.775538444519043,
"learning_rate": 4.209561684278496e-07,
"loss": 0.2562,
"step": 756
},
{
"epoch": 38.0,
"eval_accuracy": 0.75,
"eval_loss": 1.4514336585998535,
"eval_runtime": 1.6491,
"eval_samples_per_second": 53.364,
"eval_steps_per_second": 1.819,
"step": 760
},
{
"epoch": 38.41558441558441,
"grad_norm": 16.541933059692383,
"learning_rate": 2.1506993491592354e-07,
"loss": 0.2026,
"step": 768
},
{
"epoch": 39.0,
"grad_norm": 10.78030776977539,
"learning_rate": 7.749637308301361e-08,
"loss": 0.1872,
"step": 780
},
{
"epoch": 39.0,
"eval_accuracy": 0.75,
"eval_loss": 1.4522464275360107,
"eval_runtime": 1.6558,
"eval_samples_per_second": 53.147,
"eval_steps_per_second": 1.812,
"step": 780
},
{
"epoch": 39.62337662337662,
"grad_norm": 10.336092948913574,
"learning_rate": 8.614665663816968e-09,
"loss": 0.223,
"step": 792
},
{
"epoch": 39.935064935064936,
"eval_accuracy": 0.75,
"eval_loss": 1.45267653465271,
"eval_runtime": 1.9683,
"eval_samples_per_second": 44.708,
"eval_steps_per_second": 1.524,
"step": 798
},
{
"epoch": 39.935064935064936,
"step": 798,
"total_flos": 3.181342596532273e+18,
"train_loss": 0.8569460838360894,
"train_runtime": 2907.2025,
"train_samples_per_second": 35.366,
"train_steps_per_second": 0.274
}
],
"logging_steps": 12,
"max_steps": 798,
"num_input_tokens_seen": 0,
"num_train_epochs": 42,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.181342596532273e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}