Augusto777's picture
End of training
b922848 verified
{
"best_metric": 0.7608695652173914,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da-colab2/checkpoint-230",
"epoch": 38.26086956521739,
"eval_steps": 500,
"global_step": 440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8695652173913043,
"grad_norm": 20.089263916015625,
"learning_rate": 1.465909090909091e-05,
"loss": 1.357,
"step": 10
},
{
"epoch": 0.9565217391304348,
"eval_accuracy": 0.391304347826087,
"eval_loss": 1.3905678987503052,
"eval_runtime": 2.4821,
"eval_samples_per_second": 18.532,
"eval_steps_per_second": 1.209,
"step": 11
},
{
"epoch": 1.7391304347826086,
"grad_norm": 24.629512786865234,
"learning_rate": 1.4318181818181818e-05,
"loss": 1.2964,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.43478260869565216,
"eval_loss": 1.2818552255630493,
"eval_runtime": 0.9951,
"eval_samples_per_second": 46.227,
"eval_steps_per_second": 3.015,
"step": 23
},
{
"epoch": 2.608695652173913,
"grad_norm": 26.742063522338867,
"learning_rate": 1.3977272727272727e-05,
"loss": 1.1609,
"step": 30
},
{
"epoch": 2.9565217391304346,
"eval_accuracy": 0.4782608695652174,
"eval_loss": 1.1804310083389282,
"eval_runtime": 0.8607,
"eval_samples_per_second": 53.446,
"eval_steps_per_second": 3.486,
"step": 34
},
{
"epoch": 3.4782608695652173,
"grad_norm": 26.6147403717041,
"learning_rate": 1.3636363636363637e-05,
"loss": 1.0747,
"step": 40
},
{
"epoch": 4.0,
"eval_accuracy": 0.6086956521739131,
"eval_loss": 1.0910847187042236,
"eval_runtime": 1.1974,
"eval_samples_per_second": 38.416,
"eval_steps_per_second": 2.505,
"step": 46
},
{
"epoch": 4.3478260869565215,
"grad_norm": 31.382036209106445,
"learning_rate": 1.3295454545454546e-05,
"loss": 1.027,
"step": 50
},
{
"epoch": 4.956521739130435,
"eval_accuracy": 0.6304347826086957,
"eval_loss": 1.0176359415054321,
"eval_runtime": 0.8717,
"eval_samples_per_second": 52.771,
"eval_steps_per_second": 3.442,
"step": 57
},
{
"epoch": 5.217391304347826,
"grad_norm": 30.808866500854492,
"learning_rate": 1.2954545454545455e-05,
"loss": 0.8985,
"step": 60
},
{
"epoch": 6.0,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.8963044285774231,
"eval_runtime": 0.8493,
"eval_samples_per_second": 54.159,
"eval_steps_per_second": 3.532,
"step": 69
},
{
"epoch": 6.086956521739131,
"grad_norm": 38.13185501098633,
"learning_rate": 1.2613636363636365e-05,
"loss": 0.8661,
"step": 70
},
{
"epoch": 6.956521739130435,
"grad_norm": 51.77964782714844,
"learning_rate": 1.2272727272727274e-05,
"loss": 0.8031,
"step": 80
},
{
"epoch": 6.956521739130435,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.9867271184921265,
"eval_runtime": 0.8593,
"eval_samples_per_second": 53.529,
"eval_steps_per_second": 3.491,
"step": 80
},
{
"epoch": 7.826086956521739,
"grad_norm": 35.04009246826172,
"learning_rate": 1.1931818181818181e-05,
"loss": 0.7744,
"step": 90
},
{
"epoch": 8.0,
"eval_accuracy": 0.6521739130434783,
"eval_loss": 0.8709511756896973,
"eval_runtime": 1.0941,
"eval_samples_per_second": 42.042,
"eval_steps_per_second": 2.742,
"step": 92
},
{
"epoch": 8.695652173913043,
"grad_norm": 39.17298889160156,
"learning_rate": 1.159090909090909e-05,
"loss": 0.7488,
"step": 100
},
{
"epoch": 8.956521739130435,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.8845287561416626,
"eval_runtime": 0.8331,
"eval_samples_per_second": 55.218,
"eval_steps_per_second": 3.601,
"step": 103
},
{
"epoch": 9.565217391304348,
"grad_norm": 65.02764892578125,
"learning_rate": 1.125e-05,
"loss": 0.6767,
"step": 110
},
{
"epoch": 10.0,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.8693321943283081,
"eval_runtime": 0.8369,
"eval_samples_per_second": 54.964,
"eval_steps_per_second": 3.585,
"step": 115
},
{
"epoch": 10.434782608695652,
"grad_norm": 41.53019332885742,
"learning_rate": 1.0909090909090909e-05,
"loss": 0.6082,
"step": 120
},
{
"epoch": 10.956521739130435,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.8132839798927307,
"eval_runtime": 1.1162,
"eval_samples_per_second": 41.21,
"eval_steps_per_second": 2.688,
"step": 126
},
{
"epoch": 11.304347826086957,
"grad_norm": 45.66834259033203,
"learning_rate": 1.0568181818181819e-05,
"loss": 0.6354,
"step": 130
},
{
"epoch": 12.0,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.877053439617157,
"eval_runtime": 1.1215,
"eval_samples_per_second": 41.016,
"eval_steps_per_second": 2.675,
"step": 138
},
{
"epoch": 12.173913043478262,
"grad_norm": 52.994873046875,
"learning_rate": 1.0227272727272727e-05,
"loss": 0.6422,
"step": 140
},
{
"epoch": 12.956521739130435,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8137139678001404,
"eval_runtime": 0.8373,
"eval_samples_per_second": 54.937,
"eval_steps_per_second": 3.583,
"step": 149
},
{
"epoch": 13.043478260869565,
"grad_norm": 124.10873413085938,
"learning_rate": 9.886363636363637e-06,
"loss": 0.5812,
"step": 150
},
{
"epoch": 13.91304347826087,
"grad_norm": 62.76710510253906,
"learning_rate": 9.545454545454545e-06,
"loss": 0.584,
"step": 160
},
{
"epoch": 14.0,
"eval_accuracy": 0.6521739130434783,
"eval_loss": 0.8860651254653931,
"eval_runtime": 1.1309,
"eval_samples_per_second": 40.674,
"eval_steps_per_second": 2.653,
"step": 161
},
{
"epoch": 14.782608695652174,
"grad_norm": 60.06248474121094,
"learning_rate": 9.204545454545455e-06,
"loss": 0.5763,
"step": 170
},
{
"epoch": 14.956521739130435,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.8459398746490479,
"eval_runtime": 0.8491,
"eval_samples_per_second": 54.178,
"eval_steps_per_second": 3.533,
"step": 172
},
{
"epoch": 15.652173913043478,
"grad_norm": 60.97280502319336,
"learning_rate": 8.863636363636365e-06,
"loss": 0.5238,
"step": 180
},
{
"epoch": 16.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8590155839920044,
"eval_runtime": 0.8527,
"eval_samples_per_second": 53.948,
"eval_steps_per_second": 3.518,
"step": 184
},
{
"epoch": 16.52173913043478,
"grad_norm": 64.43607330322266,
"learning_rate": 8.522727272727273e-06,
"loss": 0.528,
"step": 190
},
{
"epoch": 16.956521739130434,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8705419301986694,
"eval_runtime": 0.8385,
"eval_samples_per_second": 54.858,
"eval_steps_per_second": 3.578,
"step": 195
},
{
"epoch": 17.391304347826086,
"grad_norm": 68.5125732421875,
"learning_rate": 8.181818181818181e-06,
"loss": 0.5626,
"step": 200
},
{
"epoch": 18.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.8636245131492615,
"eval_runtime": 0.8439,
"eval_samples_per_second": 54.506,
"eval_steps_per_second": 3.555,
"step": 207
},
{
"epoch": 18.26086956521739,
"grad_norm": 52.141483306884766,
"learning_rate": 7.840909090909091e-06,
"loss": 0.5395,
"step": 210
},
{
"epoch": 18.956521739130434,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.8793612122535706,
"eval_runtime": 0.835,
"eval_samples_per_second": 55.093,
"eval_steps_per_second": 3.593,
"step": 218
},
{
"epoch": 19.130434782608695,
"grad_norm": 43.43910598754883,
"learning_rate": 7.5e-06,
"loss": 0.5132,
"step": 220
},
{
"epoch": 20.0,
"grad_norm": 44.14228820800781,
"learning_rate": 7.159090909090909e-06,
"loss": 0.4696,
"step": 230
},
{
"epoch": 20.0,
"eval_accuracy": 0.7608695652173914,
"eval_loss": 0.8834987878799438,
"eval_runtime": 0.9864,
"eval_samples_per_second": 46.635,
"eval_steps_per_second": 3.041,
"step": 230
},
{
"epoch": 20.869565217391305,
"grad_norm": 54.529273986816406,
"learning_rate": 6.818181818181818e-06,
"loss": 0.488,
"step": 240
},
{
"epoch": 20.956521739130434,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.8888704180717468,
"eval_runtime": 0.8371,
"eval_samples_per_second": 54.954,
"eval_steps_per_second": 3.584,
"step": 241
},
{
"epoch": 21.73913043478261,
"grad_norm": 44.353816986083984,
"learning_rate": 6.477272727272727e-06,
"loss": 0.4764,
"step": 250
},
{
"epoch": 22.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.9109261631965637,
"eval_runtime": 0.8385,
"eval_samples_per_second": 54.857,
"eval_steps_per_second": 3.578,
"step": 253
},
{
"epoch": 22.608695652173914,
"grad_norm": 46.19450378417969,
"learning_rate": 6.136363636363637e-06,
"loss": 0.4668,
"step": 260
},
{
"epoch": 22.956521739130434,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.8892802596092224,
"eval_runtime": 1.1324,
"eval_samples_per_second": 40.621,
"eval_steps_per_second": 2.649,
"step": 264
},
{
"epoch": 23.47826086956522,
"grad_norm": 43.89274597167969,
"learning_rate": 5.795454545454545e-06,
"loss": 0.4676,
"step": 270
},
{
"epoch": 24.0,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.908248245716095,
"eval_runtime": 0.8948,
"eval_samples_per_second": 51.41,
"eval_steps_per_second": 3.353,
"step": 276
},
{
"epoch": 24.347826086956523,
"grad_norm": 39.96305847167969,
"learning_rate": 5.4545454545454545e-06,
"loss": 0.4619,
"step": 280
},
{
"epoch": 24.956521739130434,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.9352836608886719,
"eval_runtime": 0.8548,
"eval_samples_per_second": 53.815,
"eval_steps_per_second": 3.51,
"step": 287
},
{
"epoch": 25.217391304347824,
"grad_norm": 54.07424545288086,
"learning_rate": 5.1136363636363635e-06,
"loss": 0.4727,
"step": 290
},
{
"epoch": 26.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.9331096410751343,
"eval_runtime": 0.833,
"eval_samples_per_second": 55.223,
"eval_steps_per_second": 3.602,
"step": 299
},
{
"epoch": 26.08695652173913,
"grad_norm": 67.8470687866211,
"learning_rate": 4.7727272727272725e-06,
"loss": 0.4644,
"step": 300
},
{
"epoch": 26.956521739130434,
"grad_norm": 59.70988082885742,
"learning_rate": 4.4318181818181824e-06,
"loss": 0.4461,
"step": 310
},
{
"epoch": 26.956521739130434,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.8937391042709351,
"eval_runtime": 0.8465,
"eval_samples_per_second": 54.339,
"eval_steps_per_second": 3.544,
"step": 310
},
{
"epoch": 27.82608695652174,
"grad_norm": 54.89842987060547,
"learning_rate": 4.090909090909091e-06,
"loss": 0.428,
"step": 320
},
{
"epoch": 28.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.91752028465271,
"eval_runtime": 0.8414,
"eval_samples_per_second": 54.668,
"eval_steps_per_second": 3.565,
"step": 322
},
{
"epoch": 28.695652173913043,
"grad_norm": 47.60584259033203,
"learning_rate": 3.75e-06,
"loss": 0.4694,
"step": 330
},
{
"epoch": 28.956521739130434,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.934003472328186,
"eval_runtime": 1.1124,
"eval_samples_per_second": 41.351,
"eval_steps_per_second": 2.697,
"step": 333
},
{
"epoch": 29.565217391304348,
"grad_norm": 65.11713409423828,
"learning_rate": 3.409090909090909e-06,
"loss": 0.3812,
"step": 340
},
{
"epoch": 30.0,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.9721739292144775,
"eval_runtime": 1.1376,
"eval_samples_per_second": 40.437,
"eval_steps_per_second": 2.637,
"step": 345
},
{
"epoch": 30.434782608695652,
"grad_norm": 70.5523452758789,
"learning_rate": 3.0681818181818186e-06,
"loss": 0.4252,
"step": 350
},
{
"epoch": 30.956521739130434,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.9432597756385803,
"eval_runtime": 0.8436,
"eval_samples_per_second": 54.525,
"eval_steps_per_second": 3.556,
"step": 356
},
{
"epoch": 31.304347826086957,
"grad_norm": 53.673583984375,
"learning_rate": 2.7272727272727272e-06,
"loss": 0.3883,
"step": 360
},
{
"epoch": 32.0,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 0.9419939517974854,
"eval_runtime": 1.1286,
"eval_samples_per_second": 40.758,
"eval_steps_per_second": 2.658,
"step": 368
},
{
"epoch": 32.17391304347826,
"grad_norm": 51.48313522338867,
"learning_rate": 2.3863636363636363e-06,
"loss": 0.4228,
"step": 370
},
{
"epoch": 32.95652173913044,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.9482960104942322,
"eval_runtime": 0.8227,
"eval_samples_per_second": 55.913,
"eval_steps_per_second": 3.647,
"step": 379
},
{
"epoch": 33.04347826086956,
"grad_norm": 52.699462890625,
"learning_rate": 2.0454545454545453e-06,
"loss": 0.4058,
"step": 380
},
{
"epoch": 33.91304347826087,
"grad_norm": 69.62663269042969,
"learning_rate": 1.7045454545454546e-06,
"loss": 0.4288,
"step": 390
},
{
"epoch": 34.0,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.9529407620429993,
"eval_runtime": 0.8435,
"eval_samples_per_second": 54.537,
"eval_steps_per_second": 3.557,
"step": 391
},
{
"epoch": 34.78260869565217,
"grad_norm": 52.298667907714844,
"learning_rate": 1.3636363636363636e-06,
"loss": 0.3982,
"step": 400
},
{
"epoch": 34.95652173913044,
"eval_accuracy": 0.717391304347826,
"eval_loss": 0.950598418712616,
"eval_runtime": 0.8563,
"eval_samples_per_second": 53.721,
"eval_steps_per_second": 3.504,
"step": 402
},
{
"epoch": 35.65217391304348,
"grad_norm": 69.20966339111328,
"learning_rate": 1.0227272727272727e-06,
"loss": 0.3935,
"step": 410
},
{
"epoch": 36.0,
"eval_accuracy": 0.6739130434782609,
"eval_loss": 0.9538769721984863,
"eval_runtime": 0.8438,
"eval_samples_per_second": 54.514,
"eval_steps_per_second": 3.555,
"step": 414
},
{
"epoch": 36.52173913043478,
"grad_norm": 35.139495849609375,
"learning_rate": 6.818181818181818e-07,
"loss": 0.3974,
"step": 420
},
{
"epoch": 36.95652173913044,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.9598949551582336,
"eval_runtime": 0.8321,
"eval_samples_per_second": 55.282,
"eval_steps_per_second": 3.605,
"step": 425
},
{
"epoch": 37.391304347826086,
"grad_norm": 41.308441162109375,
"learning_rate": 3.409090909090909e-07,
"loss": 0.3893,
"step": 430
},
{
"epoch": 38.0,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.9607635140419006,
"eval_runtime": 1.127,
"eval_samples_per_second": 40.816,
"eval_steps_per_second": 2.662,
"step": 437
},
{
"epoch": 38.26086956521739,
"grad_norm": 44.250675201416016,
"learning_rate": 0.0,
"loss": 0.4201,
"step": 440
},
{
"epoch": 38.26086956521739,
"eval_accuracy": 0.6956521739130435,
"eval_loss": 0.960838794708252,
"eval_runtime": 1.097,
"eval_samples_per_second": 41.934,
"eval_steps_per_second": 2.735,
"step": 440
},
{
"epoch": 38.26086956521739,
"step": 440,
"total_flos": 8.989085534729011e+17,
"train_loss": 0.6031668256629598,
"train_runtime": 1022.1031,
"train_samples_per_second": 28.255,
"train_steps_per_second": 0.43
}
],
"logging_steps": 10,
"max_steps": 440,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.989085534729011e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}