{ "best_metric": 0.7608695652173914, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da-colab2/checkpoint-230", "epoch": 38.26086956521739, "eval_steps": 500, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 20.089263916015625, "learning_rate": 1.465909090909091e-05, "loss": 1.357, "step": 10 }, { "epoch": 0.9565217391304348, "eval_accuracy": 0.391304347826087, "eval_loss": 1.3905678987503052, "eval_runtime": 2.4821, "eval_samples_per_second": 18.532, "eval_steps_per_second": 1.209, "step": 11 }, { "epoch": 1.7391304347826086, "grad_norm": 24.629512786865234, "learning_rate": 1.4318181818181818e-05, "loss": 1.2964, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.43478260869565216, "eval_loss": 1.2818552255630493, "eval_runtime": 0.9951, "eval_samples_per_second": 46.227, "eval_steps_per_second": 3.015, "step": 23 }, { "epoch": 2.608695652173913, "grad_norm": 26.742063522338867, "learning_rate": 1.3977272727272727e-05, "loss": 1.1609, "step": 30 }, { "epoch": 2.9565217391304346, "eval_accuracy": 0.4782608695652174, "eval_loss": 1.1804310083389282, "eval_runtime": 0.8607, "eval_samples_per_second": 53.446, "eval_steps_per_second": 3.486, "step": 34 }, { "epoch": 3.4782608695652173, "grad_norm": 26.6147403717041, "learning_rate": 1.3636363636363637e-05, "loss": 1.0747, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 1.0910847187042236, "eval_runtime": 1.1974, "eval_samples_per_second": 38.416, "eval_steps_per_second": 2.505, "step": 46 }, { "epoch": 4.3478260869565215, "grad_norm": 31.382036209106445, "learning_rate": 1.3295454545454546e-05, "loss": 1.027, "step": 50 }, { "epoch": 4.956521739130435, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.0176359415054321, "eval_runtime": 0.8717, "eval_samples_per_second": 52.771, "eval_steps_per_second": 3.442, "step": 57 }, { "epoch": 5.217391304347826, "grad_norm": 30.808866500854492, "learning_rate": 1.2954545454545455e-05, "loss": 0.8985, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8963044285774231, "eval_runtime": 0.8493, "eval_samples_per_second": 54.159, "eval_steps_per_second": 3.532, "step": 69 }, { "epoch": 6.086956521739131, "grad_norm": 38.13185501098633, "learning_rate": 1.2613636363636365e-05, "loss": 0.8661, "step": 70 }, { "epoch": 6.956521739130435, "grad_norm": 51.77964782714844, "learning_rate": 1.2272727272727274e-05, "loss": 0.8031, "step": 80 }, { "epoch": 6.956521739130435, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9867271184921265, "eval_runtime": 0.8593, "eval_samples_per_second": 53.529, "eval_steps_per_second": 3.491, "step": 80 }, { "epoch": 7.826086956521739, "grad_norm": 35.04009246826172, "learning_rate": 1.1931818181818181e-05, "loss": 0.7744, "step": 90 }, { "epoch": 8.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.8709511756896973, "eval_runtime": 1.0941, "eval_samples_per_second": 42.042, "eval_steps_per_second": 2.742, "step": 92 }, { "epoch": 8.695652173913043, "grad_norm": 39.17298889160156, "learning_rate": 1.159090909090909e-05, "loss": 0.7488, "step": 100 }, { "epoch": 8.956521739130435, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8845287561416626, "eval_runtime": 0.8331, "eval_samples_per_second": 55.218, "eval_steps_per_second": 3.601, "step": 103 }, { "epoch": 9.565217391304348, "grad_norm": 65.02764892578125, "learning_rate": 1.125e-05, "loss": 0.6767, "step": 110 }, { "epoch": 10.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8693321943283081, "eval_runtime": 0.8369, "eval_samples_per_second": 54.964, "eval_steps_per_second": 3.585, "step": 115 }, { "epoch": 10.434782608695652, "grad_norm": 41.53019332885742, "learning_rate": 1.0909090909090909e-05, "loss": 0.6082, "step": 120 }, { "epoch": 10.956521739130435, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8132839798927307, "eval_runtime": 1.1162, "eval_samples_per_second": 41.21, "eval_steps_per_second": 2.688, "step": 126 }, { "epoch": 11.304347826086957, "grad_norm": 45.66834259033203, "learning_rate": 1.0568181818181819e-05, "loss": 0.6354, "step": 130 }, { "epoch": 12.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.877053439617157, "eval_runtime": 1.1215, "eval_samples_per_second": 41.016, "eval_steps_per_second": 2.675, "step": 138 }, { "epoch": 12.173913043478262, "grad_norm": 52.994873046875, "learning_rate": 1.0227272727272727e-05, "loss": 0.6422, "step": 140 }, { "epoch": 12.956521739130435, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8137139678001404, "eval_runtime": 0.8373, "eval_samples_per_second": 54.937, "eval_steps_per_second": 3.583, "step": 149 }, { "epoch": 13.043478260869565, "grad_norm": 124.10873413085938, "learning_rate": 9.886363636363637e-06, "loss": 0.5812, "step": 150 }, { "epoch": 13.91304347826087, "grad_norm": 62.76710510253906, "learning_rate": 9.545454545454545e-06, "loss": 0.584, "step": 160 }, { "epoch": 14.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.8860651254653931, "eval_runtime": 1.1309, "eval_samples_per_second": 40.674, "eval_steps_per_second": 2.653, "step": 161 }, { "epoch": 14.782608695652174, "grad_norm": 60.06248474121094, "learning_rate": 9.204545454545455e-06, "loss": 0.5763, "step": 170 }, { "epoch": 14.956521739130435, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8459398746490479, "eval_runtime": 0.8491, "eval_samples_per_second": 54.178, "eval_steps_per_second": 3.533, "step": 172 }, { "epoch": 15.652173913043478, "grad_norm": 60.97280502319336, "learning_rate": 8.863636363636365e-06, "loss": 0.5238, "step": 180 }, { "epoch": 16.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8590155839920044, "eval_runtime": 0.8527, "eval_samples_per_second": 53.948, "eval_steps_per_second": 3.518, "step": 184 }, { "epoch": 16.52173913043478, "grad_norm": 64.43607330322266, "learning_rate": 8.522727272727273e-06, "loss": 0.528, "step": 190 }, { "epoch": 16.956521739130434, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8705419301986694, "eval_runtime": 0.8385, "eval_samples_per_second": 54.858, "eval_steps_per_second": 3.578, "step": 195 }, { "epoch": 17.391304347826086, "grad_norm": 68.5125732421875, "learning_rate": 8.181818181818181e-06, "loss": 0.5626, "step": 200 }, { "epoch": 18.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8636245131492615, "eval_runtime": 0.8439, "eval_samples_per_second": 54.506, "eval_steps_per_second": 3.555, "step": 207 }, { "epoch": 18.26086956521739, "grad_norm": 52.141483306884766, "learning_rate": 7.840909090909091e-06, "loss": 0.5395, "step": 210 }, { "epoch": 18.956521739130434, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8793612122535706, "eval_runtime": 0.835, "eval_samples_per_second": 55.093, "eval_steps_per_second": 3.593, "step": 218 }, { "epoch": 19.130434782608695, "grad_norm": 43.43910598754883, "learning_rate": 7.5e-06, "loss": 0.5132, "step": 220 }, { "epoch": 20.0, "grad_norm": 44.14228820800781, "learning_rate": 7.159090909090909e-06, "loss": 0.4696, "step": 230 }, { "epoch": 20.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8834987878799438, "eval_runtime": 0.9864, "eval_samples_per_second": 46.635, "eval_steps_per_second": 3.041, "step": 230 }, { "epoch": 20.869565217391305, "grad_norm": 54.529273986816406, "learning_rate": 6.818181818181818e-06, "loss": 0.488, "step": 240 }, { "epoch": 20.956521739130434, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8888704180717468, "eval_runtime": 0.8371, "eval_samples_per_second": 54.954, "eval_steps_per_second": 3.584, "step": 241 }, { "epoch": 21.73913043478261, "grad_norm": 44.353816986083984, "learning_rate": 6.477272727272727e-06, "loss": 0.4764, "step": 250 }, { "epoch": 22.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9109261631965637, "eval_runtime": 0.8385, "eval_samples_per_second": 54.857, "eval_steps_per_second": 3.578, "step": 253 }, { "epoch": 22.608695652173914, "grad_norm": 46.19450378417969, "learning_rate": 6.136363636363637e-06, "loss": 0.4668, "step": 260 }, { "epoch": 22.956521739130434, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8892802596092224, "eval_runtime": 1.1324, "eval_samples_per_second": 40.621, "eval_steps_per_second": 2.649, "step": 264 }, { "epoch": 23.47826086956522, "grad_norm": 43.89274597167969, "learning_rate": 5.795454545454545e-06, "loss": 0.4676, "step": 270 }, { "epoch": 24.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.908248245716095, "eval_runtime": 0.8948, "eval_samples_per_second": 51.41, "eval_steps_per_second": 3.353, "step": 276 }, { "epoch": 24.347826086956523, "grad_norm": 39.96305847167969, "learning_rate": 5.4545454545454545e-06, "loss": 0.4619, "step": 280 }, { "epoch": 24.956521739130434, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9352836608886719, "eval_runtime": 0.8548, "eval_samples_per_second": 53.815, "eval_steps_per_second": 3.51, "step": 287 }, { "epoch": 25.217391304347824, "grad_norm": 54.07424545288086, "learning_rate": 5.1136363636363635e-06, "loss": 0.4727, "step": 290 }, { "epoch": 26.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9331096410751343, "eval_runtime": 0.833, "eval_samples_per_second": 55.223, "eval_steps_per_second": 3.602, "step": 299 }, { "epoch": 26.08695652173913, "grad_norm": 67.8470687866211, "learning_rate": 4.7727272727272725e-06, "loss": 0.4644, "step": 300 }, { "epoch": 26.956521739130434, "grad_norm": 59.70988082885742, "learning_rate": 4.4318181818181824e-06, "loss": 0.4461, "step": 310 }, { "epoch": 26.956521739130434, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8937391042709351, "eval_runtime": 0.8465, "eval_samples_per_second": 54.339, "eval_steps_per_second": 3.544, "step": 310 }, { "epoch": 27.82608695652174, "grad_norm": 54.89842987060547, "learning_rate": 4.090909090909091e-06, "loss": 0.428, "step": 320 }, { "epoch": 28.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.91752028465271, "eval_runtime": 0.8414, "eval_samples_per_second": 54.668, "eval_steps_per_second": 3.565, "step": 322 }, { "epoch": 28.695652173913043, "grad_norm": 47.60584259033203, "learning_rate": 3.75e-06, "loss": 0.4694, "step": 330 }, { "epoch": 28.956521739130434, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.934003472328186, "eval_runtime": 1.1124, "eval_samples_per_second": 41.351, "eval_steps_per_second": 2.697, "step": 333 }, { "epoch": 29.565217391304348, "grad_norm": 65.11713409423828, "learning_rate": 3.409090909090909e-06, "loss": 0.3812, "step": 340 }, { "epoch": 30.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9721739292144775, "eval_runtime": 1.1376, "eval_samples_per_second": 40.437, "eval_steps_per_second": 2.637, "step": 345 }, { "epoch": 30.434782608695652, "grad_norm": 70.5523452758789, "learning_rate": 3.0681818181818186e-06, "loss": 0.4252, "step": 350 }, { "epoch": 30.956521739130434, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9432597756385803, "eval_runtime": 0.8436, "eval_samples_per_second": 54.525, "eval_steps_per_second": 3.556, "step": 356 }, { "epoch": 31.304347826086957, "grad_norm": 53.673583984375, "learning_rate": 2.7272727272727272e-06, "loss": 0.3883, "step": 360 }, { "epoch": 32.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9419939517974854, "eval_runtime": 1.1286, "eval_samples_per_second": 40.758, "eval_steps_per_second": 2.658, "step": 368 }, { "epoch": 32.17391304347826, "grad_norm": 51.48313522338867, "learning_rate": 2.3863636363636363e-06, "loss": 0.4228, "step": 370 }, { "epoch": 32.95652173913044, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9482960104942322, "eval_runtime": 0.8227, "eval_samples_per_second": 55.913, "eval_steps_per_second": 3.647, "step": 379 }, { "epoch": 33.04347826086956, "grad_norm": 52.699462890625, "learning_rate": 2.0454545454545453e-06, "loss": 0.4058, "step": 380 }, { "epoch": 33.91304347826087, "grad_norm": 69.62663269042969, "learning_rate": 1.7045454545454546e-06, "loss": 0.4288, "step": 390 }, { "epoch": 34.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9529407620429993, "eval_runtime": 0.8435, "eval_samples_per_second": 54.537, "eval_steps_per_second": 3.557, "step": 391 }, { "epoch": 34.78260869565217, "grad_norm": 52.298667907714844, "learning_rate": 1.3636363636363636e-06, "loss": 0.3982, "step": 400 }, { "epoch": 34.95652173913044, "eval_accuracy": 0.717391304347826, "eval_loss": 0.950598418712616, "eval_runtime": 0.8563, "eval_samples_per_second": 53.721, "eval_steps_per_second": 3.504, "step": 402 }, { "epoch": 35.65217391304348, "grad_norm": 69.20966339111328, "learning_rate": 1.0227272727272727e-06, "loss": 0.3935, "step": 410 }, { "epoch": 36.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9538769721984863, "eval_runtime": 0.8438, "eval_samples_per_second": 54.514, "eval_steps_per_second": 3.555, "step": 414 }, { "epoch": 36.52173913043478, "grad_norm": 35.139495849609375, "learning_rate": 6.818181818181818e-07, "loss": 0.3974, "step": 420 }, { "epoch": 36.95652173913044, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9598949551582336, "eval_runtime": 0.8321, "eval_samples_per_second": 55.282, "eval_steps_per_second": 3.605, "step": 425 }, { "epoch": 37.391304347826086, "grad_norm": 41.308441162109375, "learning_rate": 3.409090909090909e-07, "loss": 0.3893, "step": 430 }, { "epoch": 38.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9607635140419006, "eval_runtime": 1.127, "eval_samples_per_second": 40.816, "eval_steps_per_second": 2.662, "step": 437 }, { "epoch": 38.26086956521739, "grad_norm": 44.250675201416016, "learning_rate": 0.0, "loss": 0.4201, "step": 440 }, { "epoch": 38.26086956521739, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.960838794708252, "eval_runtime": 1.097, "eval_samples_per_second": 41.934, "eval_steps_per_second": 2.735, "step": 440 }, { "epoch": 38.26086956521739, "step": 440, "total_flos": 8.989085534729011e+17, "train_loss": 0.6031668256629598, "train_runtime": 1022.1031, "train_samples_per_second": 28.255, "train_steps_per_second": 0.43 } ], "logging_steps": 10, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.989085534729011e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }