| { | |
| "best_metric": 0.7608695652173914, | |
| "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da-colab2/checkpoint-230", | |
| "epoch": 38.26086956521739, | |
| "eval_steps": 500, | |
| "global_step": 440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 20.089263916015625, | |
| "learning_rate": 1.465909090909091e-05, | |
| "loss": 1.357, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.9565217391304348, | |
| "eval_accuracy": 0.391304347826087, | |
| "eval_loss": 1.3905678987503052, | |
| "eval_runtime": 2.4821, | |
| "eval_samples_per_second": 18.532, | |
| "eval_steps_per_second": 1.209, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 24.629512786865234, | |
| "learning_rate": 1.4318181818181818e-05, | |
| "loss": 1.2964, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.43478260869565216, | |
| "eval_loss": 1.2818552255630493, | |
| "eval_runtime": 0.9951, | |
| "eval_samples_per_second": 46.227, | |
| "eval_steps_per_second": 3.015, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 26.742063522338867, | |
| "learning_rate": 1.3977272727272727e-05, | |
| "loss": 1.1609, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.9565217391304346, | |
| "eval_accuracy": 0.4782608695652174, | |
| "eval_loss": 1.1804310083389282, | |
| "eval_runtime": 0.8607, | |
| "eval_samples_per_second": 53.446, | |
| "eval_steps_per_second": 3.486, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 26.6147403717041, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 1.0747, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6086956521739131, | |
| "eval_loss": 1.0910847187042236, | |
| "eval_runtime": 1.1974, | |
| "eval_samples_per_second": 38.416, | |
| "eval_steps_per_second": 2.505, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 31.382036209106445, | |
| "learning_rate": 1.3295454545454546e-05, | |
| "loss": 1.027, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 4.956521739130435, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 1.0176359415054321, | |
| "eval_runtime": 0.8717, | |
| "eval_samples_per_second": 52.771, | |
| "eval_steps_per_second": 3.442, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 5.217391304347826, | |
| "grad_norm": 30.808866500854492, | |
| "learning_rate": 1.2954545454545455e-05, | |
| "loss": 0.8985, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.8963044285774231, | |
| "eval_runtime": 0.8493, | |
| "eval_samples_per_second": 54.159, | |
| "eval_steps_per_second": 3.532, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 6.086956521739131, | |
| "grad_norm": 38.13185501098633, | |
| "learning_rate": 1.2613636363636365e-05, | |
| "loss": 0.8661, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 6.956521739130435, | |
| "grad_norm": 51.77964782714844, | |
| "learning_rate": 1.2272727272727274e-05, | |
| "loss": 0.8031, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 6.956521739130435, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.9867271184921265, | |
| "eval_runtime": 0.8593, | |
| "eval_samples_per_second": 53.529, | |
| "eval_steps_per_second": 3.491, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 7.826086956521739, | |
| "grad_norm": 35.04009246826172, | |
| "learning_rate": 1.1931818181818181e-05, | |
| "loss": 0.7744, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.8709511756896973, | |
| "eval_runtime": 1.0941, | |
| "eval_samples_per_second": 42.042, | |
| "eval_steps_per_second": 2.742, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 8.695652173913043, | |
| "grad_norm": 39.17298889160156, | |
| "learning_rate": 1.159090909090909e-05, | |
| "loss": 0.7488, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 8.956521739130435, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8845287561416626, | |
| "eval_runtime": 0.8331, | |
| "eval_samples_per_second": 55.218, | |
| "eval_steps_per_second": 3.601, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 9.565217391304348, | |
| "grad_norm": 65.02764892578125, | |
| "learning_rate": 1.125e-05, | |
| "loss": 0.6767, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8693321943283081, | |
| "eval_runtime": 0.8369, | |
| "eval_samples_per_second": 54.964, | |
| "eval_steps_per_second": 3.585, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 10.434782608695652, | |
| "grad_norm": 41.53019332885742, | |
| "learning_rate": 1.0909090909090909e-05, | |
| "loss": 0.6082, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 10.956521739130435, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.8132839798927307, | |
| "eval_runtime": 1.1162, | |
| "eval_samples_per_second": 41.21, | |
| "eval_steps_per_second": 2.688, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 11.304347826086957, | |
| "grad_norm": 45.66834259033203, | |
| "learning_rate": 1.0568181818181819e-05, | |
| "loss": 0.6354, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.877053439617157, | |
| "eval_runtime": 1.1215, | |
| "eval_samples_per_second": 41.016, | |
| "eval_steps_per_second": 2.675, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 12.173913043478262, | |
| "grad_norm": 52.994873046875, | |
| "learning_rate": 1.0227272727272727e-05, | |
| "loss": 0.6422, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 12.956521739130435, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8137139678001404, | |
| "eval_runtime": 0.8373, | |
| "eval_samples_per_second": 54.937, | |
| "eval_steps_per_second": 3.583, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 13.043478260869565, | |
| "grad_norm": 124.10873413085938, | |
| "learning_rate": 9.886363636363637e-06, | |
| "loss": 0.5812, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 13.91304347826087, | |
| "grad_norm": 62.76710510253906, | |
| "learning_rate": 9.545454545454545e-06, | |
| "loss": 0.584, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.6521739130434783, | |
| "eval_loss": 0.8860651254653931, | |
| "eval_runtime": 1.1309, | |
| "eval_samples_per_second": 40.674, | |
| "eval_steps_per_second": 2.653, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 14.782608695652174, | |
| "grad_norm": 60.06248474121094, | |
| "learning_rate": 9.204545454545455e-06, | |
| "loss": 0.5763, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 14.956521739130435, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8459398746490479, | |
| "eval_runtime": 0.8491, | |
| "eval_samples_per_second": 54.178, | |
| "eval_steps_per_second": 3.533, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 15.652173913043478, | |
| "grad_norm": 60.97280502319336, | |
| "learning_rate": 8.863636363636365e-06, | |
| "loss": 0.5238, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8590155839920044, | |
| "eval_runtime": 0.8527, | |
| "eval_samples_per_second": 53.948, | |
| "eval_steps_per_second": 3.518, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 16.52173913043478, | |
| "grad_norm": 64.43607330322266, | |
| "learning_rate": 8.522727272727273e-06, | |
| "loss": 0.528, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 16.956521739130434, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8705419301986694, | |
| "eval_runtime": 0.8385, | |
| "eval_samples_per_second": 54.858, | |
| "eval_steps_per_second": 3.578, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 17.391304347826086, | |
| "grad_norm": 68.5125732421875, | |
| "learning_rate": 8.181818181818181e-06, | |
| "loss": 0.5626, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.8636245131492615, | |
| "eval_runtime": 0.8439, | |
| "eval_samples_per_second": 54.506, | |
| "eval_steps_per_second": 3.555, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 18.26086956521739, | |
| "grad_norm": 52.141483306884766, | |
| "learning_rate": 7.840909090909091e-06, | |
| "loss": 0.5395, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 18.956521739130434, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.8793612122535706, | |
| "eval_runtime": 0.835, | |
| "eval_samples_per_second": 55.093, | |
| "eval_steps_per_second": 3.593, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 19.130434782608695, | |
| "grad_norm": 43.43910598754883, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.5132, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 44.14228820800781, | |
| "learning_rate": 7.159090909090909e-06, | |
| "loss": 0.4696, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7608695652173914, | |
| "eval_loss": 0.8834987878799438, | |
| "eval_runtime": 0.9864, | |
| "eval_samples_per_second": 46.635, | |
| "eval_steps_per_second": 3.041, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 20.869565217391305, | |
| "grad_norm": 54.529273986816406, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 0.488, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 20.956521739130434, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8888704180717468, | |
| "eval_runtime": 0.8371, | |
| "eval_samples_per_second": 54.954, | |
| "eval_steps_per_second": 3.584, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 21.73913043478261, | |
| "grad_norm": 44.353816986083984, | |
| "learning_rate": 6.477272727272727e-06, | |
| "loss": 0.4764, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9109261631965637, | |
| "eval_runtime": 0.8385, | |
| "eval_samples_per_second": 54.857, | |
| "eval_steps_per_second": 3.578, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 22.608695652173914, | |
| "grad_norm": 46.19450378417969, | |
| "learning_rate": 6.136363636363637e-06, | |
| "loss": 0.4668, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 22.956521739130434, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8892802596092224, | |
| "eval_runtime": 1.1324, | |
| "eval_samples_per_second": 40.621, | |
| "eval_steps_per_second": 2.649, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 23.47826086956522, | |
| "grad_norm": 43.89274597167969, | |
| "learning_rate": 5.795454545454545e-06, | |
| "loss": 0.4676, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.908248245716095, | |
| "eval_runtime": 0.8948, | |
| "eval_samples_per_second": 51.41, | |
| "eval_steps_per_second": 3.353, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 24.347826086956523, | |
| "grad_norm": 39.96305847167969, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 0.4619, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 24.956521739130434, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9352836608886719, | |
| "eval_runtime": 0.8548, | |
| "eval_samples_per_second": 53.815, | |
| "eval_steps_per_second": 3.51, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 25.217391304347824, | |
| "grad_norm": 54.07424545288086, | |
| "learning_rate": 5.1136363636363635e-06, | |
| "loss": 0.4727, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9331096410751343, | |
| "eval_runtime": 0.833, | |
| "eval_samples_per_second": 55.223, | |
| "eval_steps_per_second": 3.602, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 26.08695652173913, | |
| "grad_norm": 67.8470687866211, | |
| "learning_rate": 4.7727272727272725e-06, | |
| "loss": 0.4644, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 26.956521739130434, | |
| "grad_norm": 59.70988082885742, | |
| "learning_rate": 4.4318181818181824e-06, | |
| "loss": 0.4461, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 26.956521739130434, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.8937391042709351, | |
| "eval_runtime": 0.8465, | |
| "eval_samples_per_second": 54.339, | |
| "eval_steps_per_second": 3.544, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 27.82608695652174, | |
| "grad_norm": 54.89842987060547, | |
| "learning_rate": 4.090909090909091e-06, | |
| "loss": 0.428, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.91752028465271, | |
| "eval_runtime": 0.8414, | |
| "eval_samples_per_second": 54.668, | |
| "eval_steps_per_second": 3.565, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 28.695652173913043, | |
| "grad_norm": 47.60584259033203, | |
| "learning_rate": 3.75e-06, | |
| "loss": 0.4694, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 28.956521739130434, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.934003472328186, | |
| "eval_runtime": 1.1124, | |
| "eval_samples_per_second": 41.351, | |
| "eval_steps_per_second": 2.697, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 29.565217391304348, | |
| "grad_norm": 65.11713409423828, | |
| "learning_rate": 3.409090909090909e-06, | |
| "loss": 0.3812, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.9721739292144775, | |
| "eval_runtime": 1.1376, | |
| "eval_samples_per_second": 40.437, | |
| "eval_steps_per_second": 2.637, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 30.434782608695652, | |
| "grad_norm": 70.5523452758789, | |
| "learning_rate": 3.0681818181818186e-06, | |
| "loss": 0.4252, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 30.956521739130434, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9432597756385803, | |
| "eval_runtime": 0.8436, | |
| "eval_samples_per_second": 54.525, | |
| "eval_steps_per_second": 3.556, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 31.304347826086957, | |
| "grad_norm": 53.673583984375, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.3883, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7391304347826086, | |
| "eval_loss": 0.9419939517974854, | |
| "eval_runtime": 1.1286, | |
| "eval_samples_per_second": 40.758, | |
| "eval_steps_per_second": 2.658, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 32.17391304347826, | |
| "grad_norm": 51.48313522338867, | |
| "learning_rate": 2.3863636363636363e-06, | |
| "loss": 0.4228, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 32.95652173913044, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.9482960104942322, | |
| "eval_runtime": 0.8227, | |
| "eval_samples_per_second": 55.913, | |
| "eval_steps_per_second": 3.647, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 33.04347826086956, | |
| "grad_norm": 52.699462890625, | |
| "learning_rate": 2.0454545454545453e-06, | |
| "loss": 0.4058, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 33.91304347826087, | |
| "grad_norm": 69.62663269042969, | |
| "learning_rate": 1.7045454545454546e-06, | |
| "loss": 0.4288, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.9529407620429993, | |
| "eval_runtime": 0.8435, | |
| "eval_samples_per_second": 54.537, | |
| "eval_steps_per_second": 3.557, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 34.78260869565217, | |
| "grad_norm": 52.298667907714844, | |
| "learning_rate": 1.3636363636363636e-06, | |
| "loss": 0.3982, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 34.95652173913044, | |
| "eval_accuracy": 0.717391304347826, | |
| "eval_loss": 0.950598418712616, | |
| "eval_runtime": 0.8563, | |
| "eval_samples_per_second": 53.721, | |
| "eval_steps_per_second": 3.504, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 35.65217391304348, | |
| "grad_norm": 69.20966339111328, | |
| "learning_rate": 1.0227272727272727e-06, | |
| "loss": 0.3935, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.6739130434782609, | |
| "eval_loss": 0.9538769721984863, | |
| "eval_runtime": 0.8438, | |
| "eval_samples_per_second": 54.514, | |
| "eval_steps_per_second": 3.555, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 36.52173913043478, | |
| "grad_norm": 35.139495849609375, | |
| "learning_rate": 6.818181818181818e-07, | |
| "loss": 0.3974, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 36.95652173913044, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.9598949551582336, | |
| "eval_runtime": 0.8321, | |
| "eval_samples_per_second": 55.282, | |
| "eval_steps_per_second": 3.605, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 37.391304347826086, | |
| "grad_norm": 41.308441162109375, | |
| "learning_rate": 3.409090909090909e-07, | |
| "loss": 0.3893, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.9607635140419006, | |
| "eval_runtime": 1.127, | |
| "eval_samples_per_second": 40.816, | |
| "eval_steps_per_second": 2.662, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 38.26086956521739, | |
| "grad_norm": 44.250675201416016, | |
| "learning_rate": 0.0, | |
| "loss": 0.4201, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 38.26086956521739, | |
| "eval_accuracy": 0.6956521739130435, | |
| "eval_loss": 0.960838794708252, | |
| "eval_runtime": 1.097, | |
| "eval_samples_per_second": 41.934, | |
| "eval_steps_per_second": 2.735, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 38.26086956521739, | |
| "step": 440, | |
| "total_flos": 8.989085534729011e+17, | |
| "train_loss": 0.6031668256629598, | |
| "train_runtime": 1022.1031, | |
| "train_samples_per_second": 28.255, | |
| "train_steps_per_second": 0.43 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 440, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.989085534729011e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |