{ "best_metric": 0.7608695652173914, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-DMAE-da2-colab/checkpoint-180", "epoch": 39.108695652173914, "eval_steps": 500, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 22.860248565673828, "learning_rate": 9.090909090909091e-06, "loss": 1.4149, "step": 10 }, { "epoch": 0.9565217391304348, "eval_accuracy": 0.21739130434782608, "eval_loss": 1.3904842138290405, "eval_runtime": 0.7623, "eval_samples_per_second": 60.346, "eval_steps_per_second": 3.936, "step": 11 }, { "epoch": 1.7608695652173914, "grad_norm": 21.9100341796875, "learning_rate": 1.8181818181818182e-05, "loss": 1.3431, "step": 20 }, { "epoch": 1.9347826086956523, "eval_accuracy": 0.30434782608695654, "eval_loss": 1.3828414678573608, "eval_runtime": 0.7435, "eval_samples_per_second": 61.87, "eval_steps_per_second": 4.035, "step": 22 }, { "epoch": 2.6521739130434785, "grad_norm": 25.524051666259766, "learning_rate": 2.7272727272727273e-05, "loss": 1.2396, "step": 30 }, { "epoch": 2.9130434782608696, "eval_accuracy": 0.43478260869565216, "eval_loss": 1.2675390243530273, "eval_runtime": 0.7435, "eval_samples_per_second": 61.871, "eval_steps_per_second": 4.035, "step": 33 }, { "epoch": 3.5434782608695654, "grad_norm": 39.9882926940918, "learning_rate": 3.6363636363636364e-05, "loss": 1.1377, "step": 40 }, { "epoch": 3.9782608695652173, "eval_accuracy": 0.34782608695652173, "eval_loss": 1.2067060470581055, "eval_runtime": 1.0097, "eval_samples_per_second": 45.56, "eval_steps_per_second": 2.971, "step": 45 }, { "epoch": 4.434782608695652, "grad_norm": 32.264034271240234, "learning_rate": 3.93939393939394e-05, "loss": 1.0144, "step": 50 }, { "epoch": 4.956521739130435, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9060260057449341, "eval_runtime": 0.7487, "eval_samples_per_second": 61.443, "eval_steps_per_second": 4.007, "step": 56 }, { "epoch": 5.326086956521739, "grad_norm": 38.01457977294922, "learning_rate": 3.838383838383839e-05, "loss": 0.9016, "step": 60 }, { "epoch": 5.934782608695652, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8024644255638123, "eval_runtime": 0.7779, "eval_samples_per_second": 59.132, "eval_steps_per_second": 3.856, "step": 67 }, { "epoch": 6.217391304347826, "grad_norm": 36.355648040771484, "learning_rate": 3.7373737373737376e-05, "loss": 0.7941, "step": 70 }, { "epoch": 6.913043478260869, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.7811539769172668, "eval_runtime": 0.8396, "eval_samples_per_second": 54.787, "eval_steps_per_second": 3.573, "step": 78 }, { "epoch": 7.108695652173913, "grad_norm": 41.1641845703125, "learning_rate": 3.6363636363636364e-05, "loss": 0.7031, "step": 80 }, { "epoch": 7.978260869565218, "grad_norm": 51.79063415527344, "learning_rate": 3.535353535353536e-05, "loss": 0.6986, "step": 90 }, { "epoch": 7.978260869565218, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.9441068768501282, "eval_runtime": 0.7666, "eval_samples_per_second": 60.006, "eval_steps_per_second": 3.913, "step": 90 }, { "epoch": 8.869565217391305, "grad_norm": 71.17655181884766, "learning_rate": 3.434343434343435e-05, "loss": 0.6245, "step": 100 }, { "epoch": 8.956521739130435, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.864142894744873, "eval_runtime": 0.7557, "eval_samples_per_second": 60.867, "eval_steps_per_second": 3.97, "step": 101 }, { "epoch": 9.76086956521739, "grad_norm": 41.29151916503906, "learning_rate": 3.3333333333333335e-05, "loss": 0.6044, "step": 110 }, { "epoch": 9.934782608695652, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8647807836532593, "eval_runtime": 0.7668, "eval_samples_per_second": 59.987, "eval_steps_per_second": 3.912, "step": 112 }, { "epoch": 10.652173913043478, "grad_norm": 35.93144989013672, "learning_rate": 3.232323232323232e-05, "loss": 0.536, "step": 120 }, { "epoch": 10.91304347826087, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8800004124641418, "eval_runtime": 0.7619, "eval_samples_per_second": 60.377, "eval_steps_per_second": 3.938, "step": 123 }, { "epoch": 11.543478260869565, "grad_norm": 42.50069808959961, "learning_rate": 3.131313131313132e-05, "loss": 0.4825, "step": 130 }, { "epoch": 11.978260869565217, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8387995958328247, "eval_runtime": 0.7698, "eval_samples_per_second": 59.754, "eval_steps_per_second": 3.897, "step": 135 }, { "epoch": 12.434782608695652, "grad_norm": 82.49532318115234, "learning_rate": 3.0303030303030306e-05, "loss": 0.4972, "step": 140 }, { "epoch": 12.956521739130435, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8763275146484375, "eval_runtime": 0.7588, "eval_samples_per_second": 60.62, "eval_steps_per_second": 3.953, "step": 146 }, { "epoch": 13.326086956521738, "grad_norm": 44.642826080322266, "learning_rate": 2.9292929292929297e-05, "loss": 0.4284, "step": 150 }, { "epoch": 13.934782608695652, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8228161931037903, "eval_runtime": 1.0261, "eval_samples_per_second": 44.832, "eval_steps_per_second": 2.924, "step": 157 }, { "epoch": 14.217391304347826, "grad_norm": 57.17488479614258, "learning_rate": 2.8282828282828285e-05, "loss": 0.3961, "step": 160 }, { "epoch": 14.91304347826087, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8260169625282288, "eval_runtime": 0.8274, "eval_samples_per_second": 55.597, "eval_steps_per_second": 3.626, "step": 168 }, { "epoch": 15.108695652173912, "grad_norm": 31.25395393371582, "learning_rate": 2.7272727272727273e-05, "loss": 0.4208, "step": 170 }, { "epoch": 15.978260869565217, "grad_norm": 42.941837310791016, "learning_rate": 2.6262626262626265e-05, "loss": 0.3877, "step": 180 }, { "epoch": 15.978260869565217, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.9367595911026001, "eval_runtime": 0.7841, "eval_samples_per_second": 58.667, "eval_steps_per_second": 3.826, "step": 180 }, { "epoch": 16.869565217391305, "grad_norm": 72.04235076904297, "learning_rate": 2.5252525252525253e-05, "loss": 0.3744, "step": 190 }, { "epoch": 16.956521739130434, "eval_accuracy": 0.6304347826086957, "eval_loss": 1.1220568418502808, "eval_runtime": 1.0796, "eval_samples_per_second": 42.606, "eval_steps_per_second": 2.779, "step": 191 }, { "epoch": 17.76086956521739, "grad_norm": 43.762760162353516, "learning_rate": 2.4242424242424244e-05, "loss": 0.3266, "step": 200 }, { "epoch": 17.934782608695652, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0177233219146729, "eval_runtime": 0.7517, "eval_samples_per_second": 61.193, "eval_steps_per_second": 3.991, "step": 202 }, { "epoch": 18.652173913043477, "grad_norm": 76.85489654541016, "learning_rate": 2.3232323232323232e-05, "loss": 0.3257, "step": 210 }, { "epoch": 18.91304347826087, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.0300357341766357, "eval_runtime": 0.7634, "eval_samples_per_second": 60.254, "eval_steps_per_second": 3.93, "step": 213 }, { "epoch": 19.543478260869566, "grad_norm": 40.13884353637695, "learning_rate": 2.2222222222222227e-05, "loss": 0.3164, "step": 220 }, { "epoch": 19.97826086956522, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.134386658668518, "eval_runtime": 0.8709, "eval_samples_per_second": 52.817, "eval_steps_per_second": 3.445, "step": 225 }, { "epoch": 20.434782608695652, "grad_norm": 50.51103591918945, "learning_rate": 2.121212121212121e-05, "loss": 0.2965, "step": 230 }, { "epoch": 20.956521739130434, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9283356070518494, "eval_runtime": 0.7697, "eval_samples_per_second": 59.76, "eval_steps_per_second": 3.897, "step": 236 }, { "epoch": 21.32608695652174, "grad_norm": 40.85714340209961, "learning_rate": 2.0202020202020206e-05, "loss": 0.293, "step": 240 }, { "epoch": 21.934782608695652, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.012825608253479, "eval_runtime": 0.7852, "eval_samples_per_second": 58.582, "eval_steps_per_second": 3.821, "step": 247 }, { "epoch": 22.217391304347824, "grad_norm": 31.803544998168945, "learning_rate": 1.9191919191919194e-05, "loss": 0.2929, "step": 250 }, { "epoch": 22.91304347826087, "eval_accuracy": 0.7608695652173914, "eval_loss": 1.0449855327606201, "eval_runtime": 1.4131, "eval_samples_per_second": 32.553, "eval_steps_per_second": 2.123, "step": 258 }, { "epoch": 23.108695652173914, "grad_norm": 44.02550506591797, "learning_rate": 1.8181818181818182e-05, "loss": 0.2661, "step": 260 }, { "epoch": 23.97826086956522, "grad_norm": 62.81007766723633, "learning_rate": 1.7171717171717173e-05, "loss": 0.2878, "step": 270 }, { "epoch": 23.97826086956522, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1481518745422363, "eval_runtime": 0.7494, "eval_samples_per_second": 61.383, "eval_steps_per_second": 4.003, "step": 270 }, { "epoch": 24.869565217391305, "grad_norm": 46.08440017700195, "learning_rate": 1.616161616161616e-05, "loss": 0.2447, "step": 280 }, { "epoch": 24.956521739130434, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0715558528900146, "eval_runtime": 0.7605, "eval_samples_per_second": 60.483, "eval_steps_per_second": 3.945, "step": 281 }, { "epoch": 25.76086956521739, "grad_norm": 65.74066162109375, "learning_rate": 1.5151515151515153e-05, "loss": 0.2601, "step": 290 }, { "epoch": 25.934782608695652, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.0769968032836914, "eval_runtime": 0.9896, "eval_samples_per_second": 46.483, "eval_steps_per_second": 3.032, "step": 292 }, { "epoch": 26.652173913043477, "grad_norm": 39.677207946777344, "learning_rate": 1.4141414141414143e-05, "loss": 0.2299, "step": 300 }, { "epoch": 26.91304347826087, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.176945686340332, "eval_runtime": 1.0303, "eval_samples_per_second": 44.647, "eval_steps_per_second": 2.912, "step": 303 }, { "epoch": 27.543478260869566, "grad_norm": 51.47335433959961, "learning_rate": 1.3131313131313132e-05, "loss": 0.2401, "step": 310 }, { "epoch": 27.97826086956522, "eval_accuracy": 0.717391304347826, "eval_loss": 1.140651822090149, "eval_runtime": 0.7578, "eval_samples_per_second": 60.699, "eval_steps_per_second": 3.959, "step": 315 }, { "epoch": 28.434782608695652, "grad_norm": 35.28546905517578, "learning_rate": 1.2121212121212122e-05, "loss": 0.2347, "step": 320 }, { "epoch": 28.956521739130434, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1928998231887817, "eval_runtime": 0.7598, "eval_samples_per_second": 60.544, "eval_steps_per_second": 3.949, "step": 326 }, { "epoch": 29.32608695652174, "grad_norm": 34.29880905151367, "learning_rate": 1.1111111111111113e-05, "loss": 0.2584, "step": 330 }, { "epoch": 29.934782608695652, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.095699667930603, "eval_runtime": 0.7593, "eval_samples_per_second": 60.584, "eval_steps_per_second": 3.951, "step": 337 }, { "epoch": 30.217391304347824, "grad_norm": 35.58168411254883, "learning_rate": 1.0101010101010103e-05, "loss": 0.2204, "step": 340 }, { "epoch": 30.91304347826087, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.172129511833191, "eval_runtime": 1.009, "eval_samples_per_second": 45.59, "eval_steps_per_second": 2.973, "step": 348 }, { "epoch": 31.108695652173914, "grad_norm": 40.2222900390625, "learning_rate": 9.090909090909091e-06, "loss": 0.2057, "step": 350 }, { "epoch": 31.97826086956522, "grad_norm": 61.76468276977539, "learning_rate": 8.08080808080808e-06, "loss": 0.2031, "step": 360 }, { "epoch": 31.97826086956522, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0842841863632202, "eval_runtime": 0.7718, "eval_samples_per_second": 59.603, "eval_steps_per_second": 3.887, "step": 360 }, { "epoch": 32.869565217391305, "grad_norm": 50.13585662841797, "learning_rate": 7.070707070707071e-06, "loss": 0.2241, "step": 370 }, { "epoch": 32.95652173913044, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1350224018096924, "eval_runtime": 0.7479, "eval_samples_per_second": 61.506, "eval_steps_per_second": 4.011, "step": 371 }, { "epoch": 33.76086956521739, "grad_norm": 45.946346282958984, "learning_rate": 6.060606060606061e-06, "loss": 0.1798, "step": 380 }, { "epoch": 33.93478260869565, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.2418912649154663, "eval_runtime": 0.8117, "eval_samples_per_second": 56.668, "eval_steps_per_second": 3.696, "step": 382 }, { "epoch": 34.65217391304348, "grad_norm": 53.28645324707031, "learning_rate": 5.0505050505050515e-06, "loss": 0.2435, "step": 390 }, { "epoch": 34.91304347826087, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1522233486175537, "eval_runtime": 1.0835, "eval_samples_per_second": 42.454, "eval_steps_per_second": 2.769, "step": 393 }, { "epoch": 35.54347826086956, "grad_norm": 119.8594970703125, "learning_rate": 4.04040404040404e-06, "loss": 0.1857, "step": 400 }, { "epoch": 35.97826086956522, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1206859350204468, "eval_runtime": 0.8139, "eval_samples_per_second": 56.519, "eval_steps_per_second": 3.686, "step": 405 }, { "epoch": 36.43478260869565, "grad_norm": 62.90666198730469, "learning_rate": 3.0303030303030305e-06, "loss": 0.1889, "step": 410 }, { "epoch": 36.95652173913044, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1711089611053467, "eval_runtime": 0.8068, "eval_samples_per_second": 57.018, "eval_steps_per_second": 3.719, "step": 416 }, { "epoch": 37.32608695652174, "grad_norm": 36.55961608886719, "learning_rate": 2.02020202020202e-06, "loss": 0.2043, "step": 420 }, { "epoch": 37.93478260869565, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1978377103805542, "eval_runtime": 0.8416, "eval_samples_per_second": 54.66, "eval_steps_per_second": 3.565, "step": 427 }, { "epoch": 38.21739130434783, "grad_norm": 41.95515441894531, "learning_rate": 1.01010101010101e-06, "loss": 0.1951, "step": 430 }, { "epoch": 38.91304347826087, "eval_accuracy": 0.717391304347826, "eval_loss": 1.2106621265411377, "eval_runtime": 1.1122, "eval_samples_per_second": 41.361, "eval_steps_per_second": 2.697, "step": 438 }, { "epoch": 39.108695652173914, "grad_norm": 97.34293365478516, "learning_rate": 0.0, "loss": 0.1901, "step": 440 }, { "epoch": 39.108695652173914, "eval_accuracy": 0.717391304347826, "eval_loss": 1.2108197212219238, "eval_runtime": 0.8571, "eval_samples_per_second": 53.667, "eval_steps_per_second": 3.5, "step": 440 }, { "epoch": 39.108695652173914, "step": 440, "total_flos": 9.162177814462464e+17, "train_loss": 0.45710954666137693, "train_runtime": 931.6489, "train_samples_per_second": 30.956, "train_steps_per_second": 0.472 } ], "logging_steps": 10, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.162177814462464e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }