{ "best_metric": 0.7307692307692307, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV21/checkpoint-130", "epoch": 41.93023255813954, "eval_steps": 500, "global_step": 420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9302325581395349, "eval_accuracy": 0.34615384615384615, "eval_loss": 1.550955891609192, "eval_runtime": 0.9413, "eval_samples_per_second": 55.245, "eval_steps_per_second": 2.125, "step": 10 }, { "epoch": 1.1860465116279069, "grad_norm": 13.047734260559082, "learning_rate": 1.4285714285714285e-05, "loss": 6.5929, "step": 12 }, { "epoch": 1.9302325581395348, "eval_accuracy": 0.2692307692307692, "eval_loss": 1.4801901578903198, "eval_runtime": 0.914, "eval_samples_per_second": 56.894, "eval_steps_per_second": 2.188, "step": 20 }, { "epoch": 2.3720930232558137, "grad_norm": 18.362613677978516, "learning_rate": 2.857142857142857e-05, "loss": 5.6252, "step": 24 }, { "epoch": 2.9302325581395348, "eval_accuracy": 0.40384615384615385, "eval_loss": 1.1115041971206665, "eval_runtime": 3.1175, "eval_samples_per_second": 16.68, "eval_steps_per_second": 0.642, "step": 30 }, { "epoch": 3.558139534883721, "grad_norm": 30.29971694946289, "learning_rate": 4.2857142857142856e-05, "loss": 3.874, "step": 36 }, { "epoch": 3.9302325581395348, "eval_accuracy": 0.5576923076923077, "eval_loss": 0.9996148347854614, "eval_runtime": 0.9335, "eval_samples_per_second": 55.705, "eval_steps_per_second": 2.143, "step": 40 }, { "epoch": 4.7441860465116275, "grad_norm": 29.079408645629883, "learning_rate": 4.9206349206349204e-05, "loss": 2.7168, "step": 48 }, { "epoch": 4.930232558139535, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.8436340689659119, "eval_runtime": 1.286, "eval_samples_per_second": 40.437, "eval_steps_per_second": 1.555, "step": 50 }, { "epoch": 5.930232558139535, "grad_norm": 38.7504768371582, "learning_rate": 4.761904761904762e-05, "loss": 2.2435, "step": 60 }, { "epoch": 5.930232558139535, "eval_accuracy": 0.6153846153846154, "eval_loss": 0.9319868087768555, "eval_runtime": 0.9627, "eval_samples_per_second": 54.015, "eval_steps_per_second": 2.078, "step": 60 }, { "epoch": 6.930232558139535, "eval_accuracy": 0.6346153846153846, "eval_loss": 0.8411852121353149, "eval_runtime": 0.9446, "eval_samples_per_second": 55.048, "eval_steps_per_second": 2.117, "step": 70 }, { "epoch": 7.186046511627907, "grad_norm": 27.031049728393555, "learning_rate": 4.603174603174603e-05, "loss": 1.9334, "step": 72 }, { "epoch": 7.930232558139535, "eval_accuracy": 0.6730769230769231, "eval_loss": 0.8622324466705322, "eval_runtime": 1.1865, "eval_samples_per_second": 43.825, "eval_steps_per_second": 1.686, "step": 80 }, { "epoch": 8.372093023255815, "grad_norm": 29.655231475830078, "learning_rate": 4.4444444444444447e-05, "loss": 1.6303, "step": 84 }, { "epoch": 8.930232558139535, "eval_accuracy": 0.7115384615384616, "eval_loss": 0.9151535034179688, "eval_runtime": 0.9237, "eval_samples_per_second": 56.296, "eval_steps_per_second": 2.165, "step": 90 }, { "epoch": 9.55813953488372, "grad_norm": 22.004980087280273, "learning_rate": 4.2857142857142856e-05, "loss": 1.2748, "step": 96 }, { "epoch": 9.930232558139535, "eval_accuracy": 0.6730769230769231, "eval_loss": 0.9720916748046875, "eval_runtime": 0.9318, "eval_samples_per_second": 55.806, "eval_steps_per_second": 2.146, "step": 100 }, { "epoch": 10.744186046511627, "grad_norm": 30.746623992919922, "learning_rate": 4.126984126984127e-05, "loss": 1.0945, "step": 108 }, { "epoch": 10.930232558139535, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.0826632976531982, "eval_runtime": 1.2786, "eval_samples_per_second": 40.671, "eval_steps_per_second": 1.564, "step": 110 }, { "epoch": 11.930232558139535, "grad_norm": 22.60085105895996, "learning_rate": 3.968253968253968e-05, "loss": 0.8395, "step": 120 }, { "epoch": 11.930232558139535, "eval_accuracy": 0.7115384615384616, "eval_loss": 0.9152665734291077, "eval_runtime": 0.9881, "eval_samples_per_second": 52.629, "eval_steps_per_second": 2.024, "step": 120 }, { "epoch": 12.930232558139535, "eval_accuracy": 0.7307692307692307, "eval_loss": 0.8631380796432495, "eval_runtime": 0.9359, "eval_samples_per_second": 55.559, "eval_steps_per_second": 2.137, "step": 130 }, { "epoch": 13.186046511627907, "grad_norm": 28.28727912902832, "learning_rate": 3.809523809523809e-05, "loss": 0.8587, "step": 132 }, { "epoch": 13.930232558139535, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.1038739681243896, "eval_runtime": 1.1762, "eval_samples_per_second": 44.21, "eval_steps_per_second": 1.7, "step": 140 }, { "epoch": 14.372093023255815, "grad_norm": 30.497623443603516, "learning_rate": 3.650793650793651e-05, "loss": 0.8574, "step": 144 }, { "epoch": 14.930232558139535, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.0462819337844849, "eval_runtime": 0.9493, "eval_samples_per_second": 54.776, "eval_steps_per_second": 2.107, "step": 150 }, { "epoch": 15.55813953488372, "grad_norm": 19.123886108398438, "learning_rate": 3.492063492063492e-05, "loss": 0.7096, "step": 156 }, { "epoch": 15.930232558139535, "eval_accuracy": 0.7115384615384616, "eval_loss": 0.9990596175193787, "eval_runtime": 0.9614, "eval_samples_per_second": 54.09, "eval_steps_per_second": 2.08, "step": 160 }, { "epoch": 16.74418604651163, "grad_norm": 34.98741912841797, "learning_rate": 3.3333333333333335e-05, "loss": 0.6606, "step": 168 }, { "epoch": 16.930232558139537, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.051858901977539, "eval_runtime": 1.0499, "eval_samples_per_second": 49.531, "eval_steps_per_second": 1.905, "step": 170 }, { "epoch": 17.930232558139537, "grad_norm": 29.33749771118164, "learning_rate": 3.1746031746031745e-05, "loss": 0.5513, "step": 180 }, { "epoch": 17.930232558139537, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0864715576171875, "eval_runtime": 0.9297, "eval_samples_per_second": 55.932, "eval_steps_per_second": 2.151, "step": 180 }, { "epoch": 18.930232558139537, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.1139813661575317, "eval_runtime": 0.9356, "eval_samples_per_second": 55.582, "eval_steps_per_second": 2.138, "step": 190 }, { "epoch": 19.186046511627907, "grad_norm": 31.023462295532227, "learning_rate": 3.0158730158730158e-05, "loss": 0.61, "step": 192 }, { "epoch": 19.930232558139537, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.0289796590805054, "eval_runtime": 0.9508, "eval_samples_per_second": 54.693, "eval_steps_per_second": 2.104, "step": 200 }, { "epoch": 20.372093023255815, "grad_norm": 31.7921142578125, "learning_rate": 2.857142857142857e-05, "loss": 0.5278, "step": 204 }, { "epoch": 20.930232558139537, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.1002823114395142, "eval_runtime": 0.9325, "eval_samples_per_second": 55.763, "eval_steps_per_second": 2.145, "step": 210 }, { "epoch": 21.558139534883722, "grad_norm": 33.69935989379883, "learning_rate": 2.6984126984126984e-05, "loss": 0.4639, "step": 216 }, { "epoch": 21.930232558139537, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.2471978664398193, "eval_runtime": 0.9316, "eval_samples_per_second": 55.818, "eval_steps_per_second": 2.147, "step": 220 }, { "epoch": 22.74418604651163, "grad_norm": 20.980804443359375, "learning_rate": 2.5396825396825397e-05, "loss": 0.4719, "step": 228 }, { "epoch": 22.930232558139537, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.1545660495758057, "eval_runtime": 0.9225, "eval_samples_per_second": 56.37, "eval_steps_per_second": 2.168, "step": 230 }, { "epoch": 23.930232558139537, "grad_norm": 26.727859497070312, "learning_rate": 2.380952380952381e-05, "loss": 0.4212, "step": 240 }, { "epoch": 23.930232558139537, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.1084016561508179, "eval_runtime": 0.9361, "eval_samples_per_second": 55.552, "eval_steps_per_second": 2.137, "step": 240 }, { "epoch": 24.930232558139537, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.2952685356140137, "eval_runtime": 1.0838, "eval_samples_per_second": 47.981, "eval_steps_per_second": 1.845, "step": 250 }, { "epoch": 25.186046511627907, "grad_norm": 16.363554000854492, "learning_rate": 2.2222222222222223e-05, "loss": 0.4109, "step": 252 }, { "epoch": 25.930232558139537, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.1867846250534058, "eval_runtime": 0.9096, "eval_samples_per_second": 57.167, "eval_steps_per_second": 2.199, "step": 260 }, { "epoch": 26.372093023255815, "grad_norm": 42.207515716552734, "learning_rate": 2.0634920634920636e-05, "loss": 0.4236, "step": 264 }, { "epoch": 26.930232558139537, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.2559605836868286, "eval_runtime": 0.9101, "eval_samples_per_second": 57.135, "eval_steps_per_second": 2.197, "step": 270 }, { "epoch": 27.558139534883722, "grad_norm": 15.23690414428711, "learning_rate": 1.9047619047619046e-05, "loss": 0.3638, "step": 276 }, { "epoch": 27.930232558139537, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.216145634651184, "eval_runtime": 1.2163, "eval_samples_per_second": 42.753, "eval_steps_per_second": 1.644, "step": 280 }, { "epoch": 28.74418604651163, "grad_norm": 33.352203369140625, "learning_rate": 1.746031746031746e-05, "loss": 0.3944, "step": 288 }, { "epoch": 28.930232558139537, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.1581984758377075, "eval_runtime": 0.9294, "eval_samples_per_second": 55.948, "eval_steps_per_second": 2.152, "step": 290 }, { "epoch": 29.930232558139537, "grad_norm": 19.420169830322266, "learning_rate": 1.5873015873015872e-05, "loss": 0.3621, "step": 300 }, { "epoch": 29.930232558139537, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.2993353605270386, "eval_runtime": 0.912, "eval_samples_per_second": 57.019, "eval_steps_per_second": 2.193, "step": 300 }, { "epoch": 30.930232558139537, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.1401245594024658, "eval_runtime": 1.2431, "eval_samples_per_second": 41.83, "eval_steps_per_second": 1.609, "step": 310 }, { "epoch": 31.186046511627907, "grad_norm": 24.769433975219727, "learning_rate": 1.4285714285714285e-05, "loss": 0.3203, "step": 312 }, { "epoch": 31.930232558139537, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.3228098154067993, "eval_runtime": 0.9358, "eval_samples_per_second": 55.568, "eval_steps_per_second": 2.137, "step": 320 }, { "epoch": 32.372093023255815, "grad_norm": 25.592016220092773, "learning_rate": 1.2698412698412699e-05, "loss": 0.3014, "step": 324 }, { "epoch": 32.93023255813954, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.2812803983688354, "eval_runtime": 1.1414, "eval_samples_per_second": 45.557, "eval_steps_per_second": 1.752, "step": 330 }, { "epoch": 33.55813953488372, "grad_norm": 17.85179328918457, "learning_rate": 1.1111111111111112e-05, "loss": 0.3464, "step": 336 }, { "epoch": 33.93023255813954, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.4768296480178833, "eval_runtime": 0.9065, "eval_samples_per_second": 57.364, "eval_steps_per_second": 2.206, "step": 340 }, { "epoch": 34.74418604651163, "grad_norm": 15.770562171936035, "learning_rate": 9.523809523809523e-06, "loss": 0.2891, "step": 348 }, { "epoch": 34.93023255813954, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.2304089069366455, "eval_runtime": 0.9275, "eval_samples_per_second": 56.067, "eval_steps_per_second": 2.156, "step": 350 }, { "epoch": 35.93023255813954, "grad_norm": 22.76992416381836, "learning_rate": 7.936507936507936e-06, "loss": 0.3153, "step": 360 }, { "epoch": 35.93023255813954, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.3096110820770264, "eval_runtime": 1.2083, "eval_samples_per_second": 43.037, "eval_steps_per_second": 1.655, "step": 360 }, { "epoch": 36.93023255813954, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.3564749956130981, "eval_runtime": 0.9106, "eval_samples_per_second": 57.103, "eval_steps_per_second": 2.196, "step": 370 }, { "epoch": 37.18604651162791, "grad_norm": 32.75940704345703, "learning_rate": 6.349206349206349e-06, "loss": 0.2762, "step": 372 }, { "epoch": 37.93023255813954, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.2931231260299683, "eval_runtime": 0.9165, "eval_samples_per_second": 56.736, "eval_steps_per_second": 2.182, "step": 380 }, { "epoch": 38.372093023255815, "grad_norm": 28.327919006347656, "learning_rate": 4.7619047619047615e-06, "loss": 0.3191, "step": 384 }, { "epoch": 38.93023255813954, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.2440568208694458, "eval_runtime": 1.2601, "eval_samples_per_second": 41.266, "eval_steps_per_second": 1.587, "step": 390 }, { "epoch": 39.55813953488372, "grad_norm": 17.981231689453125, "learning_rate": 3.1746031746031746e-06, "loss": 0.3009, "step": 396 }, { "epoch": 39.93023255813954, "eval_accuracy": 0.7307692307692307, "eval_loss": 1.2109795808792114, "eval_runtime": 0.9344, "eval_samples_per_second": 55.649, "eval_steps_per_second": 2.14, "step": 400 }, { "epoch": 40.74418604651163, "grad_norm": 14.792089462280273, "learning_rate": 1.5873015873015873e-06, "loss": 0.2645, "step": 408 }, { "epoch": 40.93023255813954, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.2433098554611206, "eval_runtime": 0.9244, "eval_samples_per_second": 56.254, "eval_steps_per_second": 2.164, "step": 410 }, { "epoch": 41.93023255813954, "grad_norm": 13.357348442077637, "learning_rate": 0.0, "loss": 0.2497, "step": 420 }, { "epoch": 41.93023255813954, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.2460753917694092, "eval_runtime": 1.8198, "eval_samples_per_second": 28.575, "eval_steps_per_second": 1.099, "step": 420 }, { "epoch": 41.93023255813954, "step": 420, "total_flos": 1.8345690247389512e+18, "train_loss": 1.1112867690268018, "train_runtime": 1368.6695, "train_samples_per_second": 41.795, "train_steps_per_second": 0.307 } ], "logging_steps": 12, "max_steps": 420, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8345690247389512e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }