{ "best_metric": 0.6445224246502909, "best_model_checkpoint": "swin-tiny-patch4-window7-224-category-classification/checkpoint-868", "epoch": 9.94818652849741, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10362694300518134, "grad_norm": 4.037997722625732, "learning_rate": 5.208333333333334e-06, "loss": 3.0145, "step": 10 }, { "epoch": 0.20725388601036268, "grad_norm": 3.5954205989837646, "learning_rate": 1.0416666666666668e-05, "loss": 2.9504, "step": 20 }, { "epoch": 0.31088082901554404, "grad_norm": 3.6831040382385254, "learning_rate": 1.5625e-05, "loss": 2.803, "step": 30 }, { "epoch": 0.41450777202072536, "grad_norm": 4.399529933929443, "learning_rate": 2.0833333333333336e-05, "loss": 2.5636, "step": 40 }, { "epoch": 0.5181347150259067, "grad_norm": 5.646263599395752, "learning_rate": 2.604166666666667e-05, "loss": 2.354, "step": 50 }, { "epoch": 0.6217616580310881, "grad_norm": 5.669042110443115, "learning_rate": 3.125e-05, "loss": 2.1137, "step": 60 }, { "epoch": 0.7253886010362695, "grad_norm": 6.388670444488525, "learning_rate": 3.6458333333333336e-05, "loss": 2.0586, "step": 70 }, { "epoch": 0.8290155440414507, "grad_norm": 4.985081672668457, "learning_rate": 4.166666666666667e-05, "loss": 1.9298, "step": 80 }, { "epoch": 0.9326424870466321, "grad_norm": 5.885165691375732, "learning_rate": 4.6875e-05, "loss": 1.89, "step": 90 }, { "epoch": 0.9948186528497409, "eval_accuracy": 0.5441042157381147, "eval_loss": 1.507540225982666, "eval_runtime": 4294.0719, "eval_samples_per_second": 4.845, "eval_steps_per_second": 0.076, "step": 96 }, { "epoch": 1.0362694300518134, "grad_norm": 6.3568572998046875, "learning_rate": 4.976851851851852e-05, "loss": 1.7875, "step": 100 }, { "epoch": 1.1398963730569949, "grad_norm": 5.888931751251221, "learning_rate": 4.9189814814814815e-05, "loss": 1.8005, "step": 110 }, { "epoch": 1.2435233160621761, "grad_norm": 5.636714458465576, "learning_rate": 4.8611111111111115e-05, "loss": 1.7211, "step": 120 }, { "epoch": 1.3471502590673574, "grad_norm": 8.701831817626953, "learning_rate": 4.803240740740741e-05, "loss": 1.7164, "step": 130 }, { "epoch": 1.450777202072539, "grad_norm": 6.59879207611084, "learning_rate": 4.745370370370371e-05, "loss": 1.6864, "step": 140 }, { "epoch": 1.5544041450777202, "grad_norm": 5.239670753479004, "learning_rate": 4.6875e-05, "loss": 1.6295, "step": 150 }, { "epoch": 1.6580310880829017, "grad_norm": 8.442557334899902, "learning_rate": 4.62962962962963e-05, "loss": 1.5609, "step": 160 }, { "epoch": 1.761658031088083, "grad_norm": 4.829947471618652, "learning_rate": 4.5717592592592594e-05, "loss": 1.6158, "step": 170 }, { "epoch": 1.8652849740932642, "grad_norm": 6.158498764038086, "learning_rate": 4.5138888888888894e-05, "loss": 1.6215, "step": 180 }, { "epoch": 1.9689119170984455, "grad_norm": 6.387746810913086, "learning_rate": 4.456018518518519e-05, "loss": 1.6059, "step": 190 }, { "epoch": 2.0, "eval_accuracy": 0.5983271643512955, "eval_loss": 1.3102949857711792, "eval_runtime": 3836.7945, "eval_samples_per_second": 5.422, "eval_steps_per_second": 0.085, "step": 193 }, { "epoch": 2.0725388601036268, "grad_norm": 6.294243812561035, "learning_rate": 4.3981481481481486e-05, "loss": 1.5034, "step": 200 }, { "epoch": 2.1761658031088085, "grad_norm": 5.085219860076904, "learning_rate": 4.340277777777778e-05, "loss": 1.5468, "step": 210 }, { "epoch": 2.2797927461139897, "grad_norm": 5.5421624183654785, "learning_rate": 4.282407407407408e-05, "loss": 1.5022, "step": 220 }, { "epoch": 2.383419689119171, "grad_norm": 8.139309883117676, "learning_rate": 4.224537037037037e-05, "loss": 1.5609, "step": 230 }, { "epoch": 2.4870466321243523, "grad_norm": 6.149181842803955, "learning_rate": 4.166666666666667e-05, "loss": 1.5462, "step": 240 }, { "epoch": 2.5906735751295336, "grad_norm": 4.927513122558594, "learning_rate": 4.1087962962962965e-05, "loss": 1.4448, "step": 250 }, { "epoch": 2.694300518134715, "grad_norm": 5.6315999031066895, "learning_rate": 4.0509259259259265e-05, "loss": 1.5033, "step": 260 }, { "epoch": 2.7979274611398965, "grad_norm": 5.844498634338379, "learning_rate": 3.993055555555556e-05, "loss": 1.4713, "step": 270 }, { "epoch": 2.901554404145078, "grad_norm": 5.9281182289123535, "learning_rate": 3.935185185185186e-05, "loss": 1.4844, "step": 280 }, { "epoch": 2.994818652849741, "eval_accuracy": 0.6106811517569581, "eval_loss": 1.2595337629318237, "eval_runtime": 3906.6265, "eval_samples_per_second": 5.325, "eval_steps_per_second": 0.083, "step": 289 }, { "epoch": 3.005181347150259, "grad_norm": 5.67548942565918, "learning_rate": 3.877314814814815e-05, "loss": 1.453, "step": 290 }, { "epoch": 3.1088082901554404, "grad_norm": 5.950716495513916, "learning_rate": 3.8194444444444444e-05, "loss": 1.4132, "step": 300 }, { "epoch": 3.2124352331606216, "grad_norm": 6.033442497253418, "learning_rate": 3.7615740740740744e-05, "loss": 1.411, "step": 310 }, { "epoch": 3.3160621761658033, "grad_norm": 6.805812358856201, "learning_rate": 3.7037037037037037e-05, "loss": 1.4046, "step": 320 }, { "epoch": 3.4196891191709846, "grad_norm": 6.180249214172363, "learning_rate": 3.6458333333333336e-05, "loss": 1.4112, "step": 330 }, { "epoch": 3.523316062176166, "grad_norm": 5.689445495605469, "learning_rate": 3.587962962962963e-05, "loss": 1.4126, "step": 340 }, { "epoch": 3.626943005181347, "grad_norm": 7.714968204498291, "learning_rate": 3.530092592592593e-05, "loss": 1.4563, "step": 350 }, { "epoch": 3.7305699481865284, "grad_norm": 7.22826623916626, "learning_rate": 3.472222222222222e-05, "loss": 1.3952, "step": 360 }, { "epoch": 3.8341968911917097, "grad_norm": 6.826329231262207, "learning_rate": 3.414351851851852e-05, "loss": 1.4198, "step": 370 }, { "epoch": 3.937823834196891, "grad_norm": 6.155144691467285, "learning_rate": 3.3564814814814815e-05, "loss": 1.4392, "step": 380 }, { "epoch": 4.0, "eval_accuracy": 0.6151516608181512, "eval_loss": 1.241403341293335, "eval_runtime": 3776.8643, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.086, "step": 386 }, { "epoch": 4.041450777202073, "grad_norm": 5.685715675354004, "learning_rate": 3.2986111111111115e-05, "loss": 1.3702, "step": 390 }, { "epoch": 4.1450777202072535, "grad_norm": 6.5382585525512695, "learning_rate": 3.240740740740741e-05, "loss": 1.3525, "step": 400 }, { "epoch": 4.248704663212435, "grad_norm": 5.420712947845459, "learning_rate": 3.182870370370371e-05, "loss": 1.3657, "step": 410 }, { "epoch": 4.352331606217617, "grad_norm": 5.577756881713867, "learning_rate": 3.125e-05, "loss": 1.3743, "step": 420 }, { "epoch": 4.455958549222798, "grad_norm": 6.49379301071167, "learning_rate": 3.06712962962963e-05, "loss": 1.358, "step": 430 }, { "epoch": 4.5595854922279795, "grad_norm": 5.322962284088135, "learning_rate": 3.0092592592592593e-05, "loss": 1.3334, "step": 440 }, { "epoch": 4.66321243523316, "grad_norm": 5.416570663452148, "learning_rate": 2.951388888888889e-05, "loss": 1.3498, "step": 450 }, { "epoch": 4.766839378238342, "grad_norm": 5.773409843444824, "learning_rate": 2.8935185185185186e-05, "loss": 1.3413, "step": 460 }, { "epoch": 4.870466321243523, "grad_norm": 5.400073528289795, "learning_rate": 2.8356481481481483e-05, "loss": 1.2978, "step": 470 }, { "epoch": 4.974093264248705, "grad_norm": 6.004559516906738, "learning_rate": 2.777777777777778e-05, "loss": 1.3431, "step": 480 }, { "epoch": 4.994818652849741, "eval_accuracy": 0.6285151180118252, "eval_loss": 1.195352554321289, "eval_runtime": 3677.8276, "eval_samples_per_second": 5.656, "eval_steps_per_second": 0.089, "step": 482 }, { "epoch": 5.077720207253886, "grad_norm": 6.405722141265869, "learning_rate": 2.7199074074074076e-05, "loss": 1.3108, "step": 490 }, { "epoch": 5.181347150259067, "grad_norm": 5.789535045623779, "learning_rate": 2.6620370370370372e-05, "loss": 1.2901, "step": 500 }, { "epoch": 5.284974093264249, "grad_norm": 6.413881301879883, "learning_rate": 2.604166666666667e-05, "loss": 1.2965, "step": 510 }, { "epoch": 5.38860103626943, "grad_norm": 5.42963171005249, "learning_rate": 2.5462962962962965e-05, "loss": 1.2921, "step": 520 }, { "epoch": 5.492227979274611, "grad_norm": 5.485264778137207, "learning_rate": 2.488425925925926e-05, "loss": 1.2903, "step": 530 }, { "epoch": 5.595854922279793, "grad_norm": 5.791281223297119, "learning_rate": 2.4305555555555558e-05, "loss": 1.2753, "step": 540 }, { "epoch": 5.699481865284974, "grad_norm": 5.666800498962402, "learning_rate": 2.3726851851851854e-05, "loss": 1.2826, "step": 550 }, { "epoch": 5.803108808290156, "grad_norm": 5.542099475860596, "learning_rate": 2.314814814814815e-05, "loss": 1.3549, "step": 560 }, { "epoch": 5.9067357512953365, "grad_norm": 6.228384017944336, "learning_rate": 2.2569444444444447e-05, "loss": 1.2897, "step": 570 }, { "epoch": 6.0, "eval_accuracy": 0.6383694659424122, "eval_loss": 1.1611359119415283, "eval_runtime": 3648.3627, "eval_samples_per_second": 5.702, "eval_steps_per_second": 0.089, "step": 579 }, { "epoch": 6.010362694300518, "grad_norm": 6.60087776184082, "learning_rate": 2.1990740740740743e-05, "loss": 1.3194, "step": 580 }, { "epoch": 6.1139896373057, "grad_norm": 5.230093479156494, "learning_rate": 2.141203703703704e-05, "loss": 1.2622, "step": 590 }, { "epoch": 6.217616580310881, "grad_norm": 6.67712926864624, "learning_rate": 2.0833333333333336e-05, "loss": 1.2835, "step": 600 }, { "epoch": 6.321243523316062, "grad_norm": 6.0210065841674805, "learning_rate": 2.0254629629629632e-05, "loss": 1.2516, "step": 610 }, { "epoch": 6.424870466321243, "grad_norm": 5.083096981048584, "learning_rate": 1.967592592592593e-05, "loss": 1.2069, "step": 620 }, { "epoch": 6.528497409326425, "grad_norm": 6.067955493927002, "learning_rate": 1.9097222222222222e-05, "loss": 1.2808, "step": 630 }, { "epoch": 6.632124352331607, "grad_norm": 8.958367347717285, "learning_rate": 1.8518518518518518e-05, "loss": 1.2798, "step": 640 }, { "epoch": 6.7357512953367875, "grad_norm": 5.711329460144043, "learning_rate": 1.7939814814814815e-05, "loss": 1.2249, "step": 650 }, { "epoch": 6.839378238341969, "grad_norm": 5.1562957763671875, "learning_rate": 1.736111111111111e-05, "loss": 1.2413, "step": 660 }, { "epoch": 6.94300518134715, "grad_norm": 6.775362968444824, "learning_rate": 1.6782407407407408e-05, "loss": 1.2222, "step": 670 }, { "epoch": 6.994818652849741, "eval_accuracy": 0.6417343652357833, "eval_loss": 1.1575372219085693, "eval_runtime": 3709.0324, "eval_samples_per_second": 5.609, "eval_steps_per_second": 0.088, "step": 675 }, { "epoch": 7.046632124352332, "grad_norm": 6.662919998168945, "learning_rate": 1.6203703703703704e-05, "loss": 1.2678, "step": 680 }, { "epoch": 7.150259067357513, "grad_norm": 5.15730619430542, "learning_rate": 1.5625e-05, "loss": 1.2303, "step": 690 }, { "epoch": 7.253886010362694, "grad_norm": 5.7805867195129395, "learning_rate": 1.5046296296296297e-05, "loss": 1.2413, "step": 700 }, { "epoch": 7.357512953367876, "grad_norm": 7.833797454833984, "learning_rate": 1.4467592592592593e-05, "loss": 1.2475, "step": 710 }, { "epoch": 7.461139896373057, "grad_norm": 5.12682580947876, "learning_rate": 1.388888888888889e-05, "loss": 1.2079, "step": 720 }, { "epoch": 7.564766839378239, "grad_norm": 6.603540897369385, "learning_rate": 1.3310185185185186e-05, "loss": 1.2547, "step": 730 }, { "epoch": 7.668393782383419, "grad_norm": 6.611011981964111, "learning_rate": 1.2731481481481482e-05, "loss": 1.2409, "step": 740 }, { "epoch": 7.772020725388601, "grad_norm": 5.739754676818848, "learning_rate": 1.2152777777777779e-05, "loss": 1.228, "step": 750 }, { "epoch": 7.875647668393782, "grad_norm": 5.423896312713623, "learning_rate": 1.1574074074074075e-05, "loss": 1.1859, "step": 760 }, { "epoch": 7.979274611398964, "grad_norm": 5.076350212097168, "learning_rate": 1.0995370370370372e-05, "loss": 1.212, "step": 770 }, { "epoch": 8.0, "eval_accuracy": 0.6421189251550257, "eval_loss": 1.1474467515945435, "eval_runtime": 3776.6533, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.086, "step": 772 }, { "epoch": 8.082901554404145, "grad_norm": 5.369143486022949, "learning_rate": 1.0416666666666668e-05, "loss": 1.1907, "step": 780 }, { "epoch": 8.186528497409327, "grad_norm": 6.046126365661621, "learning_rate": 9.837962962962964e-06, "loss": 1.1993, "step": 790 }, { "epoch": 8.290155440414507, "grad_norm": 6.894242286682129, "learning_rate": 9.259259259259259e-06, "loss": 1.2238, "step": 800 }, { "epoch": 8.393782383419689, "grad_norm": 6.160308361053467, "learning_rate": 8.680555555555556e-06, "loss": 1.2258, "step": 810 }, { "epoch": 8.49740932642487, "grad_norm": 5.83083963394165, "learning_rate": 8.101851851851852e-06, "loss": 1.1913, "step": 820 }, { "epoch": 8.601036269430052, "grad_norm": 6.659111499786377, "learning_rate": 7.523148148148148e-06, "loss": 1.1878, "step": 830 }, { "epoch": 8.704663212435234, "grad_norm": 7.118185043334961, "learning_rate": 6.944444444444445e-06, "loss": 1.1841, "step": 840 }, { "epoch": 8.808290155440414, "grad_norm": 5.696249008178711, "learning_rate": 6.365740740740741e-06, "loss": 1.1934, "step": 850 }, { "epoch": 8.911917098445596, "grad_norm": 5.1954545974731445, "learning_rate": 5.787037037037038e-06, "loss": 1.2087, "step": 860 }, { "epoch": 8.994818652849741, "eval_accuracy": 0.6445224246502909, "eval_loss": 1.1410062313079834, "eval_runtime": 3882.2074, "eval_samples_per_second": 5.359, "eval_steps_per_second": 0.084, "step": 868 }, { "epoch": 9.015544041450777, "grad_norm": 6.596940994262695, "learning_rate": 5.208333333333334e-06, "loss": 1.2113, "step": 870 }, { "epoch": 9.119170984455959, "grad_norm": 6.038557052612305, "learning_rate": 4.6296296296296296e-06, "loss": 1.1918, "step": 880 }, { "epoch": 9.22279792746114, "grad_norm": 5.016276836395264, "learning_rate": 4.050925925925926e-06, "loss": 1.2371, "step": 890 }, { "epoch": 9.32642487046632, "grad_norm": 5.792815208435059, "learning_rate": 3.4722222222222224e-06, "loss": 1.231, "step": 900 }, { "epoch": 9.430051813471502, "grad_norm": 5.179474353790283, "learning_rate": 2.893518518518519e-06, "loss": 1.19, "step": 910 }, { "epoch": 9.533678756476684, "grad_norm": 6.146459579467773, "learning_rate": 2.3148148148148148e-06, "loss": 1.1693, "step": 920 }, { "epoch": 9.637305699481866, "grad_norm": 5.89201021194458, "learning_rate": 1.7361111111111112e-06, "loss": 1.1679, "step": 930 }, { "epoch": 9.740932642487046, "grad_norm": 6.085833549499512, "learning_rate": 1.1574074074074074e-06, "loss": 1.1812, "step": 940 }, { "epoch": 9.844559585492227, "grad_norm": 5.505359172821045, "learning_rate": 5.787037037037037e-07, "loss": 1.1552, "step": 950 }, { "epoch": 9.94818652849741, "grad_norm": 6.020310878753662, "learning_rate": 0.0, "loss": 1.1897, "step": 960 }, { "epoch": 9.94818652849741, "eval_accuracy": 0.6431764649329423, "eval_loss": 1.1433619260787964, "eval_runtime": 3906.8512, "eval_samples_per_second": 5.325, "eval_steps_per_second": 0.083, "step": 960 }, { "epoch": 9.94818652849741, "step": 960, "total_flos": 6.10407050089078e+18, "train_loss": 1.447805991768837, "train_runtime": 79883.0648, "train_samples_per_second": 3.088, "train_steps_per_second": 0.012 } ], "logging_steps": 10, "max_steps": 960, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.10407050089078e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }