{
"best_metric": 0.6445224246502909,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-category-classification/checkpoint-868",
"epoch": 9.94818652849741,
"eval_steps": 500,
"global_step": 960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10362694300518134,
"grad_norm": 4.037997722625732,
"learning_rate": 5.208333333333334e-06,
"loss": 3.0145,
"step": 10
},
{
"epoch": 0.20725388601036268,
"grad_norm": 3.5954205989837646,
"learning_rate": 1.0416666666666668e-05,
"loss": 2.9504,
"step": 20
},
{
"epoch": 0.31088082901554404,
"grad_norm": 3.6831040382385254,
"learning_rate": 1.5625e-05,
"loss": 2.803,
"step": 30
},
{
"epoch": 0.41450777202072536,
"grad_norm": 4.399529933929443,
"learning_rate": 2.0833333333333336e-05,
"loss": 2.5636,
"step": 40
},
{
"epoch": 0.5181347150259067,
"grad_norm": 5.646263599395752,
"learning_rate": 2.604166666666667e-05,
"loss": 2.354,
"step": 50
},
{
"epoch": 0.6217616580310881,
"grad_norm": 5.669042110443115,
"learning_rate": 3.125e-05,
"loss": 2.1137,
"step": 60
},
{
"epoch": 0.7253886010362695,
"grad_norm": 6.388670444488525,
"learning_rate": 3.6458333333333336e-05,
"loss": 2.0586,
"step": 70
},
{
"epoch": 0.8290155440414507,
"grad_norm": 4.985081672668457,
"learning_rate": 4.166666666666667e-05,
"loss": 1.9298,
"step": 80
},
{
"epoch": 0.9326424870466321,
"grad_norm": 5.885165691375732,
"learning_rate": 4.6875e-05,
"loss": 1.89,
"step": 90
},
{
"epoch": 0.9948186528497409,
"eval_accuracy": 0.5441042157381147,
"eval_loss": 1.507540225982666,
"eval_runtime": 4294.0719,
"eval_samples_per_second": 4.845,
"eval_steps_per_second": 0.076,
"step": 96
},
{
"epoch": 1.0362694300518134,
"grad_norm": 6.3568572998046875,
"learning_rate": 4.976851851851852e-05,
"loss": 1.7875,
"step": 100
},
{
"epoch": 1.1398963730569949,
"grad_norm": 5.888931751251221,
"learning_rate": 4.9189814814814815e-05,
"loss": 1.8005,
"step": 110
},
{
"epoch": 1.2435233160621761,
"grad_norm": 5.636714458465576,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.7211,
"step": 120
},
{
"epoch": 1.3471502590673574,
"grad_norm": 8.701831817626953,
"learning_rate": 4.803240740740741e-05,
"loss": 1.7164,
"step": 130
},
{
"epoch": 1.450777202072539,
"grad_norm": 6.59879207611084,
"learning_rate": 4.745370370370371e-05,
"loss": 1.6864,
"step": 140
},
{
"epoch": 1.5544041450777202,
"grad_norm": 5.239670753479004,
"learning_rate": 4.6875e-05,
"loss": 1.6295,
"step": 150
},
{
"epoch": 1.6580310880829017,
"grad_norm": 8.442557334899902,
"learning_rate": 4.62962962962963e-05,
"loss": 1.5609,
"step": 160
},
{
"epoch": 1.761658031088083,
"grad_norm": 4.829947471618652,
"learning_rate": 4.5717592592592594e-05,
"loss": 1.6158,
"step": 170
},
{
"epoch": 1.8652849740932642,
"grad_norm": 6.158498764038086,
"learning_rate": 4.5138888888888894e-05,
"loss": 1.6215,
"step": 180
},
{
"epoch": 1.9689119170984455,
"grad_norm": 6.387746810913086,
"learning_rate": 4.456018518518519e-05,
"loss": 1.6059,
"step": 190
},
{
"epoch": 2.0,
"eval_accuracy": 0.5983271643512955,
"eval_loss": 1.3102949857711792,
"eval_runtime": 3836.7945,
"eval_samples_per_second": 5.422,
"eval_steps_per_second": 0.085,
"step": 193
},
{
"epoch": 2.0725388601036268,
"grad_norm": 6.294243812561035,
"learning_rate": 4.3981481481481486e-05,
"loss": 1.5034,
"step": 200
},
{
"epoch": 2.1761658031088085,
"grad_norm": 5.085219860076904,
"learning_rate": 4.340277777777778e-05,
"loss": 1.5468,
"step": 210
},
{
"epoch": 2.2797927461139897,
"grad_norm": 5.5421624183654785,
"learning_rate": 4.282407407407408e-05,
"loss": 1.5022,
"step": 220
},
{
"epoch": 2.383419689119171,
"grad_norm": 8.139309883117676,
"learning_rate": 4.224537037037037e-05,
"loss": 1.5609,
"step": 230
},
{
"epoch": 2.4870466321243523,
"grad_norm": 6.149181842803955,
"learning_rate": 4.166666666666667e-05,
"loss": 1.5462,
"step": 240
},
{
"epoch": 2.5906735751295336,
"grad_norm": 4.927513122558594,
"learning_rate": 4.1087962962962965e-05,
"loss": 1.4448,
"step": 250
},
{
"epoch": 2.694300518134715,
"grad_norm": 5.6315999031066895,
"learning_rate": 4.0509259259259265e-05,
"loss": 1.5033,
"step": 260
},
{
"epoch": 2.7979274611398965,
"grad_norm": 5.844498634338379,
"learning_rate": 3.993055555555556e-05,
"loss": 1.4713,
"step": 270
},
{
"epoch": 2.901554404145078,
"grad_norm": 5.9281182289123535,
"learning_rate": 3.935185185185186e-05,
"loss": 1.4844,
"step": 280
},
{
"epoch": 2.994818652849741,
"eval_accuracy": 0.6106811517569581,
"eval_loss": 1.2595337629318237,
"eval_runtime": 3906.6265,
"eval_samples_per_second": 5.325,
"eval_steps_per_second": 0.083,
"step": 289
},
{
"epoch": 3.005181347150259,
"grad_norm": 5.67548942565918,
"learning_rate": 3.877314814814815e-05,
"loss": 1.453,
"step": 290
},
{
"epoch": 3.1088082901554404,
"grad_norm": 5.950716495513916,
"learning_rate": 3.8194444444444444e-05,
"loss": 1.4132,
"step": 300
},
{
"epoch": 3.2124352331606216,
"grad_norm": 6.033442497253418,
"learning_rate": 3.7615740740740744e-05,
"loss": 1.411,
"step": 310
},
{
"epoch": 3.3160621761658033,
"grad_norm": 6.805812358856201,
"learning_rate": 3.7037037037037037e-05,
"loss": 1.4046,
"step": 320
},
{
"epoch": 3.4196891191709846,
"grad_norm": 6.180249214172363,
"learning_rate": 3.6458333333333336e-05,
"loss": 1.4112,
"step": 330
},
{
"epoch": 3.523316062176166,
"grad_norm": 5.689445495605469,
"learning_rate": 3.587962962962963e-05,
"loss": 1.4126,
"step": 340
},
{
"epoch": 3.626943005181347,
"grad_norm": 7.714968204498291,
"learning_rate": 3.530092592592593e-05,
"loss": 1.4563,
"step": 350
},
{
"epoch": 3.7305699481865284,
"grad_norm": 7.22826623916626,
"learning_rate": 3.472222222222222e-05,
"loss": 1.3952,
"step": 360
},
{
"epoch": 3.8341968911917097,
"grad_norm": 6.826329231262207,
"learning_rate": 3.414351851851852e-05,
"loss": 1.4198,
"step": 370
},
{
"epoch": 3.937823834196891,
"grad_norm": 6.155144691467285,
"learning_rate": 3.3564814814814815e-05,
"loss": 1.4392,
"step": 380
},
{
"epoch": 4.0,
"eval_accuracy": 0.6151516608181512,
"eval_loss": 1.241403341293335,
"eval_runtime": 3776.8643,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 0.086,
"step": 386
},
{
"epoch": 4.041450777202073,
"grad_norm": 5.685715675354004,
"learning_rate": 3.2986111111111115e-05,
"loss": 1.3702,
"step": 390
},
{
"epoch": 4.1450777202072535,
"grad_norm": 6.5382585525512695,
"learning_rate": 3.240740740740741e-05,
"loss": 1.3525,
"step": 400
},
{
"epoch": 4.248704663212435,
"grad_norm": 5.420712947845459,
"learning_rate": 3.182870370370371e-05,
"loss": 1.3657,
"step": 410
},
{
"epoch": 4.352331606217617,
"grad_norm": 5.577756881713867,
"learning_rate": 3.125e-05,
"loss": 1.3743,
"step": 420
},
{
"epoch": 4.455958549222798,
"grad_norm": 6.49379301071167,
"learning_rate": 3.06712962962963e-05,
"loss": 1.358,
"step": 430
},
{
"epoch": 4.5595854922279795,
"grad_norm": 5.322962284088135,
"learning_rate": 3.0092592592592593e-05,
"loss": 1.3334,
"step": 440
},
{
"epoch": 4.66321243523316,
"grad_norm": 5.416570663452148,
"learning_rate": 2.951388888888889e-05,
"loss": 1.3498,
"step": 450
},
{
"epoch": 4.766839378238342,
"grad_norm": 5.773409843444824,
"learning_rate": 2.8935185185185186e-05,
"loss": 1.3413,
"step": 460
},
{
"epoch": 4.870466321243523,
"grad_norm": 5.400073528289795,
"learning_rate": 2.8356481481481483e-05,
"loss": 1.2978,
"step": 470
},
{
"epoch": 4.974093264248705,
"grad_norm": 6.004559516906738,
"learning_rate": 2.777777777777778e-05,
"loss": 1.3431,
"step": 480
},
{
"epoch": 4.994818652849741,
"eval_accuracy": 0.6285151180118252,
"eval_loss": 1.195352554321289,
"eval_runtime": 3677.8276,
"eval_samples_per_second": 5.656,
"eval_steps_per_second": 0.089,
"step": 482
},
{
"epoch": 5.077720207253886,
"grad_norm": 6.405722141265869,
"learning_rate": 2.7199074074074076e-05,
"loss": 1.3108,
"step": 490
},
{
"epoch": 5.181347150259067,
"grad_norm": 5.789535045623779,
"learning_rate": 2.6620370370370372e-05,
"loss": 1.2901,
"step": 500
},
{
"epoch": 5.284974093264249,
"grad_norm": 6.413881301879883,
"learning_rate": 2.604166666666667e-05,
"loss": 1.2965,
"step": 510
},
{
"epoch": 5.38860103626943,
"grad_norm": 5.42963171005249,
"learning_rate": 2.5462962962962965e-05,
"loss": 1.2921,
"step": 520
},
{
"epoch": 5.492227979274611,
"grad_norm": 5.485264778137207,
"learning_rate": 2.488425925925926e-05,
"loss": 1.2903,
"step": 530
},
{
"epoch": 5.595854922279793,
"grad_norm": 5.791281223297119,
"learning_rate": 2.4305555555555558e-05,
"loss": 1.2753,
"step": 540
},
{
"epoch": 5.699481865284974,
"grad_norm": 5.666800498962402,
"learning_rate": 2.3726851851851854e-05,
"loss": 1.2826,
"step": 550
},
{
"epoch": 5.803108808290156,
"grad_norm": 5.542099475860596,
"learning_rate": 2.314814814814815e-05,
"loss": 1.3549,
"step": 560
},
{
"epoch": 5.9067357512953365,
"grad_norm": 6.228384017944336,
"learning_rate": 2.2569444444444447e-05,
"loss": 1.2897,
"step": 570
},
{
"epoch": 6.0,
"eval_accuracy": 0.6383694659424122,
"eval_loss": 1.1611359119415283,
"eval_runtime": 3648.3627,
"eval_samples_per_second": 5.702,
"eval_steps_per_second": 0.089,
"step": 579
},
{
"epoch": 6.010362694300518,
"grad_norm": 6.60087776184082,
"learning_rate": 2.1990740740740743e-05,
"loss": 1.3194,
"step": 580
},
{
"epoch": 6.1139896373057,
"grad_norm": 5.230093479156494,
"learning_rate": 2.141203703703704e-05,
"loss": 1.2622,
"step": 590
},
{
"epoch": 6.217616580310881,
"grad_norm": 6.67712926864624,
"learning_rate": 2.0833333333333336e-05,
"loss": 1.2835,
"step": 600
},
{
"epoch": 6.321243523316062,
"grad_norm": 6.0210065841674805,
"learning_rate": 2.0254629629629632e-05,
"loss": 1.2516,
"step": 610
},
{
"epoch": 6.424870466321243,
"grad_norm": 5.083096981048584,
"learning_rate": 1.967592592592593e-05,
"loss": 1.2069,
"step": 620
},
{
"epoch": 6.528497409326425,
"grad_norm": 6.067955493927002,
"learning_rate": 1.9097222222222222e-05,
"loss": 1.2808,
"step": 630
},
{
"epoch": 6.632124352331607,
"grad_norm": 8.958367347717285,
"learning_rate": 1.8518518518518518e-05,
"loss": 1.2798,
"step": 640
},
{
"epoch": 6.7357512953367875,
"grad_norm": 5.711329460144043,
"learning_rate": 1.7939814814814815e-05,
"loss": 1.2249,
"step": 650
},
{
"epoch": 6.839378238341969,
"grad_norm": 5.1562957763671875,
"learning_rate": 1.736111111111111e-05,
"loss": 1.2413,
"step": 660
},
{
"epoch": 6.94300518134715,
"grad_norm": 6.775362968444824,
"learning_rate": 1.6782407407407408e-05,
"loss": 1.2222,
"step": 670
},
{
"epoch": 6.994818652849741,
"eval_accuracy": 0.6417343652357833,
"eval_loss": 1.1575372219085693,
"eval_runtime": 3709.0324,
"eval_samples_per_second": 5.609,
"eval_steps_per_second": 0.088,
"step": 675
},
{
"epoch": 7.046632124352332,
"grad_norm": 6.662919998168945,
"learning_rate": 1.6203703703703704e-05,
"loss": 1.2678,
"step": 680
},
{
"epoch": 7.150259067357513,
"grad_norm": 5.15730619430542,
"learning_rate": 1.5625e-05,
"loss": 1.2303,
"step": 690
},
{
"epoch": 7.253886010362694,
"grad_norm": 5.7805867195129395,
"learning_rate": 1.5046296296296297e-05,
"loss": 1.2413,
"step": 700
},
{
"epoch": 7.357512953367876,
"grad_norm": 7.833797454833984,
"learning_rate": 1.4467592592592593e-05,
"loss": 1.2475,
"step": 710
},
{
"epoch": 7.461139896373057,
"grad_norm": 5.12682580947876,
"learning_rate": 1.388888888888889e-05,
"loss": 1.2079,
"step": 720
},
{
"epoch": 7.564766839378239,
"grad_norm": 6.603540897369385,
"learning_rate": 1.3310185185185186e-05,
"loss": 1.2547,
"step": 730
},
{
"epoch": 7.668393782383419,
"grad_norm": 6.611011981964111,
"learning_rate": 1.2731481481481482e-05,
"loss": 1.2409,
"step": 740
},
{
"epoch": 7.772020725388601,
"grad_norm": 5.739754676818848,
"learning_rate": 1.2152777777777779e-05,
"loss": 1.228,
"step": 750
},
{
"epoch": 7.875647668393782,
"grad_norm": 5.423896312713623,
"learning_rate": 1.1574074074074075e-05,
"loss": 1.1859,
"step": 760
},
{
"epoch": 7.979274611398964,
"grad_norm": 5.076350212097168,
"learning_rate": 1.0995370370370372e-05,
"loss": 1.212,
"step": 770
},
{
"epoch": 8.0,
"eval_accuracy": 0.6421189251550257,
"eval_loss": 1.1474467515945435,
"eval_runtime": 3776.6533,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 0.086,
"step": 772
},
{
"epoch": 8.082901554404145,
"grad_norm": 5.369143486022949,
"learning_rate": 1.0416666666666668e-05,
"loss": 1.1907,
"step": 780
},
{
"epoch": 8.186528497409327,
"grad_norm": 6.046126365661621,
"learning_rate": 9.837962962962964e-06,
"loss": 1.1993,
"step": 790
},
{
"epoch": 8.290155440414507,
"grad_norm": 6.894242286682129,
"learning_rate": 9.259259259259259e-06,
"loss": 1.2238,
"step": 800
},
{
"epoch": 8.393782383419689,
"grad_norm": 6.160308361053467,
"learning_rate": 8.680555555555556e-06,
"loss": 1.2258,
"step": 810
},
{
"epoch": 8.49740932642487,
"grad_norm": 5.83083963394165,
"learning_rate": 8.101851851851852e-06,
"loss": 1.1913,
"step": 820
},
{
"epoch": 8.601036269430052,
"grad_norm": 6.659111499786377,
"learning_rate": 7.523148148148148e-06,
"loss": 1.1878,
"step": 830
},
{
"epoch": 8.704663212435234,
"grad_norm": 7.118185043334961,
"learning_rate": 6.944444444444445e-06,
"loss": 1.1841,
"step": 840
},
{
"epoch": 8.808290155440414,
"grad_norm": 5.696249008178711,
"learning_rate": 6.365740740740741e-06,
"loss": 1.1934,
"step": 850
},
{
"epoch": 8.911917098445596,
"grad_norm": 5.1954545974731445,
"learning_rate": 5.787037037037038e-06,
"loss": 1.2087,
"step": 860
},
{
"epoch": 8.994818652849741,
"eval_accuracy": 0.6445224246502909,
"eval_loss": 1.1410062313079834,
"eval_runtime": 3882.2074,
"eval_samples_per_second": 5.359,
"eval_steps_per_second": 0.084,
"step": 868
},
{
"epoch": 9.015544041450777,
"grad_norm": 6.596940994262695,
"learning_rate": 5.208333333333334e-06,
"loss": 1.2113,
"step": 870
},
{
"epoch": 9.119170984455959,
"grad_norm": 6.038557052612305,
"learning_rate": 4.6296296296296296e-06,
"loss": 1.1918,
"step": 880
},
{
"epoch": 9.22279792746114,
"grad_norm": 5.016276836395264,
"learning_rate": 4.050925925925926e-06,
"loss": 1.2371,
"step": 890
},
{
"epoch": 9.32642487046632,
"grad_norm": 5.792815208435059,
"learning_rate": 3.4722222222222224e-06,
"loss": 1.231,
"step": 900
},
{
"epoch": 9.430051813471502,
"grad_norm": 5.179474353790283,
"learning_rate": 2.893518518518519e-06,
"loss": 1.19,
"step": 910
},
{
"epoch": 9.533678756476684,
"grad_norm": 6.146459579467773,
"learning_rate": 2.3148148148148148e-06,
"loss": 1.1693,
"step": 920
},
{
"epoch": 9.637305699481866,
"grad_norm": 5.89201021194458,
"learning_rate": 1.7361111111111112e-06,
"loss": 1.1679,
"step": 930
},
{
"epoch": 9.740932642487046,
"grad_norm": 6.085833549499512,
"learning_rate": 1.1574074074074074e-06,
"loss": 1.1812,
"step": 940
},
{
"epoch": 9.844559585492227,
"grad_norm": 5.505359172821045,
"learning_rate": 5.787037037037037e-07,
"loss": 1.1552,
"step": 950
},
{
"epoch": 9.94818652849741,
"grad_norm": 6.020310878753662,
"learning_rate": 0.0,
"loss": 1.1897,
"step": 960
},
{
"epoch": 9.94818652849741,
"eval_accuracy": 0.6431764649329423,
"eval_loss": 1.1433619260787964,
"eval_runtime": 3906.8512,
"eval_samples_per_second": 5.325,
"eval_steps_per_second": 0.083,
"step": 960
},
{
"epoch": 9.94818652849741,
"step": 960,
"total_flos": 6.10407050089078e+18,
"train_loss": 1.447805991768837,
"train_runtime": 79883.0648,
"train_samples_per_second": 3.088,
"train_steps_per_second": 0.012
}
],
"logging_steps": 10,
"max_steps": 960,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.10407050089078e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}