vit-base-patch16-224-brand / trainer_state.json
barten's picture
End of training
dcb71fd
{
"best_metric": 0.8528925619834711,
"best_model_checkpoint": "vit-base-patch16-224-brand/checkpoint-970",
"epoch": 14.957507082152974,
"eval_steps": 500,
"global_step": 1320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 3.7878787878787882e-06,
"loss": 2.2055,
"step": 10
},
{
"epoch": 0.23,
"learning_rate": 7.5757575757575764e-06,
"loss": 2.162,
"step": 20
},
{
"epoch": 0.34,
"learning_rate": 1.1363636363636365e-05,
"loss": 2.0983,
"step": 30
},
{
"epoch": 0.45,
"learning_rate": 1.5151515151515153e-05,
"loss": 2.0166,
"step": 40
},
{
"epoch": 0.57,
"learning_rate": 1.893939393939394e-05,
"loss": 1.9044,
"step": 50
},
{
"epoch": 0.68,
"learning_rate": 2.272727272727273e-05,
"loss": 1.7381,
"step": 60
},
{
"epoch": 0.79,
"learning_rate": 2.6515151515151516e-05,
"loss": 1.6089,
"step": 70
},
{
"epoch": 0.91,
"learning_rate": 3.0303030303030306e-05,
"loss": 1.4669,
"step": 80
},
{
"epoch": 1.0,
"eval_accuracy": 0.5611570247933885,
"eval_loss": 1.3067070245742798,
"eval_runtime": 16.7756,
"eval_samples_per_second": 72.129,
"eval_steps_per_second": 4.53,
"step": 88
},
{
"epoch": 1.02,
"learning_rate": 3.409090909090909e-05,
"loss": 1.4484,
"step": 90
},
{
"epoch": 1.13,
"learning_rate": 3.787878787878788e-05,
"loss": 1.2513,
"step": 100
},
{
"epoch": 1.25,
"learning_rate": 4.166666666666667e-05,
"loss": 1.1741,
"step": 110
},
{
"epoch": 1.36,
"learning_rate": 4.545454545454546e-05,
"loss": 1.0883,
"step": 120
},
{
"epoch": 1.47,
"learning_rate": 4.9242424242424245e-05,
"loss": 1.0522,
"step": 130
},
{
"epoch": 1.59,
"learning_rate": 4.966329966329967e-05,
"loss": 1.0214,
"step": 140
},
{
"epoch": 1.7,
"learning_rate": 4.9242424242424245e-05,
"loss": 0.9231,
"step": 150
},
{
"epoch": 1.81,
"learning_rate": 4.882154882154882e-05,
"loss": 0.8961,
"step": 160
},
{
"epoch": 1.93,
"learning_rate": 4.84006734006734e-05,
"loss": 0.8898,
"step": 170
},
{
"epoch": 1.99,
"eval_accuracy": 0.7140495867768595,
"eval_loss": 0.8380156755447388,
"eval_runtime": 16.8759,
"eval_samples_per_second": 71.7,
"eval_steps_per_second": 4.503,
"step": 176
},
{
"epoch": 2.04,
"learning_rate": 4.797979797979798e-05,
"loss": 0.8856,
"step": 180
},
{
"epoch": 2.15,
"learning_rate": 4.755892255892256e-05,
"loss": 0.7916,
"step": 190
},
{
"epoch": 2.27,
"learning_rate": 4.713804713804714e-05,
"loss": 0.7614,
"step": 200
},
{
"epoch": 2.38,
"learning_rate": 4.671717171717172e-05,
"loss": 0.7696,
"step": 210
},
{
"epoch": 2.49,
"learning_rate": 4.62962962962963e-05,
"loss": 0.7874,
"step": 220
},
{
"epoch": 2.61,
"learning_rate": 4.5875420875420876e-05,
"loss": 0.7126,
"step": 230
},
{
"epoch": 2.72,
"learning_rate": 4.545454545454546e-05,
"loss": 0.7119,
"step": 240
},
{
"epoch": 2.83,
"learning_rate": 4.5033670033670034e-05,
"loss": 0.7095,
"step": 250
},
{
"epoch": 2.95,
"learning_rate": 4.4612794612794616e-05,
"loss": 0.7243,
"step": 260
},
{
"epoch": 2.99,
"eval_accuracy": 0.7694214876033058,
"eval_loss": 0.6559494137763977,
"eval_runtime": 16.9139,
"eval_samples_per_second": 71.539,
"eval_steps_per_second": 4.493,
"step": 264
},
{
"epoch": 3.06,
"learning_rate": 4.41919191919192e-05,
"loss": 0.7109,
"step": 270
},
{
"epoch": 3.17,
"learning_rate": 4.3771043771043774e-05,
"loss": 0.5651,
"step": 280
},
{
"epoch": 3.29,
"learning_rate": 4.335016835016835e-05,
"loss": 0.5838,
"step": 290
},
{
"epoch": 3.4,
"learning_rate": 4.292929292929293e-05,
"loss": 0.5879,
"step": 300
},
{
"epoch": 3.51,
"learning_rate": 4.250841750841751e-05,
"loss": 0.5541,
"step": 310
},
{
"epoch": 3.63,
"learning_rate": 4.208754208754209e-05,
"loss": 0.5522,
"step": 320
},
{
"epoch": 3.74,
"learning_rate": 4.166666666666667e-05,
"loss": 0.5105,
"step": 330
},
{
"epoch": 3.85,
"learning_rate": 4.124579124579125e-05,
"loss": 0.5289,
"step": 340
},
{
"epoch": 3.97,
"learning_rate": 4.082491582491583e-05,
"loss": 0.5158,
"step": 350
},
{
"epoch": 4.0,
"eval_accuracy": 0.7950413223140496,
"eval_loss": 0.5982227921485901,
"eval_runtime": 16.8053,
"eval_samples_per_second": 72.001,
"eval_steps_per_second": 4.522,
"step": 353
},
{
"epoch": 4.08,
"learning_rate": 4.0404040404040405e-05,
"loss": 0.4296,
"step": 360
},
{
"epoch": 4.19,
"learning_rate": 3.998316498316498e-05,
"loss": 0.4105,
"step": 370
},
{
"epoch": 4.31,
"learning_rate": 3.956228956228956e-05,
"loss": 0.4658,
"step": 380
},
{
"epoch": 4.42,
"learning_rate": 3.9141414141414145e-05,
"loss": 0.4464,
"step": 390
},
{
"epoch": 4.53,
"learning_rate": 3.872053872053872e-05,
"loss": 0.431,
"step": 400
},
{
"epoch": 4.65,
"learning_rate": 3.82996632996633e-05,
"loss": 0.4499,
"step": 410
},
{
"epoch": 4.76,
"learning_rate": 3.787878787878788e-05,
"loss": 0.4568,
"step": 420
},
{
"epoch": 4.87,
"learning_rate": 3.745791245791246e-05,
"loss": 0.4397,
"step": 430
},
{
"epoch": 4.99,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.4605,
"step": 440
},
{
"epoch": 5.0,
"eval_accuracy": 0.8082644628099174,
"eval_loss": 0.5856056809425354,
"eval_runtime": 16.7221,
"eval_samples_per_second": 72.359,
"eval_steps_per_second": 4.545,
"step": 441
},
{
"epoch": 5.1,
"learning_rate": 3.661616161616162e-05,
"loss": 0.3757,
"step": 450
},
{
"epoch": 5.21,
"learning_rate": 3.61952861952862e-05,
"loss": 0.369,
"step": 460
},
{
"epoch": 5.33,
"learning_rate": 3.577441077441078e-05,
"loss": 0.3952,
"step": 470
},
{
"epoch": 5.44,
"learning_rate": 3.535353535353535e-05,
"loss": 0.3841,
"step": 480
},
{
"epoch": 5.55,
"learning_rate": 3.4932659932659934e-05,
"loss": 0.3707,
"step": 490
},
{
"epoch": 5.67,
"learning_rate": 3.451178451178451e-05,
"loss": 0.3874,
"step": 500
},
{
"epoch": 5.78,
"learning_rate": 3.409090909090909e-05,
"loss": 0.347,
"step": 510
},
{
"epoch": 5.89,
"learning_rate": 3.3670033670033675e-05,
"loss": 0.332,
"step": 520
},
{
"epoch": 5.99,
"eval_accuracy": 0.8355371900826446,
"eval_loss": 0.5138491988182068,
"eval_runtime": 16.6373,
"eval_samples_per_second": 72.728,
"eval_steps_per_second": 4.568,
"step": 529
},
{
"epoch": 6.01,
"learning_rate": 3.324915824915825e-05,
"loss": 0.3767,
"step": 530
},
{
"epoch": 6.12,
"learning_rate": 3.282828282828283e-05,
"loss": 0.2646,
"step": 540
},
{
"epoch": 6.23,
"learning_rate": 3.240740740740741e-05,
"loss": 0.3142,
"step": 550
},
{
"epoch": 6.35,
"learning_rate": 3.198653198653199e-05,
"loss": 0.3171,
"step": 560
},
{
"epoch": 6.46,
"learning_rate": 3.1565656565656566e-05,
"loss": 0.3318,
"step": 570
},
{
"epoch": 6.57,
"learning_rate": 3.114478114478115e-05,
"loss": 0.309,
"step": 580
},
{
"epoch": 6.69,
"learning_rate": 3.072390572390573e-05,
"loss": 0.2928,
"step": 590
},
{
"epoch": 6.8,
"learning_rate": 3.0303030303030306e-05,
"loss": 0.3622,
"step": 600
},
{
"epoch": 6.91,
"learning_rate": 2.9882154882154885e-05,
"loss": 0.3375,
"step": 610
},
{
"epoch": 6.99,
"eval_accuracy": 0.8264462809917356,
"eval_loss": 0.5094661116600037,
"eval_runtime": 16.7644,
"eval_samples_per_second": 72.177,
"eval_steps_per_second": 4.533,
"step": 617
},
{
"epoch": 7.03,
"learning_rate": 2.946127946127946e-05,
"loss": 0.2897,
"step": 620
},
{
"epoch": 7.14,
"learning_rate": 2.904040404040404e-05,
"loss": 0.2764,
"step": 630
},
{
"epoch": 7.25,
"learning_rate": 2.8619528619528618e-05,
"loss": 0.2627,
"step": 640
},
{
"epoch": 7.37,
"learning_rate": 2.8198653198653204e-05,
"loss": 0.2899,
"step": 650
},
{
"epoch": 7.48,
"learning_rate": 2.777777777777778e-05,
"loss": 0.2316,
"step": 660
},
{
"epoch": 7.59,
"learning_rate": 2.7356902356902358e-05,
"loss": 0.2729,
"step": 670
},
{
"epoch": 7.71,
"learning_rate": 2.6936026936026937e-05,
"loss": 0.2598,
"step": 680
},
{
"epoch": 7.82,
"learning_rate": 2.6515151515151516e-05,
"loss": 0.2331,
"step": 690
},
{
"epoch": 7.93,
"learning_rate": 2.6094276094276095e-05,
"loss": 0.2188,
"step": 700
},
{
"epoch": 8.0,
"eval_accuracy": 0.8322314049586776,
"eval_loss": 0.5088740587234497,
"eval_runtime": 16.6796,
"eval_samples_per_second": 72.544,
"eval_steps_per_second": 4.556,
"step": 706
},
{
"epoch": 8.05,
"learning_rate": 2.5673400673400677e-05,
"loss": 0.2445,
"step": 710
},
{
"epoch": 8.16,
"learning_rate": 2.5252525252525256e-05,
"loss": 0.2144,
"step": 720
},
{
"epoch": 8.27,
"learning_rate": 2.4831649831649835e-05,
"loss": 0.1956,
"step": 730
},
{
"epoch": 8.39,
"learning_rate": 2.441077441077441e-05,
"loss": 0.2103,
"step": 740
},
{
"epoch": 8.5,
"learning_rate": 2.398989898989899e-05,
"loss": 0.2539,
"step": 750
},
{
"epoch": 8.61,
"learning_rate": 2.356902356902357e-05,
"loss": 0.2078,
"step": 760
},
{
"epoch": 8.73,
"learning_rate": 2.314814814814815e-05,
"loss": 0.2112,
"step": 770
},
{
"epoch": 8.84,
"learning_rate": 2.272727272727273e-05,
"loss": 0.2418,
"step": 780
},
{
"epoch": 8.95,
"learning_rate": 2.2306397306397308e-05,
"loss": 0.2112,
"step": 790
},
{
"epoch": 9.0,
"eval_accuracy": 0.8380165289256198,
"eval_loss": 0.5125700235366821,
"eval_runtime": 16.6395,
"eval_samples_per_second": 72.718,
"eval_steps_per_second": 4.567,
"step": 794
},
{
"epoch": 9.07,
"learning_rate": 2.1885521885521887e-05,
"loss": 0.207,
"step": 800
},
{
"epoch": 9.18,
"learning_rate": 2.1464646464646466e-05,
"loss": 0.1844,
"step": 810
},
{
"epoch": 9.29,
"learning_rate": 2.1043771043771045e-05,
"loss": 0.1742,
"step": 820
},
{
"epoch": 9.41,
"learning_rate": 2.0622895622895624e-05,
"loss": 0.178,
"step": 830
},
{
"epoch": 9.52,
"learning_rate": 2.0202020202020203e-05,
"loss": 0.1957,
"step": 840
},
{
"epoch": 9.63,
"learning_rate": 1.978114478114478e-05,
"loss": 0.1833,
"step": 850
},
{
"epoch": 9.75,
"learning_rate": 1.936026936026936e-05,
"loss": 0.1955,
"step": 860
},
{
"epoch": 9.86,
"learning_rate": 1.893939393939394e-05,
"loss": 0.2161,
"step": 870
},
{
"epoch": 9.97,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.1895,
"step": 880
},
{
"epoch": 9.99,
"eval_accuracy": 0.8363636363636363,
"eval_loss": 0.5057435631752014,
"eval_runtime": 16.6718,
"eval_samples_per_second": 72.578,
"eval_steps_per_second": 4.559,
"step": 882
},
{
"epoch": 10.08,
"learning_rate": 1.80976430976431e-05,
"loss": 0.1848,
"step": 890
},
{
"epoch": 10.2,
"learning_rate": 1.7676767676767676e-05,
"loss": 0.1328,
"step": 900
},
{
"epoch": 10.31,
"learning_rate": 1.7255892255892255e-05,
"loss": 0.1383,
"step": 910
},
{
"epoch": 10.42,
"learning_rate": 1.6835016835016837e-05,
"loss": 0.1773,
"step": 920
},
{
"epoch": 10.54,
"learning_rate": 1.6414141414141416e-05,
"loss": 0.1363,
"step": 930
},
{
"epoch": 10.65,
"learning_rate": 1.5993265993265995e-05,
"loss": 0.1516,
"step": 940
},
{
"epoch": 10.76,
"learning_rate": 1.5572390572390574e-05,
"loss": 0.1917,
"step": 950
},
{
"epoch": 10.88,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.1928,
"step": 960
},
{
"epoch": 10.99,
"learning_rate": 1.473063973063973e-05,
"loss": 0.1593,
"step": 970
},
{
"epoch": 10.99,
"eval_accuracy": 0.8528925619834711,
"eval_loss": 0.4851677417755127,
"eval_runtime": 16.8845,
"eval_samples_per_second": 71.664,
"eval_steps_per_second": 4.501,
"step": 970
},
{
"epoch": 11.1,
"learning_rate": 1.4309764309764309e-05,
"loss": 0.1578,
"step": 980
},
{
"epoch": 11.22,
"learning_rate": 1.388888888888889e-05,
"loss": 0.1757,
"step": 990
},
{
"epoch": 11.33,
"learning_rate": 1.3468013468013468e-05,
"loss": 0.125,
"step": 1000
},
{
"epoch": 11.44,
"learning_rate": 1.3047138047138047e-05,
"loss": 0.1566,
"step": 1010
},
{
"epoch": 11.56,
"learning_rate": 1.2626262626262628e-05,
"loss": 0.1841,
"step": 1020
},
{
"epoch": 11.67,
"learning_rate": 1.2205387205387205e-05,
"loss": 0.1832,
"step": 1030
},
{
"epoch": 11.78,
"learning_rate": 1.1784511784511786e-05,
"loss": 0.1604,
"step": 1040
},
{
"epoch": 11.9,
"learning_rate": 1.1363636363636365e-05,
"loss": 0.1463,
"step": 1050
},
{
"epoch": 12.0,
"eval_accuracy": 0.8429752066115702,
"eval_loss": 0.49344199895858765,
"eval_runtime": 16.7817,
"eval_samples_per_second": 72.102,
"eval_steps_per_second": 4.529,
"step": 1059
},
{
"epoch": 12.01,
"learning_rate": 1.0942760942760944e-05,
"loss": 0.133,
"step": 1060
},
{
"epoch": 12.12,
"learning_rate": 1.0521885521885522e-05,
"loss": 0.1293,
"step": 1070
},
{
"epoch": 12.24,
"learning_rate": 1.0101010101010101e-05,
"loss": 0.1232,
"step": 1080
},
{
"epoch": 12.35,
"learning_rate": 9.68013468013468e-06,
"loss": 0.1415,
"step": 1090
},
{
"epoch": 12.46,
"learning_rate": 9.259259259259259e-06,
"loss": 0.1425,
"step": 1100
},
{
"epoch": 12.58,
"learning_rate": 8.838383838383838e-06,
"loss": 0.1062,
"step": 1110
},
{
"epoch": 12.69,
"learning_rate": 8.417508417508419e-06,
"loss": 0.1193,
"step": 1120
},
{
"epoch": 12.8,
"learning_rate": 7.996632996632998e-06,
"loss": 0.1539,
"step": 1130
},
{
"epoch": 12.92,
"learning_rate": 7.5757575757575764e-06,
"loss": 0.1565,
"step": 1140
},
{
"epoch": 13.0,
"eval_accuracy": 0.8495867768595041,
"eval_loss": 0.47939661145210266,
"eval_runtime": 16.5708,
"eval_samples_per_second": 73.02,
"eval_steps_per_second": 4.586,
"step": 1147
},
{
"epoch": 13.03,
"learning_rate": 7.1548821548821545e-06,
"loss": 0.1193,
"step": 1150
},
{
"epoch": 13.14,
"learning_rate": 6.734006734006734e-06,
"loss": 0.1287,
"step": 1160
},
{
"epoch": 13.26,
"learning_rate": 6.313131313131314e-06,
"loss": 0.1186,
"step": 1170
},
{
"epoch": 13.37,
"learning_rate": 5.892255892255893e-06,
"loss": 0.1562,
"step": 1180
},
{
"epoch": 13.48,
"learning_rate": 5.471380471380472e-06,
"loss": 0.1297,
"step": 1190
},
{
"epoch": 13.6,
"learning_rate": 5.050505050505051e-06,
"loss": 0.158,
"step": 1200
},
{
"epoch": 13.71,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.1188,
"step": 1210
},
{
"epoch": 13.82,
"learning_rate": 4.208754208754209e-06,
"loss": 0.1237,
"step": 1220
},
{
"epoch": 13.94,
"learning_rate": 3.7878787878787882e-06,
"loss": 0.1236,
"step": 1230
},
{
"epoch": 13.99,
"eval_accuracy": 0.8462809917355372,
"eval_loss": 0.48630189895629883,
"eval_runtime": 16.6832,
"eval_samples_per_second": 72.528,
"eval_steps_per_second": 4.555,
"step": 1235
},
{
"epoch": 14.05,
"learning_rate": 3.367003367003367e-06,
"loss": 0.103,
"step": 1240
},
{
"epoch": 14.16,
"learning_rate": 2.9461279461279464e-06,
"loss": 0.1352,
"step": 1250
},
{
"epoch": 14.28,
"learning_rate": 2.5252525252525253e-06,
"loss": 0.1372,
"step": 1260
},
{
"epoch": 14.39,
"learning_rate": 2.1043771043771047e-06,
"loss": 0.1215,
"step": 1270
},
{
"epoch": 14.5,
"learning_rate": 1.6835016835016836e-06,
"loss": 0.1334,
"step": 1280
},
{
"epoch": 14.62,
"learning_rate": 1.2626262626262627e-06,
"loss": 0.1326,
"step": 1290
},
{
"epoch": 14.73,
"learning_rate": 8.417508417508418e-07,
"loss": 0.0859,
"step": 1300
},
{
"epoch": 14.84,
"learning_rate": 4.208754208754209e-07,
"loss": 0.1313,
"step": 1310
},
{
"epoch": 14.96,
"learning_rate": 0.0,
"loss": 0.1407,
"step": 1320
},
{
"epoch": 14.96,
"eval_accuracy": 0.8495867768595041,
"eval_loss": 0.48121175169944763,
"eval_runtime": 16.5675,
"eval_samples_per_second": 73.034,
"eval_steps_per_second": 4.587,
"step": 1320
},
{
"epoch": 14.96,
"step": 1320,
"total_flos": 6.546875329145733e+18,
"train_loss": 0.44751356618874,
"train_runtime": 3414.1758,
"train_samples_per_second": 24.814,
"train_steps_per_second": 0.387
}
],
"logging_steps": 10,
"max_steps": 1320,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 6.546875329145733e+18,
"trial_name": null,
"trial_params": null
}