{
  "best_metric": 0.8528925619834711,
  "best_model_checkpoint": "vit-base-patch16-224-brand/checkpoint-970",
  "epoch": 14.957507082152974,
  "eval_steps": 500,
  "global_step": 1320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 3.7878787878787882e-06,
      "loss": 2.2055,
      "step": 10
    },
    {
      "epoch": 0.23,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 2.162,
      "step": 20
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 2.0983,
      "step": 30
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 2.0166,
      "step": 40
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.893939393939394e-05,
      "loss": 1.9044,
      "step": 50
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.272727272727273e-05,
      "loss": 1.7381,
      "step": 60
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.6515151515151516e-05,
      "loss": 1.6089,
      "step": 70
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 1.4669,
      "step": 80
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5611570247933885,
      "eval_loss": 1.3067070245742798,
      "eval_runtime": 16.7756,
      "eval_samples_per_second": 72.129,
      "eval_steps_per_second": 4.53,
      "step": 88
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.409090909090909e-05,
      "loss": 1.4484,
      "step": 90
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.787878787878788e-05,
      "loss": 1.2513,
      "step": 100
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.1741,
      "step": 110
    },
    {
      "epoch": 1.36,
      "learning_rate": 4.545454545454546e-05,
      "loss": 1.0883,
      "step": 120
    },
    {
      "epoch": 1.47,
      "learning_rate": 4.9242424242424245e-05,
      "loss": 1.0522,
      "step": 130
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.966329966329967e-05,
      "loss": 1.0214,
      "step": 140
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.9242424242424245e-05,
      "loss": 0.9231,
      "step": 150
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.882154882154882e-05,
      "loss": 0.8961,
      "step": 160
    },
    {
      "epoch": 1.93,
      "learning_rate": 4.84006734006734e-05,
      "loss": 0.8898,
      "step": 170
    },
    {
      "epoch": 1.99,
      "eval_accuracy": 0.7140495867768595,
      "eval_loss": 0.8380156755447388,
      "eval_runtime": 16.8759,
      "eval_samples_per_second": 71.7,
      "eval_steps_per_second": 4.503,
      "step": 176
    },
    {
      "epoch": 2.04,
      "learning_rate": 4.797979797979798e-05,
      "loss": 0.8856,
      "step": 180
    },
    {
      "epoch": 2.15,
      "learning_rate": 4.755892255892256e-05,
      "loss": 0.7916,
      "step": 190
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.713804713804714e-05,
      "loss": 0.7614,
      "step": 200
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.671717171717172e-05,
      "loss": 0.7696,
      "step": 210
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.7874,
      "step": 220
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.5875420875420876e-05,
      "loss": 0.7126,
      "step": 230
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.545454545454546e-05,
      "loss": 0.7119,
      "step": 240
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.5033670033670034e-05,
      "loss": 0.7095,
      "step": 250
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.4612794612794616e-05,
      "loss": 0.7243,
      "step": 260
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.7694214876033058,
      "eval_loss": 0.6559494137763977,
      "eval_runtime": 16.9139,
      "eval_samples_per_second": 71.539,
      "eval_steps_per_second": 4.493,
      "step": 264
    },
    {
      "epoch": 3.06,
      "learning_rate": 4.41919191919192e-05,
      "loss": 0.7109,
      "step": 270
    },
    {
      "epoch": 3.17,
      "learning_rate": 4.3771043771043774e-05,
      "loss": 0.5651,
      "step": 280
    },
    {
      "epoch": 3.29,
      "learning_rate": 4.335016835016835e-05,
      "loss": 0.5838,
      "step": 290
    },
    {
      "epoch": 3.4,
      "learning_rate": 4.292929292929293e-05,
      "loss": 0.5879,
      "step": 300
    },
    {
      "epoch": 3.51,
      "learning_rate": 4.250841750841751e-05,
      "loss": 0.5541,
      "step": 310
    },
    {
      "epoch": 3.63,
      "learning_rate": 4.208754208754209e-05,
      "loss": 0.5522,
      "step": 320
    },
    {
      "epoch": 3.74,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.5105,
      "step": 330
    },
    {
      "epoch": 3.85,
      "learning_rate": 4.124579124579125e-05,
      "loss": 0.5289,
      "step": 340
    },
    {
      "epoch": 3.97,
      "learning_rate": 4.082491582491583e-05,
      "loss": 0.5158,
      "step": 350
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7950413223140496,
      "eval_loss": 0.5982227921485901,
      "eval_runtime": 16.8053,
      "eval_samples_per_second": 72.001,
      "eval_steps_per_second": 4.522,
      "step": 353
    },
    {
      "epoch": 4.08,
      "learning_rate": 4.0404040404040405e-05,
      "loss": 0.4296,
      "step": 360
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.998316498316498e-05,
      "loss": 0.4105,
      "step": 370
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.956228956228956e-05,
      "loss": 0.4658,
      "step": 380
    },
    {
      "epoch": 4.42,
      "learning_rate": 3.9141414141414145e-05,
      "loss": 0.4464,
      "step": 390
    },
    {
      "epoch": 4.53,
      "learning_rate": 3.872053872053872e-05,
      "loss": 0.431,
      "step": 400
    },
    {
      "epoch": 4.65,
      "learning_rate": 3.82996632996633e-05,
      "loss": 0.4499,
      "step": 410
    },
    {
      "epoch": 4.76,
      "learning_rate": 3.787878787878788e-05,
      "loss": 0.4568,
      "step": 420
    },
    {
      "epoch": 4.87,
      "learning_rate": 3.745791245791246e-05,
      "loss": 0.4397,
      "step": 430
    },
    {
      "epoch": 4.99,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.4605,
      "step": 440
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8082644628099174,
      "eval_loss": 0.5856056809425354,
      "eval_runtime": 16.7221,
      "eval_samples_per_second": 72.359,
      "eval_steps_per_second": 4.545,
      "step": 441
    },
    {
      "epoch": 5.1,
      "learning_rate": 3.661616161616162e-05,
      "loss": 0.3757,
      "step": 450
    },
    {
      "epoch": 5.21,
      "learning_rate": 3.61952861952862e-05,
      "loss": 0.369,
      "step": 460
    },
    {
      "epoch": 5.33,
      "learning_rate": 3.577441077441078e-05,
      "loss": 0.3952,
      "step": 470
    },
    {
      "epoch": 5.44,
      "learning_rate": 3.535353535353535e-05,
      "loss": 0.3841,
      "step": 480
    },
    {
      "epoch": 5.55,
      "learning_rate": 3.4932659932659934e-05,
      "loss": 0.3707,
      "step": 490
    },
    {
      "epoch": 5.67,
      "learning_rate": 3.451178451178451e-05,
      "loss": 0.3874,
      "step": 500
    },
    {
      "epoch": 5.78,
      "learning_rate": 3.409090909090909e-05,
      "loss": 0.347,
      "step": 510
    },
    {
      "epoch": 5.89,
      "learning_rate": 3.3670033670033675e-05,
      "loss": 0.332,
      "step": 520
    },
    {
      "epoch": 5.99,
      "eval_accuracy": 0.8355371900826446,
      "eval_loss": 0.5138491988182068,
      "eval_runtime": 16.6373,
      "eval_samples_per_second": 72.728,
      "eval_steps_per_second": 4.568,
      "step": 529
    },
    {
      "epoch": 6.01,
      "learning_rate": 3.324915824915825e-05,
      "loss": 0.3767,
      "step": 530
    },
    {
      "epoch": 6.12,
      "learning_rate": 3.282828282828283e-05,
      "loss": 0.2646,
      "step": 540
    },
    {
      "epoch": 6.23,
      "learning_rate": 3.240740740740741e-05,
      "loss": 0.3142,
      "step": 550
    },
    {
      "epoch": 6.35,
      "learning_rate": 3.198653198653199e-05,
      "loss": 0.3171,
      "step": 560
    },
    {
      "epoch": 6.46,
      "learning_rate": 3.1565656565656566e-05,
      "loss": 0.3318,
      "step": 570
    },
    {
      "epoch": 6.57,
      "learning_rate": 3.114478114478115e-05,
      "loss": 0.309,
      "step": 580
    },
    {
      "epoch": 6.69,
      "learning_rate": 3.072390572390573e-05,
      "loss": 0.2928,
      "step": 590
    },
    {
      "epoch": 6.8,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 0.3622,
      "step": 600
    },
    {
      "epoch": 6.91,
      "learning_rate": 2.9882154882154885e-05,
      "loss": 0.3375,
      "step": 610
    },
    {
      "epoch": 6.99,
      "eval_accuracy": 0.8264462809917356,
      "eval_loss": 0.5094661116600037,
      "eval_runtime": 16.7644,
      "eval_samples_per_second": 72.177,
      "eval_steps_per_second": 4.533,
      "step": 617
    },
    {
      "epoch": 7.03,
      "learning_rate": 2.946127946127946e-05,
      "loss": 0.2897,
      "step": 620
    },
    {
      "epoch": 7.14,
      "learning_rate": 2.904040404040404e-05,
      "loss": 0.2764,
      "step": 630
    },
    {
      "epoch": 7.25,
      "learning_rate": 2.8619528619528618e-05,
      "loss": 0.2627,
      "step": 640
    },
    {
      "epoch": 7.37,
      "learning_rate": 2.8198653198653204e-05,
      "loss": 0.2899,
      "step": 650
    },
    {
      "epoch": 7.48,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.2316,
      "step": 660
    },
    {
      "epoch": 7.59,
      "learning_rate": 2.7356902356902358e-05,
      "loss": 0.2729,
      "step": 670
    },
    {
      "epoch": 7.71,
      "learning_rate": 2.6936026936026937e-05,
      "loss": 0.2598,
      "step": 680
    },
    {
      "epoch": 7.82,
      "learning_rate": 2.6515151515151516e-05,
      "loss": 0.2331,
      "step": 690
    },
    {
      "epoch": 7.93,
      "learning_rate": 2.6094276094276095e-05,
      "loss": 0.2188,
      "step": 700
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8322314049586776,
      "eval_loss": 0.5088740587234497,
      "eval_runtime": 16.6796,
      "eval_samples_per_second": 72.544,
      "eval_steps_per_second": 4.556,
      "step": 706
    },
    {
      "epoch": 8.05,
      "learning_rate": 2.5673400673400677e-05,
      "loss": 0.2445,
      "step": 710
    },
    {
      "epoch": 8.16,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 0.2144,
      "step": 720
    },
    {
      "epoch": 8.27,
      "learning_rate": 2.4831649831649835e-05,
      "loss": 0.1956,
      "step": 730
    },
    {
      "epoch": 8.39,
      "learning_rate": 2.441077441077441e-05,
      "loss": 0.2103,
      "step": 740
    },
    {
      "epoch": 8.5,
      "learning_rate": 2.398989898989899e-05,
      "loss": 0.2539,
      "step": 750
    },
    {
      "epoch": 8.61,
      "learning_rate": 2.356902356902357e-05,
      "loss": 0.2078,
      "step": 760
    },
    {
      "epoch": 8.73,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.2112,
      "step": 770
    },
    {
      "epoch": 8.84,
      "learning_rate": 2.272727272727273e-05,
      "loss": 0.2418,
      "step": 780
    },
    {
      "epoch": 8.95,
      "learning_rate": 2.2306397306397308e-05,
      "loss": 0.2112,
      "step": 790
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8380165289256198,
      "eval_loss": 0.5125700235366821,
      "eval_runtime": 16.6395,
      "eval_samples_per_second": 72.718,
      "eval_steps_per_second": 4.567,
      "step": 794
    },
    {
      "epoch": 9.07,
      "learning_rate": 2.1885521885521887e-05,
      "loss": 0.207,
      "step": 800
    },
    {
      "epoch": 9.18,
      "learning_rate": 2.1464646464646466e-05,
      "loss": 0.1844,
      "step": 810
    },
    {
      "epoch": 9.29,
      "learning_rate": 2.1043771043771045e-05,
      "loss": 0.1742,
      "step": 820
    },
    {
      "epoch": 9.41,
      "learning_rate": 2.0622895622895624e-05,
      "loss": 0.178,
      "step": 830
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.0202020202020203e-05,
      "loss": 0.1957,
      "step": 840
    },
    {
      "epoch": 9.63,
      "learning_rate": 1.978114478114478e-05,
      "loss": 0.1833,
      "step": 850
    },
    {
      "epoch": 9.75,
      "learning_rate": 1.936026936026936e-05,
      "loss": 0.1955,
      "step": 860
    },
    {
      "epoch": 9.86,
      "learning_rate": 1.893939393939394e-05,
      "loss": 0.2161,
      "step": 870
    },
    {
      "epoch": 9.97,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1895,
      "step": 880
    },
    {
      "epoch": 9.99,
      "eval_accuracy": 0.8363636363636363,
      "eval_loss": 0.5057435631752014,
      "eval_runtime": 16.6718,
      "eval_samples_per_second": 72.578,
      "eval_steps_per_second": 4.559,
      "step": 882
    },
    {
      "epoch": 10.08,
      "learning_rate": 1.80976430976431e-05,
      "loss": 0.1848,
      "step": 890
    },
    {
      "epoch": 10.2,
      "learning_rate": 1.7676767676767676e-05,
      "loss": 0.1328,
      "step": 900
    },
    {
      "epoch": 10.31,
      "learning_rate": 1.7255892255892255e-05,
      "loss": 0.1383,
      "step": 910
    },
    {
      "epoch": 10.42,
      "learning_rate": 1.6835016835016837e-05,
      "loss": 0.1773,
      "step": 920
    },
    {
      "epoch": 10.54,
      "learning_rate": 1.6414141414141416e-05,
      "loss": 0.1363,
      "step": 930
    },
    {
      "epoch": 10.65,
      "learning_rate": 1.5993265993265995e-05,
      "loss": 0.1516,
      "step": 940
    },
    {
      "epoch": 10.76,
      "learning_rate": 1.5572390572390574e-05,
      "loss": 0.1917,
      "step": 950
    },
    {
      "epoch": 10.88,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 0.1928,
      "step": 960
    },
    {
      "epoch": 10.99,
      "learning_rate": 1.473063973063973e-05,
      "loss": 0.1593,
      "step": 970
    },
    {
      "epoch": 10.99,
      "eval_accuracy": 0.8528925619834711,
      "eval_loss": 0.4851677417755127,
      "eval_runtime": 16.8845,
      "eval_samples_per_second": 71.664,
      "eval_steps_per_second": 4.501,
      "step": 970
    },
    {
      "epoch": 11.1,
      "learning_rate": 1.4309764309764309e-05,
      "loss": 0.1578,
      "step": 980
    },
    {
      "epoch": 11.22,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.1757,
      "step": 990
    },
    {
      "epoch": 11.33,
      "learning_rate": 1.3468013468013468e-05,
      "loss": 0.125,
      "step": 1000
    },
    {
      "epoch": 11.44,
      "learning_rate": 1.3047138047138047e-05,
      "loss": 0.1566,
      "step": 1010
    },
    {
      "epoch": 11.56,
      "learning_rate": 1.2626262626262628e-05,
      "loss": 0.1841,
      "step": 1020
    },
    {
      "epoch": 11.67,
      "learning_rate": 1.2205387205387205e-05,
      "loss": 0.1832,
      "step": 1030
    },
    {
      "epoch": 11.78,
      "learning_rate": 1.1784511784511786e-05,
      "loss": 0.1604,
      "step": 1040
    },
    {
      "epoch": 11.9,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 0.1463,
      "step": 1050
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8429752066115702,
      "eval_loss": 0.49344199895858765,
      "eval_runtime": 16.7817,
      "eval_samples_per_second": 72.102,
      "eval_steps_per_second": 4.529,
      "step": 1059
    },
    {
      "epoch": 12.01,
      "learning_rate": 1.0942760942760944e-05,
      "loss": 0.133,
      "step": 1060
    },
    {
      "epoch": 12.12,
      "learning_rate": 1.0521885521885522e-05,
      "loss": 0.1293,
      "step": 1070
    },
    {
      "epoch": 12.24,
      "learning_rate": 1.0101010101010101e-05,
      "loss": 0.1232,
      "step": 1080
    },
    {
      "epoch": 12.35,
      "learning_rate": 9.68013468013468e-06,
      "loss": 0.1415,
      "step": 1090
    },
    {
      "epoch": 12.46,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.1425,
      "step": 1100
    },
    {
      "epoch": 12.58,
      "learning_rate": 8.838383838383838e-06,
      "loss": 0.1062,
      "step": 1110
    },
    {
      "epoch": 12.69,
      "learning_rate": 8.417508417508419e-06,
      "loss": 0.1193,
      "step": 1120
    },
    {
      "epoch": 12.8,
      "learning_rate": 7.996632996632998e-06,
      "loss": 0.1539,
      "step": 1130
    },
    {
      "epoch": 12.92,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 0.1565,
      "step": 1140
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.8495867768595041,
      "eval_loss": 0.47939661145210266,
      "eval_runtime": 16.5708,
      "eval_samples_per_second": 73.02,
      "eval_steps_per_second": 4.586,
      "step": 1147
    },
    {
      "epoch": 13.03,
      "learning_rate": 7.1548821548821545e-06,
      "loss": 0.1193,
      "step": 1150
    },
    {
      "epoch": 13.14,
      "learning_rate": 6.734006734006734e-06,
      "loss": 0.1287,
      "step": 1160
    },
    {
      "epoch": 13.26,
      "learning_rate": 6.313131313131314e-06,
      "loss": 0.1186,
      "step": 1170
    },
    {
      "epoch": 13.37,
      "learning_rate": 5.892255892255893e-06,
      "loss": 0.1562,
      "step": 1180
    },
    {
      "epoch": 13.48,
      "learning_rate": 5.471380471380472e-06,
      "loss": 0.1297,
      "step": 1190
    },
    {
      "epoch": 13.6,
      "learning_rate": 5.050505050505051e-06,
      "loss": 0.158,
      "step": 1200
    },
    {
      "epoch": 13.71,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.1188,
      "step": 1210
    },
    {
      "epoch": 13.82,
      "learning_rate": 4.208754208754209e-06,
      "loss": 0.1237,
      "step": 1220
    },
    {
      "epoch": 13.94,
      "learning_rate": 3.7878787878787882e-06,
      "loss": 0.1236,
      "step": 1230
    },
    {
      "epoch": 13.99,
      "eval_accuracy": 0.8462809917355372,
      "eval_loss": 0.48630189895629883,
      "eval_runtime": 16.6832,
      "eval_samples_per_second": 72.528,
      "eval_steps_per_second": 4.555,
      "step": 1235
    },
    {
      "epoch": 14.05,
      "learning_rate": 3.367003367003367e-06,
      "loss": 0.103,
      "step": 1240
    },
    {
      "epoch": 14.16,
      "learning_rate": 2.9461279461279464e-06,
      "loss": 0.1352,
      "step": 1250
    },
    {
      "epoch": 14.28,
      "learning_rate": 2.5252525252525253e-06,
      "loss": 0.1372,
      "step": 1260
    },
    {
      "epoch": 14.39,
      "learning_rate": 2.1043771043771047e-06,
      "loss": 0.1215,
      "step": 1270
    },
    {
      "epoch": 14.5,
      "learning_rate": 1.6835016835016836e-06,
      "loss": 0.1334,
      "step": 1280
    },
    {
      "epoch": 14.62,
      "learning_rate": 1.2626262626262627e-06,
      "loss": 0.1326,
      "step": 1290
    },
    {
      "epoch": 14.73,
      "learning_rate": 8.417508417508418e-07,
      "loss": 0.0859,
      "step": 1300
    },
    {
      "epoch": 14.84,
      "learning_rate": 4.208754208754209e-07,
      "loss": 0.1313,
      "step": 1310
    },
    {
      "epoch": 14.96,
      "learning_rate": 0.0,
      "loss": 0.1407,
      "step": 1320
    },
    {
      "epoch": 14.96,
      "eval_accuracy": 0.8495867768595041,
      "eval_loss": 0.48121175169944763,
      "eval_runtime": 16.5675,
      "eval_samples_per_second": 73.034,
      "eval_steps_per_second": 4.587,
      "step": 1320
    },
    {
      "epoch": 14.96,
      "step": 1320,
      "total_flos": 6.546875329145733e+18,
      "train_loss": 0.44751356618874,
      "train_runtime": 3414.1758,
      "train_samples_per_second": 24.814,
      "train_steps_per_second": 0.387
    }
  ],
  "logging_steps": 10,
  "max_steps": 1320,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 6.546875329145733e+18,
  "trial_name": null,
  "trial_params": null
}