{
  "best_metric": 0.9726247987117552,
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-bottom_cleaned_data/checkpoint-1047",
  "epoch": 9.97134670487106,
  "global_step": 1740,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.3661,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.994192799070848e-05,
      "loss": 1.1811,
      "step": 20
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.965156794425087e-05,
      "loss": 0.9235,
      "step": 30
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.9361207897793264e-05,
      "loss": 0.84,
      "step": 40
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.907084785133566e-05,
      "loss": 0.6387,
      "step": 50
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.878048780487805e-05,
      "loss": 0.7581,
      "step": 60
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.8490127758420445e-05,
      "loss": 0.6915,
      "step": 70
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.819976771196283e-05,
      "loss": 0.6255,
      "step": 80
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.7909407665505226e-05,
      "loss": 0.527,
      "step": 90
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.5023,
      "step": 100
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.7328687572590014e-05,
      "loss": 0.5157,
      "step": 110
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.703832752613241e-05,
      "loss": 0.4598,
      "step": 120
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.6747967479674795e-05,
      "loss": 0.4344,
      "step": 130
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.6457607433217196e-05,
      "loss": 0.4745,
      "step": 140
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.616724738675958e-05,
      "loss": 0.4485,
      "step": 150
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.587688734030198e-05,
      "loss": 0.3849,
      "step": 160
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.5586527293844364e-05,
      "loss": 0.4444,
      "step": 170
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9162640901771336,
      "eval_loss": 0.2271285504102707,
      "eval_runtime": 8.1654,
      "eval_samples_per_second": 76.052,
      "eval_steps_per_second": 9.552,
      "step": 174
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.529616724738676e-05,
      "loss": 0.4314,
      "step": 180
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.500580720092916e-05,
      "loss": 0.325,
      "step": 190
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.4715447154471546e-05,
      "loss": 0.3656,
      "step": 200
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.442508710801394e-05,
      "loss": 0.3626,
      "step": 210
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.413472706155633e-05,
      "loss": 0.3937,
      "step": 220
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.384436701509873e-05,
      "loss": 0.3097,
      "step": 230
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.3554006968641115e-05,
      "loss": 0.3426,
      "step": 240
    },
    {
      "epoch": 1.43,
      "learning_rate": 4.326364692218351e-05,
      "loss": 0.3182,
      "step": 250
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.29732868757259e-05,
      "loss": 0.3147,
      "step": 260
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.26829268292683e-05,
      "loss": 0.3389,
      "step": 270
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.239256678281069e-05,
      "loss": 0.3291,
      "step": 280
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.210220673635308e-05,
      "loss": 0.3706,
      "step": 290
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.181184668989547e-05,
      "loss": 0.3576,
      "step": 300
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.1521486643437866e-05,
      "loss": 0.3543,
      "step": 310
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.123112659698026e-05,
      "loss": 0.305,
      "step": 320
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.0940766550522653e-05,
      "loss": 0.3487,
      "step": 330
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.065040650406504e-05,
      "loss": 0.3518,
      "step": 340
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9033816425120773,
      "eval_loss": 0.24492110311985016,
      "eval_runtime": 9.5576,
      "eval_samples_per_second": 64.975,
      "eval_steps_per_second": 8.161,
      "step": 349
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.0360046457607435e-05,
      "loss": 0.2923,
      "step": 350
    },
    {
      "epoch": 2.06,
      "learning_rate": 4.006968641114983e-05,
      "loss": 0.3222,
      "step": 360
    },
    {
      "epoch": 2.12,
      "learning_rate": 3.977932636469222e-05,
      "loss": 0.317,
      "step": 370
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.948896631823461e-05,
      "loss": 0.3217,
      "step": 380
    },
    {
      "epoch": 2.23,
      "learning_rate": 3.9198606271777003e-05,
      "loss": 0.2979,
      "step": 390
    },
    {
      "epoch": 2.29,
      "learning_rate": 3.89082462253194e-05,
      "loss": 0.259,
      "step": 400
    },
    {
      "epoch": 2.35,
      "learning_rate": 3.861788617886179e-05,
      "loss": 0.3416,
      "step": 410
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.8327526132404185e-05,
      "loss": 0.3103,
      "step": 420
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.803716608594657e-05,
      "loss": 0.2513,
      "step": 430
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.7746806039488966e-05,
      "loss": 0.2599,
      "step": 440
    },
    {
      "epoch": 2.58,
      "learning_rate": 3.745644599303136e-05,
      "loss": 0.3085,
      "step": 450
    },
    {
      "epoch": 2.64,
      "learning_rate": 3.7166085946573754e-05,
      "loss": 0.2228,
      "step": 460
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.687572590011615e-05,
      "loss": 0.2387,
      "step": 470
    },
    {
      "epoch": 2.75,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 0.2562,
      "step": 480
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.629500580720093e-05,
      "loss": 0.2431,
      "step": 490
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.600464576074332e-05,
      "loss": 0.2774,
      "step": 500
    },
    {
      "epoch": 2.92,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.2445,
      "step": 510
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.5423925667828104e-05,
      "loss": 0.225,
      "step": 520
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9500805152979066,
      "eval_loss": 0.13247297704219818,
      "eval_runtime": 8.7139,
      "eval_samples_per_second": 71.265,
      "eval_steps_per_second": 8.951,
      "step": 523
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.51335656213705e-05,
      "loss": 0.2198,
      "step": 530
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.48432055749129e-05,
      "loss": 0.2398,
      "step": 540
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.4552845528455286e-05,
      "loss": 0.1821,
      "step": 550
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.426248548199768e-05,
      "loss": 0.2148,
      "step": 560
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.397212543554007e-05,
      "loss": 0.3183,
      "step": 570
    },
    {
      "epoch": 3.32,
      "learning_rate": 3.368176538908247e-05,
      "loss": 0.2292,
      "step": 580
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.3391405342624855e-05,
      "loss": 0.1987,
      "step": 590
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.310104529616725e-05,
      "loss": 0.2434,
      "step": 600
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.281068524970964e-05,
      "loss": 0.2906,
      "step": 610
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.2520325203252037e-05,
      "loss": 0.2117,
      "step": 620
    },
    {
      "epoch": 3.61,
      "learning_rate": 3.222996515679443e-05,
      "loss": 0.22,
      "step": 630
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.193960511033682e-05,
      "loss": 0.1831,
      "step": 640
    },
    {
      "epoch": 3.72,
      "learning_rate": 3.164924506387921e-05,
      "loss": 0.1943,
      "step": 650
    },
    {
      "epoch": 3.78,
      "learning_rate": 3.13588850174216e-05,
      "loss": 0.2528,
      "step": 660
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.1068524970964e-05,
      "loss": 0.2057,
      "step": 670
    },
    {
      "epoch": 3.9,
      "learning_rate": 3.077816492450639e-05,
      "loss": 0.2584,
      "step": 680
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.048780487804878e-05,
      "loss": 0.2195,
      "step": 690
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9549114331723028,
      "eval_loss": 0.10237770527601242,
      "eval_runtime": 9.1181,
      "eval_samples_per_second": 68.106,
      "eval_steps_per_second": 8.554,
      "step": 698
    },
    {
      "epoch": 4.01,
      "learning_rate": 3.0197444831591178e-05,
      "loss": 0.2832,
      "step": 700
    },
    {
      "epoch": 4.07,
      "learning_rate": 2.9907084785133565e-05,
      "loss": 0.2087,
      "step": 710
    },
    {
      "epoch": 4.13,
      "learning_rate": 2.9616724738675962e-05,
      "loss": 0.1709,
      "step": 720
    },
    {
      "epoch": 4.18,
      "learning_rate": 2.932636469221835e-05,
      "loss": 0.1891,
      "step": 730
    },
    {
      "epoch": 4.24,
      "learning_rate": 2.9036004645760743e-05,
      "loss": 0.1923,
      "step": 740
    },
    {
      "epoch": 4.3,
      "learning_rate": 2.874564459930314e-05,
      "loss": 0.2178,
      "step": 750
    },
    {
      "epoch": 4.36,
      "learning_rate": 2.8455284552845528e-05,
      "loss": 0.2256,
      "step": 760
    },
    {
      "epoch": 4.41,
      "learning_rate": 2.8164924506387925e-05,
      "loss": 0.2184,
      "step": 770
    },
    {
      "epoch": 4.47,
      "learning_rate": 2.7874564459930312e-05,
      "loss": 0.245,
      "step": 780
    },
    {
      "epoch": 4.53,
      "learning_rate": 2.758420441347271e-05,
      "loss": 0.229,
      "step": 790
    },
    {
      "epoch": 4.58,
      "learning_rate": 2.7293844367015097e-05,
      "loss": 0.2236,
      "step": 800
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.7003484320557494e-05,
      "loss": 0.2517,
      "step": 810
    },
    {
      "epoch": 4.7,
      "learning_rate": 2.6713124274099888e-05,
      "loss": 0.1755,
      "step": 820
    },
    {
      "epoch": 4.76,
      "learning_rate": 2.642276422764228e-05,
      "loss": 0.1663,
      "step": 830
    },
    {
      "epoch": 4.81,
      "learning_rate": 2.6132404181184672e-05,
      "loss": 0.1958,
      "step": 840
    },
    {
      "epoch": 4.87,
      "learning_rate": 2.5842044134727063e-05,
      "loss": 0.2334,
      "step": 850
    },
    {
      "epoch": 4.93,
      "learning_rate": 2.5551684088269457e-05,
      "loss": 0.2576,
      "step": 860
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.5261324041811847e-05,
      "loss": 0.2627,
      "step": 870
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9629629629629629,
      "eval_loss": 0.1045805960893631,
      "eval_runtime": 9.624,
      "eval_samples_per_second": 64.526,
      "eval_steps_per_second": 8.105,
      "step": 872
    },
    {
      "epoch": 5.04,
      "learning_rate": 2.497096399535424e-05,
      "loss": 0.2139,
      "step": 880
    },
    {
      "epoch": 5.1,
      "learning_rate": 2.4680603948896632e-05,
      "loss": 0.2114,
      "step": 890
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 0.2097,
      "step": 900
    },
    {
      "epoch": 5.21,
      "learning_rate": 2.4099883855981416e-05,
      "loss": 0.2136,
      "step": 910
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.2523,
      "step": 920
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.3519163763066204e-05,
      "loss": 0.1787,
      "step": 930
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.3228803716608598e-05,
      "loss": 0.1828,
      "step": 940
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.293844367015099e-05,
      "loss": 0.1549,
      "step": 950
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.264808362369338e-05,
      "loss": 0.1531,
      "step": 960
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.2357723577235773e-05,
      "loss": 0.1957,
      "step": 970
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.2067363530778164e-05,
      "loss": 0.222,
      "step": 980
    },
    {
      "epoch": 5.67,
      "learning_rate": 2.1777003484320557e-05,
      "loss": 0.2211,
      "step": 990
    },
    {
      "epoch": 5.73,
      "learning_rate": 2.148664343786295e-05,
      "loss": 0.1711,
      "step": 1000
    },
    {
      "epoch": 5.79,
      "learning_rate": 2.1196283391405345e-05,
      "loss": 0.1759,
      "step": 1010
    },
    {
      "epoch": 5.85,
      "learning_rate": 2.0905923344947736e-05,
      "loss": 0.2333,
      "step": 1020
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.061556329849013e-05,
      "loss": 0.2269,
      "step": 1030
    },
    {
      "epoch": 5.96,
      "learning_rate": 2.032520325203252e-05,
      "loss": 0.142,
      "step": 1040
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9726247987117552,
      "eval_loss": 0.08394750207662582,
      "eval_runtime": 8.0552,
      "eval_samples_per_second": 77.093,
      "eval_steps_per_second": 9.683,
      "step": 1047
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.0034843205574914e-05,
      "loss": 0.1847,
      "step": 1050
    },
    {
      "epoch": 6.07,
      "learning_rate": 1.9744483159117305e-05,
      "loss": 0.1569,
      "step": 1060
    },
    {
      "epoch": 6.13,
      "learning_rate": 1.94541231126597e-05,
      "loss": 0.2001,
      "step": 1070
    },
    {
      "epoch": 6.19,
      "learning_rate": 1.9163763066202093e-05,
      "loss": 0.1721,
      "step": 1080
    },
    {
      "epoch": 6.25,
      "learning_rate": 1.8873403019744483e-05,
      "loss": 0.1406,
      "step": 1090
    },
    {
      "epoch": 6.3,
      "learning_rate": 1.8583042973286877e-05,
      "loss": 0.1943,
      "step": 1100
    },
    {
      "epoch": 6.36,
      "learning_rate": 1.8292682926829268e-05,
      "loss": 0.1812,
      "step": 1110
    },
    {
      "epoch": 6.42,
      "learning_rate": 1.800232288037166e-05,
      "loss": 0.105,
      "step": 1120
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.7711962833914052e-05,
      "loss": 0.1356,
      "step": 1130
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.742160278745645e-05,
      "loss": 0.1678,
      "step": 1140
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.713124274099884e-05,
      "loss": 0.1989,
      "step": 1150
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.6840882694541234e-05,
      "loss": 0.0919,
      "step": 1160
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.6550522648083624e-05,
      "loss": 0.1554,
      "step": 1170
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.6260162601626018e-05,
      "loss": 0.1257,
      "step": 1180
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.596980255516841e-05,
      "loss": 0.2064,
      "step": 1190
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.56794425087108e-05,
      "loss": 0.1877,
      "step": 1200
    },
    {
      "epoch": 6.93,
      "learning_rate": 1.5389082462253197e-05,
      "loss": 0.1878,
      "step": 1210
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.5098722415795589e-05,
      "loss": 0.1516,
      "step": 1220
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9629629629629629,
      "eval_loss": 0.09183160960674286,
      "eval_runtime": 9.7233,
      "eval_samples_per_second": 63.867,
      "eval_steps_per_second": 8.022,
      "step": 1221
    },
    {
      "epoch": 7.05,
      "learning_rate": 1.4808362369337981e-05,
      "loss": 0.1592,
      "step": 1230
    },
    {
      "epoch": 7.11,
      "learning_rate": 1.4518002322880372e-05,
      "loss": 0.198,
      "step": 1240
    },
    {
      "epoch": 7.16,
      "learning_rate": 1.4227642276422764e-05,
      "loss": 0.1892,
      "step": 1250
    },
    {
      "epoch": 7.22,
      "learning_rate": 1.3937282229965156e-05,
      "loss": 0.1777,
      "step": 1260
    },
    {
      "epoch": 7.28,
      "learning_rate": 1.3646922183507548e-05,
      "loss": 0.0918,
      "step": 1270
    },
    {
      "epoch": 7.34,
      "learning_rate": 1.3356562137049944e-05,
      "loss": 0.0896,
      "step": 1280
    },
    {
      "epoch": 7.39,
      "learning_rate": 1.3066202090592336e-05,
      "loss": 0.1273,
      "step": 1290
    },
    {
      "epoch": 7.45,
      "learning_rate": 1.2775842044134728e-05,
      "loss": 0.1521,
      "step": 1300
    },
    {
      "epoch": 7.51,
      "learning_rate": 1.248548199767712e-05,
      "loss": 0.1628,
      "step": 1310
    },
    {
      "epoch": 7.56,
      "learning_rate": 1.2195121951219513e-05,
      "loss": 0.1587,
      "step": 1320
    },
    {
      "epoch": 7.62,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.205,
      "step": 1330
    },
    {
      "epoch": 7.68,
      "learning_rate": 1.1614401858304299e-05,
      "loss": 0.1415,
      "step": 1340
    },
    {
      "epoch": 7.74,
      "learning_rate": 1.132404181184669e-05,
      "loss": 0.1982,
      "step": 1350
    },
    {
      "epoch": 7.79,
      "learning_rate": 1.1033681765389082e-05,
      "loss": 0.1079,
      "step": 1360
    },
    {
      "epoch": 7.85,
      "learning_rate": 1.0743321718931476e-05,
      "loss": 0.1115,
      "step": 1370
    },
    {
      "epoch": 7.91,
      "learning_rate": 1.0452961672473868e-05,
      "loss": 0.2234,
      "step": 1380
    },
    {
      "epoch": 7.97,
      "learning_rate": 1.016260162601626e-05,
      "loss": 0.1498,
      "step": 1390
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9726247987117552,
      "eval_loss": 0.07796485722064972,
      "eval_runtime": 9.3543,
      "eval_samples_per_second": 66.386,
      "eval_steps_per_second": 8.338,
      "step": 1396
    },
    {
      "epoch": 8.02,
      "learning_rate": 9.872241579558652e-06,
      "loss": 0.1299,
      "step": 1400
    },
    {
      "epoch": 8.08,
      "learning_rate": 9.581881533101046e-06,
      "loss": 0.1389,
      "step": 1410
    },
    {
      "epoch": 8.14,
      "learning_rate": 9.291521486643439e-06,
      "loss": 0.1499,
      "step": 1420
    },
    {
      "epoch": 8.19,
      "learning_rate": 9.00116144018583e-06,
      "loss": 0.2523,
      "step": 1430
    },
    {
      "epoch": 8.25,
      "learning_rate": 8.710801393728225e-06,
      "loss": 0.1067,
      "step": 1440
    },
    {
      "epoch": 8.31,
      "learning_rate": 8.420441347270617e-06,
      "loss": 0.1696,
      "step": 1450
    },
    {
      "epoch": 8.37,
      "learning_rate": 8.130081300813009e-06,
      "loss": 0.1279,
      "step": 1460
    },
    {
      "epoch": 8.42,
      "learning_rate": 7.8397212543554e-06,
      "loss": 0.1523,
      "step": 1470
    },
    {
      "epoch": 8.48,
      "learning_rate": 7.5493612078977944e-06,
      "loss": 0.1335,
      "step": 1480
    },
    {
      "epoch": 8.54,
      "learning_rate": 7.259001161440186e-06,
      "loss": 0.1122,
      "step": 1490
    },
    {
      "epoch": 8.6,
      "learning_rate": 6.968641114982578e-06,
      "loss": 0.1596,
      "step": 1500
    },
    {
      "epoch": 8.65,
      "learning_rate": 6.678281068524972e-06,
      "loss": 0.0975,
      "step": 1510
    },
    {
      "epoch": 8.71,
      "learning_rate": 6.387921022067364e-06,
      "loss": 0.1002,
      "step": 1520
    },
    {
      "epoch": 8.77,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 0.0895,
      "step": 1530
    },
    {
      "epoch": 8.83,
      "learning_rate": 5.8072009291521495e-06,
      "loss": 0.0911,
      "step": 1540
    },
    {
      "epoch": 8.88,
      "learning_rate": 5.516840882694541e-06,
      "loss": 0.157,
      "step": 1550
    },
    {
      "epoch": 8.94,
      "learning_rate": 5.226480836236934e-06,
      "loss": 0.1513,
      "step": 1560
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.936120789779326e-06,
      "loss": 0.1189,
      "step": 1570
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.966183574879227,
      "eval_loss": 0.07211676239967346,
      "eval_runtime": 8.3308,
      "eval_samples_per_second": 74.542,
      "eval_steps_per_second": 9.363,
      "step": 1570
    },
    {
      "epoch": 9.05,
      "learning_rate": 4.645760743321719e-06,
      "loss": 0.1239,
      "step": 1580
    },
    {
      "epoch": 9.11,
      "learning_rate": 4.355400696864112e-06,
      "loss": 0.1039,
      "step": 1590
    },
    {
      "epoch": 9.17,
      "learning_rate": 4.0650406504065046e-06,
      "loss": 0.1144,
      "step": 1600
    },
    {
      "epoch": 9.23,
      "learning_rate": 3.7746806039488972e-06,
      "loss": 0.1033,
      "step": 1610
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.484320557491289e-06,
      "loss": 0.0829,
      "step": 1620
    },
    {
      "epoch": 9.34,
      "learning_rate": 3.193960511033682e-06,
      "loss": 0.1072,
      "step": 1630
    },
    {
      "epoch": 9.4,
      "learning_rate": 2.9036004645760748e-06,
      "loss": 0.1257,
      "step": 1640
    },
    {
      "epoch": 9.46,
      "learning_rate": 2.613240418118467e-06,
      "loss": 0.0804,
      "step": 1650
    },
    {
      "epoch": 9.51,
      "learning_rate": 2.3228803716608596e-06,
      "loss": 0.119,
      "step": 1660
    },
    {
      "epoch": 9.57,
      "learning_rate": 2.0325203252032523e-06,
      "loss": 0.0765,
      "step": 1670
    },
    {
      "epoch": 9.63,
      "learning_rate": 1.7421602787456445e-06,
      "loss": 0.1681,
      "step": 1680
    },
    {
      "epoch": 9.68,
      "learning_rate": 1.4518002322880374e-06,
      "loss": 0.1005,
      "step": 1690
    },
    {
      "epoch": 9.74,
      "learning_rate": 1.1614401858304298e-06,
      "loss": 0.1498,
      "step": 1700
    },
    {
      "epoch": 9.8,
      "learning_rate": 8.710801393728223e-07,
      "loss": 0.1365,
      "step": 1710
    },
    {
      "epoch": 9.86,
      "learning_rate": 5.807200929152149e-07,
      "loss": 0.122,
      "step": 1720
    },
    {
      "epoch": 9.91,
      "learning_rate": 2.9036004645760745e-07,
      "loss": 0.0906,
      "step": 1730
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.0,
      "loss": 0.1594,
      "step": 1740
    },
    {
      "epoch": 9.97,
      "eval_accuracy": 0.9726247987117552,
      "eval_loss": 0.0668075904250145,
      "eval_runtime": 9.2098,
      "eval_samples_per_second": 67.428,
      "eval_steps_per_second": 8.469,
      "step": 1740
    },
    {
      "epoch": 9.97,
      "step": 1740,
      "total_flos": 1.3833876610752307e+18,
      "train_loss": 0.24886192696532983,
      "train_runtime": 1438.0658,
      "train_samples_per_second": 38.809,
      "train_steps_per_second": 1.21
    }
  ],
  "max_steps": 1740,
  "num_train_epochs": 10,
  "total_flos": 1.3833876610752307e+18,
  "trial_name": null,
  "trial_params": null
}