| { | |
| "best_metric": 0.936, | |
| "best_model_checkpoint": "output/checkpoint-20000", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 20000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.3333333333333337e-06, | |
| "loss": 0.8125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.6666666666666675e-06, | |
| "loss": 0.7599, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7309, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.3333333333333335e-05, | |
| "loss": 0.7104, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.6939, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6824, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.686, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.666666666666667e-05, | |
| "loss": 0.6693, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3e-05, | |
| "loss": 0.6436, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.6101, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.493, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.993333333333333e-05, | |
| "loss": 0.4239, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.326666666666667e-05, | |
| "loss": 0.3391, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.66e-05, | |
| "loss": 0.3332, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.986666666666666e-05, | |
| "loss": 0.3966, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 5.32e-05, | |
| "loss": 0.3137, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 5.653333333333333e-05, | |
| "loss": 0.3072, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 5.986666666666667e-05, | |
| "loss": 0.272, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.32e-05, | |
| "loss": 0.2715, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.653333333333333e-05, | |
| "loss": 0.2892, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.986666666666667e-05, | |
| "loss": 0.28, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 7.32e-05, | |
| "loss": 0.3102, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.653333333333333e-05, | |
| "loss": 0.2663, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.986666666666667e-05, | |
| "loss": 0.2778, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 8.32e-05, | |
| "loss": 0.3414, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 8.653333333333334e-05, | |
| "loss": 0.2608, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 8.986666666666666e-05, | |
| "loss": 0.2847, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.32e-05, | |
| "loss": 0.2801, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.653333333333334e-05, | |
| "loss": 0.2777, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 9.986666666666668e-05, | |
| "loss": 0.2819, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0001032, | |
| "loss": 0.2631, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010653333333333333, | |
| "loss": 0.255, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010986666666666668, | |
| "loss": 0.3253, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001132, | |
| "loss": 0.2632, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011653333333333334, | |
| "loss": 0.2808, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00011986666666666666, | |
| "loss": 0.2487, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001232, | |
| "loss": 0.1958, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012653333333333334, | |
| "loss": 0.2882, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00012986666666666666, | |
| "loss": 0.2877, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001332, | |
| "loss": 0.2412, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00013653333333333334, | |
| "loss": 0.2608, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00013986666666666666, | |
| "loss": 0.2296, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00014319999999999998, | |
| "loss": 0.2311, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00014653333333333334, | |
| "loss": 0.2603, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00014986666666666669, | |
| "loss": 0.2923, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0001532, | |
| "loss": 0.2415, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015646666666666668, | |
| "loss": 0.2671, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001598, | |
| "loss": 0.2421, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00016313333333333333, | |
| "loss": 0.2449, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00016646666666666668, | |
| "loss": 0.2399, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9037, | |
| "eval_loss": 0.2539164125919342, | |
| "eval_runtime": 132.4594, | |
| "eval_samples_per_second": 75.495, | |
| "eval_steps_per_second": 4.718, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0001698, | |
| "loss": 0.2453, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00017313333333333333, | |
| "loss": 0.2485, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00017646666666666666, | |
| "loss": 0.2321, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00017979999999999998, | |
| "loss": 0.239, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00018313333333333336, | |
| "loss": 0.2389, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00018646666666666668, | |
| "loss": 0.2216, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0001898, | |
| "loss": 0.3027, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00019313333333333333, | |
| "loss": 0.2683, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00019646666666666668, | |
| "loss": 0.2378, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001998, | |
| "loss": 0.2598, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00020313333333333333, | |
| "loss": 0.2363, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00020646666666666665, | |
| "loss": 0.2437, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.0002098, | |
| "loss": 0.244, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00021313333333333335, | |
| "loss": 0.2264, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00021646666666666668, | |
| "loss": 0.188, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.0002198, | |
| "loss": 0.2507, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00022313333333333333, | |
| "loss": 0.2265, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00022646666666666668, | |
| "loss": 0.2265, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.0002298, | |
| "loss": 0.226, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00023313333333333333, | |
| "loss": 0.2502, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00023646666666666665, | |
| "loss": 0.2436, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00023980000000000003, | |
| "loss": 0.2472, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00024313333333333335, | |
| "loss": 0.2596, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002464666666666667, | |
| "loss": 0.2381, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0002498, | |
| "loss": 0.231, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0002531333333333333, | |
| "loss": 0.2489, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0002564666666666667, | |
| "loss": 0.1849, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00025979999999999997, | |
| "loss": 0.2742, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00026313333333333335, | |
| "loss": 0.2268, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0002664666666666667, | |
| "loss": 0.287, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0002698, | |
| "loss": 0.208, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.0002731333333333333, | |
| "loss": 0.2446, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0002764666666666667, | |
| "loss": 0.2359, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0002798, | |
| "loss": 0.2917, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00028313333333333335, | |
| "loss": 0.2051, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00028646666666666667, | |
| "loss": 0.2119, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.0002898, | |
| "loss": 0.1946, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.0002931333333333334, | |
| "loss": 0.2159, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00029646666666666664, | |
| "loss": 0.2793, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.0002998, | |
| "loss": 0.2675, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0003031333333333333, | |
| "loss": 0.1927, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00030646666666666667, | |
| "loss": 0.177, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00030980000000000005, | |
| "loss": 0.2681, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00031306666666666667, | |
| "loss": 0.2123, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00031640000000000005, | |
| "loss": 0.2232, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.0003197333333333333, | |
| "loss": 0.222, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.0003230666666666667, | |
| "loss": 0.2126, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0003264, | |
| "loss": 0.2268, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00032973333333333334, | |
| "loss": 0.2239, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00033306666666666667, | |
| "loss": 0.2454, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9064, | |
| "eval_loss": 0.27525001764297485, | |
| "eval_runtime": 132.3865, | |
| "eval_samples_per_second": 75.536, | |
| "eval_steps_per_second": 4.721, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.0003364, | |
| "loss": 0.2001, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 0.0003397333333333333, | |
| "loss": 0.1839, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.0003430666666666667, | |
| "loss": 0.2544, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.0003464, | |
| "loss": 0.1747, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.00034973333333333334, | |
| "loss": 0.1857, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.00035306666666666667, | |
| "loss": 0.2486, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.0003564, | |
| "loss": 0.1884, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.00035973333333333337, | |
| "loss": 0.2118, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.00036306666666666664, | |
| "loss": 0.2187, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.0003664, | |
| "loss": 0.2032, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.00036973333333333334, | |
| "loss": 0.1931, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.00037306666666666666, | |
| "loss": 0.2272, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.00037640000000000004, | |
| "loss": 0.2196, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.0003797333333333333, | |
| "loss": 0.1778, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.0003830666666666667, | |
| "loss": 0.1637, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.0003864, | |
| "loss": 0.1595, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.00038973333333333334, | |
| "loss": 0.2069, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00039306666666666666, | |
| "loss": 0.2338, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.0003964, | |
| "loss": 0.2159, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.00039973333333333336, | |
| "loss": 0.2325, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.0004030666666666667, | |
| "loss": 0.2506, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.0004064, | |
| "loss": 0.2129, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.00040973333333333334, | |
| "loss": 0.2239, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.0004130666666666667, | |
| "loss": 0.231, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.0004164, | |
| "loss": 0.2157, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.00041973333333333336, | |
| "loss": 0.2642, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00042306666666666663, | |
| "loss": 0.2308, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.0004264, | |
| "loss": 0.2229, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.0004297333333333334, | |
| "loss": 0.2231, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00043306666666666666, | |
| "loss": 0.1866, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.00043640000000000004, | |
| "loss": 0.1999, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.0004397333333333333, | |
| "loss": 0.2566, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.0004430666666666667, | |
| "loss": 0.2168, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.0004464, | |
| "loss": 0.2551, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.00044973333333333333, | |
| "loss": 0.1921, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00045306666666666666, | |
| "loss": 0.2241, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.0004564, | |
| "loss": 0.2359, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00045966666666666665, | |
| "loss": 0.2348, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.00046300000000000003, | |
| "loss": 0.2228, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00046633333333333336, | |
| "loss": 0.2302, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.0004696666666666667, | |
| "loss": 0.2455, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.000473, | |
| "loss": 0.2731, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.00047633333333333333, | |
| "loss": 0.2365, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.0004796666666666667, | |
| "loss": 0.2554, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.000483, | |
| "loss": 0.2386, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.00048633333333333335, | |
| "loss": 0.1938, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.0004896666666666667, | |
| "loss": 0.23, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 0.0004930000000000001, | |
| "loss": 0.2151, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0004963333333333333, | |
| "loss": 0.2073, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0004996666666666667, | |
| "loss": 0.2251, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9167, | |
| "eval_loss": 0.24363669753074646, | |
| "eval_runtime": 132.3921, | |
| "eval_samples_per_second": 75.533, | |
| "eval_steps_per_second": 4.721, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.0004999918425141952, | |
| "loss": 0.1875, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.0004999636444812977, | |
| "loss": 0.2006, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.0004999153074629148, | |
| "loss": 0.2359, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.0004998468353534468, | |
| "loss": 0.1878, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.0004997582336695312, | |
| "loss": 0.2309, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.0004996495095495983, | |
| "loss": 0.2101, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.0004995206717532957, | |
| "loss": 0.2599, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 0.0004993717306607832, | |
| "loss": 0.2405, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.0004992026982718955, | |
| "loss": 0.2082, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.0004990135882051766, | |
| "loss": 0.2071, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 0.0004988044156967816, | |
| "loss": 0.1868, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.0004985751975992497, | |
| "loss": 0.2029, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.0004983259523801463, | |
| "loss": 0.2113, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.000498056700120575, | |
| "loss": 0.2172, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.0004977674625135598, | |
| "loss": 0.1988, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 0.0004974582628622975, | |
| "loss": 0.2154, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 0.0004971291260782797, | |
| "loss": 0.2122, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 0.0004967800786792866, | |
| "loss": 0.1891, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 0.0004964111487872495, | |
| "loss": 0.2461, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 0.000496022366125986, | |
| "loss": 0.1967, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 0.0004956137620188048, | |
| "loss": 0.2278, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.000495185369385982, | |
| "loss": 0.2029, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 0.0004947372227421084, | |
| "loss": 0.2094, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 0.0004942693581933101, | |
| "loss": 0.2701, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 0.000493781813434338, | |
| "loss": 0.1984, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 0.0004932746277455317, | |
| "loss": 0.2277, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 0.0004927478419896543, | |
| "loss": 0.1903, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 0.0004922014986086007, | |
| "loss": 0.1693, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 0.0004916356416199778, | |
| "loss": 0.2198, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 0.0004910503166135578, | |
| "loss": 0.1728, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 0.000490445570747606, | |
| "loss": 0.2523, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 0.0004898214527450807, | |
| "loss": 0.2319, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 0.0004891910707082325, | |
| "loss": 0.2402, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 0.0004885287457226171, | |
| "loss": 0.1891, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 0.00048784720303453175, | |
| "loss": 0.1828, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.0004871464975542746, | |
| "loss": 0.2131, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.0004864266857360456, | |
| "loss": 0.1854, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 0.00048568782557339713, | |
| "loss": 0.1927, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 0.0004849299765945623, | |
| "loss": 0.206, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 0.00048415319985765877, | |
| "loss": 0.2008, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 0.0004833575579457691, | |
| "loss": 0.1961, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 0.000482543114961899, | |
| "loss": 0.1752, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 0.00048170993652381247, | |
| "loss": 0.1751, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 0.00048085808975874506, | |
| "loss": 0.2128, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 0.000479987643297996, | |
| "loss": 0.188, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 0.0004790986672713982, | |
| "loss": 0.227, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 0.0004781912333016683, | |
| "loss": 0.1947, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 0.00047726541449863636, | |
| "loss": 0.2057, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.00047632128545335527, | |
| "loss": 0.195, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0004753589222320913, | |
| "loss": 0.1996, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9246, | |
| "eval_loss": 0.2271093726158142, | |
| "eval_runtime": 132.5243, | |
| "eval_samples_per_second": 75.458, | |
| "eval_steps_per_second": 4.716, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 0.00047437840237019567, | |
| "loss": 0.1785, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 0.00047337980486585735, | |
| "loss": 0.2067, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 0.00047236321017373893, | |
| "loss": 0.1896, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 0.00047132870019849415, | |
| "loss": 0.1719, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 0.00047027635828816915, | |
| "loss": 0.1836, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 0.0004692062692274873, | |
| "loss": 0.1785, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 0.0004681185192310183, | |
| "loss": 0.2045, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.0004670131959362323, | |
| "loss": 0.175, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 0.00046589038839643865, | |
| "loss": 0.1609, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 0.0004647501870736115, | |
| "loss": 0.2075, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 0.0004635926838311014, | |
| "loss": 0.1618, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 0.0004624179719262341, | |
| "loss": 0.2275, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 0.0004612261460027968, | |
| "loss": 0.1881, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 0.0004600173020834132, | |
| "loss": 0.1863, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 0.00045879153756180714, | |
| "loss": 0.1765, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 0.00045754895119495576, | |
| "loss": 0.1964, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 0.00045628964309513267, | |
| "loss": 0.2081, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 0.00045501371472184257, | |
| "loss": 0.1695, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 0.0004537212688736466, | |
| "loss": 0.2042, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.00045241240967988015, | |
| "loss": 0.2003, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 0.0004510872425922635, | |
| "loss": 0.2015, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 0.0004497458743764057, | |
| "loss": 0.1409, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 0.00044838841310320266, | |
| "loss": 0.1446, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 0.0004470149681401305, | |
| "loss": 0.1834, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 0.0004456256501424335, | |
| "loss": 0.1908, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 0.0004442205710442094, | |
| "loss": 0.1727, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 0.0004427998440493908, | |
| "loss": 0.1853, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 0.00044139246031239603, | |
| "loss": 0.2239, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.0004399410893820698, | |
| "loss": 0.1986, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 0.00043847441534296505, | |
| "loss": 0.1802, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 0.00043699255636157463, | |
| "loss": 0.1795, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 0.0004354956318278065, | |
| "loss": 0.1828, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 0.000433983762345365, | |
| "loss": 0.1806, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 0.00043245706972203383, | |
| "loss": 0.1666, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 0.00043091567695986265, | |
| "loss": 0.1972, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 0.0004293597082452566, | |
| "loss": 0.1707, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 0.0004277892889389715, | |
| "loss": 0.1942, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 0.0004262045455660132, | |
| "loss": 0.192, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 0.0004246056058054444, | |
| "loss": 0.2138, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 0.000422992598480097, | |
| "loss": 0.2031, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 0.0004213656535461942, | |
| "loss": 0.1608, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 0.0004197249020828794, | |
| "loss": 0.2179, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.00041807047628165576, | |
| "loss": 0.2141, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 0.00041640250943573574, | |
| "loss": 0.187, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 0.0004147211359293023, | |
| "loss": 0.1854, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 0.00041302649122668155, | |
| "loss": 0.1687, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 0.00041131871186142867, | |
| "loss": 0.1844, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 0.00040959793542532784, | |
| "loss": 0.2203, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 0.0004078643005573071, | |
| "loss": 0.1733, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.00040611794693226795, | |
| "loss": 0.1845, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9269, | |
| "eval_loss": 0.21157951653003693, | |
| "eval_runtime": 132.481, | |
| "eval_samples_per_second": 75.483, | |
| "eval_steps_per_second": 4.718, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 0.00040435901524983245, | |
| "loss": 0.1696, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 0.00040258764722300727, | |
| "loss": 0.1566, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 0.00040080398556676625, | |
| "loss": 0.1533, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 0.00039900817398655223, | |
| "loss": 0.1357, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 0.0003972003571666988, | |
| "loss": 0.1493, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 0.00039538068075877376, | |
| "loss": 0.2111, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.0003935492913698441, | |
| "loss": 0.1651, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 0.00039170633655066415, | |
| "loss": 0.1535, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 0.000389851964783788, | |
| "loss": 0.1618, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 0.00038798632547160653, | |
| "loss": 0.1672, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 0.00038610956892431015, | |
| "loss": 0.1675, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 0.0003842218463477791, | |
| "loss": 0.1482, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 0.0003823233098314008, | |
| "loss": 0.1637, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 0.00038041411233581636, | |
| "loss": 0.1497, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 0.0003784944076805974, | |
| "loss": 0.1742, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 0.00037656435053185226, | |
| "loss": 0.1357, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 0.0003746240963897659, | |
| "loss": 0.1938, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 0.000372673801576071, | |
| "loss": 0.1506, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 0.0003707136232214534, | |
| "loss": 0.1912, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 0.0003687437192528932, | |
| "loss": 0.1429, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.0003667642483809398, | |
| "loss": 0.1579, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 0.0003647753700869262, | |
| "loss": 0.1754, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 0.00036277724461011905, | |
| "loss": 0.1572, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 0.0003607700329348088, | |
| "loss": 0.1964, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 0.00035875389677734, | |
| "loss": 0.1813, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 0.0003567289985730813, | |
| "loss": 0.1673, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 0.00035469550146333947, | |
| "loss": 0.1418, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 0.0003526535692822146, | |
| "loss": 0.1536, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 0.0003506033665434007, | |
| "loss": 0.1923, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 0.0003485450584269317, | |
| "loss": 0.1772, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 0.0003464788107658724, | |
| "loss": 0.152, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 0.000344446345543944, | |
| "loss": 0.1638, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 0.00034236486931606976, | |
| "loss": 0.195, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 0.00034027595146696936, | |
| "loss": 0.1738, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 0.0003381797602958623, | |
| "loss": 0.1539, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 0.00033607646468796263, | |
| "loss": 0.1749, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.0003339662341008717, | |
| "loss": 0.1713, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 0.0003318492385509267, | |
| "loss": 0.151, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 0.00032972564859950137, | |
| "loss": 0.1818, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 0.0003275956353392653, | |
| "loss": 0.1499, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 0.00032545937038039904, | |
| "loss": 0.1601, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 0.00032331702583676745, | |
| "loss": 0.1754, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 0.0003211687743120539, | |
| "loss": 0.1466, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 0.0003190147888858529, | |
| "loss": 0.1606, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 0.0003168552430997262, | |
| "loss": 0.1654, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 0.0003146903109432208, | |
| "loss": 0.166, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 0.0003125201668398506, | |
| "loss": 0.1513, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 0.0003103449856330443, | |
| "loss": 0.1736, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 0.0003081649425720579, | |
| "loss": 0.2036, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.0003059802132978558, | |
| "loss": 0.205, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9312, | |
| "eval_loss": 0.19459494948387146, | |
| "eval_runtime": 132.3124, | |
| "eval_samples_per_second": 75.579, | |
| "eval_steps_per_second": 4.724, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 0.00030379097382895955, | |
| "loss": 0.1528, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 0.0003015974005472664, | |
| "loss": 0.1183, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 0.0002993996701838391, | |
| "loss": 0.1508, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 0.0002971979598046662, | |
| "loss": 0.1454, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 0.00029499244679639746, | |
| "loss": 0.1068, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 0.000292783308852051, | |
| "loss": 0.1451, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 0.0002905707239566978, | |
| "loss": 0.1349, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.00028835487037312155, | |
| "loss": 0.1293, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 0.00028613592662745665, | |
| "loss": 0.1029, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 0.0002839140714948043, | |
| "loss": 0.1677, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 0.0002816894839848297, | |
| "loss": 0.137, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 0.000279462343327339, | |
| "loss": 0.1447, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 0.00027723282895783963, | |
| "loss": 0.1268, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 0.0002750011205030834, | |
| "loss": 0.1075, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 0.00027276739776659444, | |
| "loss": 0.2144, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 0.00027053184071418275, | |
| "loss": 0.1229, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 0.00026829462945944505, | |
| "loss": 0.1633, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 0.0002660559442492531, | |
| "loss": 0.1457, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 0.0002638159654492318, | |
| "loss": 0.1168, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 0.00026157487352922783, | |
| "loss": 0.1294, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 0.0002593328490487688, | |
| "loss": 0.1347, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 0.0002570900726425172, | |
| "loss": 0.1396, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 0.00025484672500571576, | |
| "loss": 0.1444, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 0.0002526029868796305, | |
| "loss": 0.1418, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 0.00025035903903698785, | |
| "loss": 0.1275, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 0.00024811506226741077, | |
| "loss": 0.131, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 0.000245916111203349, | |
| "loss": 0.0828, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 0.00024367261051914146, | |
| "loss": 0.1857, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 0.00024142961961788734, | |
| "loss": 0.1173, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 0.000239187319212111, | |
| "loss": 0.1757, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 0.0002369458899587052, | |
| "loss": 0.1108, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 0.00023470551244437603, | |
| "loss": 0.1178, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 0.00023246636717109357, | |
| "loss": 0.1566, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 0.0002302286345415488, | |
| "loss": 0.1369, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 0.00022799249484461954, | |
| "loss": 0.1529, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 0.0002257581282408449, | |
| "loss": 0.1445, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 0.00022352571474790954, | |
| "loss": 0.1802, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 0.000221295434226141, | |
| "loss": 0.1218, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 0.00021906746636401824, | |
| "loss": 0.1158, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 0.00021684199066369448, | |
| "loss": 0.1802, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 0.0002146191864265354, | |
| "loss": 0.1179, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 0.00021239923273867317, | |
| "loss": 0.1403, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 0.00021018230845657804, | |
| "loss": 0.1483, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 0.0002079685921926476, | |
| "loss": 0.1175, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 0.00020575826230081757, | |
| "loss": 0.1689, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 0.00020355149686219123, | |
| "loss": 0.1084, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 0.00020134847367069225, | |
| "loss": 0.1598, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 0.0001991493702187403, | |
| "loss": 0.1352, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 0.0001969543636829508, | |
| "loss": 0.1491, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 0.00019476363090986011, | |
| "loss": 0.1352, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9328, | |
| "eval_loss": 0.22326812148094177, | |
| "eval_runtime": 132.2934, | |
| "eval_samples_per_second": 75.59, | |
| "eval_steps_per_second": 4.724, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 0.0001925773484016777, | |
| "loss": 0.0964, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 0.00019039569230206555, | |
| "loss": 0.1568, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 0.00018821883838194664, | |
| "loss": 0.0789, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 0.00018604696202534368, | |
| "loss": 0.0736, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 0.00018388023821524889, | |
| "loss": 0.1445, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 0.0001817188415195257, | |
| "loss": 0.0839, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 0.00017956294607684436, | |
| "loss": 0.161, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 0.0001774127255826522, | |
| "loss": 0.1418, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 0.0001752683532751792, | |
| "loss": 0.1222, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 0.0001731300019214805, | |
| "loss": 0.1034, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 0.00017099784380351728, | |
| "loss": 0.114, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 0.0001688720507042762, | |
| "loss": 0.0874, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 0.00016675279389392905, | |
| "loss": 0.1183, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 0.00016464024411603435, | |
| "loss": 0.1099, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 0.0001625345715737808, | |
| "loss": 0.1183, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 0.00016043594591627392, | |
| "loss": 0.1456, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 0.0001583445362248687, | |
| "loss": 0.1185, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 0.00015630211804928086, | |
| "loss": 0.0823, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 0.00015422549250609542, | |
| "loss": 0.1193, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 0.0001521565832906994, | |
| "loss": 0.0982, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 0.00015009555709026364, | |
| "loss": 0.1538, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 0.00014804257995684273, | |
| "loss": 0.1161, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.00014599781729399667, | |
| "loss": 0.0928, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 0.0001439614338434649, | |
| "loss": 0.1397, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.00014193359367189345, | |
| "loss": 0.1524, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 0.0001399144601576161, | |
| "loss": 0.1006, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 0.000137904195977492, | |
| "loss": 0.1066, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 0.00013590296309379834, | |
| "loss": 0.1155, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 0.00013391092274118256, | |
| "loss": 0.09, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 0.00013192823541367078, | |
| "loss": 0.0766, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 0.000129955060851738, | |
| "loss": 0.1123, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 0.00012799155802943818, | |
| "loss": 0.0839, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 0.00012603788514159553, | |
| "loss": 0.1349, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 0.0001240941995910598, | |
| "loss": 0.1229, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.0001221606579760241, | |
| "loss": 0.1318, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 0.00012023741607740837, | |
| "loss": 0.124, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 0.00011832462884630888, | |
| "loss": 0.1481, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 0.00011642245039151345, | |
| "loss": 0.1024, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 0.0001145310339670857, | |
| "loss": 0.0985, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 0.00011265053196001793, | |
| "loss": 0.1173, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 0.0001107810958779531, | |
| "loss": 0.1126, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 0.00010892287633697847, | |
| "loss": 0.0899, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 0.00010707602304949115, | |
| "loss": 0.1293, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 0.0001052406848121357, | |
| "loss": 0.094, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 0.00010341700949381593, | |
| "loss": 0.1207, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 0.0001016051440237816, | |
| "loss": 0.116, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 9.980523437979085e-05, | |
| "loss": 0.145, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 9.801742557634872e-05, | |
| "loss": 0.0873, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 9.624186165302378e-05, | |
| "loss": 0.1202, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 9.447868566284355e-05, | |
| "loss": 0.1306, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.936, | |
| "eval_loss": 0.22573316097259521, | |
| "eval_runtime": 132.6149, | |
| "eval_samples_per_second": 75.406, | |
| "eval_steps_per_second": 4.713, | |
| "step": 20000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 25000, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 4.667185901556672e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |