| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0993816733561657, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4e-06, | |
| "loss": 10.195, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8e-06, | |
| "loss": 8.4977, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2e-05, | |
| "loss": 7.7115, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6e-05, | |
| "loss": 7.0073, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2e-05, | |
| "loss": 6.386, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.4e-05, | |
| "loss": 5.9934, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.8e-05, | |
| "loss": 5.7449, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.2e-05, | |
| "loss": 5.5865, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.6e-05, | |
| "loss": 5.4709, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4e-05, | |
| "loss": 5.3401, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.4e-05, | |
| "loss": 5.2552, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.8e-05, | |
| "loss": 5.165, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5.2e-05, | |
| "loss": 5.0858, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 5.6e-05, | |
| "loss": 4.9837, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 6e-05, | |
| "loss": 4.9136, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 6.4e-05, | |
| "loss": 4.8214, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 4.7654, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 7.2e-05, | |
| "loss": 4.693, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.6e-05, | |
| "loss": 4.6122, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8e-05, | |
| "loss": 4.5828, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.400000000000001e-05, | |
| "loss": 4.5066, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.8e-05, | |
| "loss": 4.4385, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.2e-05, | |
| "loss": 4.4198, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.6e-05, | |
| "loss": 4.3611, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001, | |
| "loss": 4.3158, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000104, | |
| "loss": 4.2597, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.000108, | |
| "loss": 4.2201, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.000112, | |
| "loss": 4.1731, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00011600000000000001, | |
| "loss": 4.1267, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00012, | |
| "loss": 4.0979, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.000124, | |
| "loss": 4.0423, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.000128, | |
| "loss": 4.0117, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.000132, | |
| "loss": 3.9583, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 3.9295, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00014000000000000001, | |
| "loss": 3.898, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.000144, | |
| "loss": 3.8391, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.000148, | |
| "loss": 3.8254, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.000152, | |
| "loss": 3.784, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.000156, | |
| "loss": 3.7579, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00016, | |
| "loss": 3.7455, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.000164, | |
| "loss": 3.6903, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00016800000000000002, | |
| "loss": 3.6621, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00017199999999999998, | |
| "loss": 3.6469, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.000176, | |
| "loss": 3.6307, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 3.5867, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.000184, | |
| "loss": 3.5759, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00018800000000000002, | |
| "loss": 3.5661, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.000192, | |
| "loss": 3.5462, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00019600000000000002, | |
| "loss": 3.5015, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002, | |
| "loss": 3.4948, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.000204, | |
| "loss": 3.4657, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.000208, | |
| "loss": 3.4767, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000212, | |
| "loss": 3.436, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000216, | |
| "loss": 3.4037, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00022, | |
| "loss": 3.3969, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.000224, | |
| "loss": 3.3928, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.000228, | |
| "loss": 3.37, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00023200000000000003, | |
| "loss": 3.3616, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.000236, | |
| "loss": 3.3497, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00024, | |
| "loss": 3.3297, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.000244, | |
| "loss": 3.3118, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.000248, | |
| "loss": 3.3046, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.000252, | |
| "loss": 3.2869, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.000256, | |
| "loss": 3.2808, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00026000000000000003, | |
| "loss": 3.258, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.000264, | |
| "loss": 3.2364, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.000268, | |
| "loss": 3.2288, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00027200000000000005, | |
| "loss": 3.2161, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00027600000000000004, | |
| "loss": 3.2074, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00028000000000000003, | |
| "loss": 3.197, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00028399999999999996, | |
| "loss": 3.1904, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.000288, | |
| "loss": 3.1833, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.000292, | |
| "loss": 3.176, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.000296, | |
| "loss": 3.1737, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0003, | |
| "loss": 3.1608, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.000304, | |
| "loss": 3.1385, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.000308, | |
| "loss": 3.1352, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.000312, | |
| "loss": 3.1349, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.000316, | |
| "loss": 3.1161, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00032, | |
| "loss": 3.1061, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.000324, | |
| "loss": 3.1074, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.000328, | |
| "loss": 3.0859, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00033200000000000005, | |
| "loss": 3.0818, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00033600000000000004, | |
| "loss": 3.0857, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00034, | |
| "loss": 3.0687, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00034399999999999996, | |
| "loss": 3.0619, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.000348, | |
| "loss": 3.0348, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.000352, | |
| "loss": 3.0381, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.000356, | |
| "loss": 3.0196, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00035999999999999997, | |
| "loss": 3.0301, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.000364, | |
| "loss": 3.0148, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000368, | |
| "loss": 3.0309, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.000372, | |
| "loss": 3.0174, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00037600000000000003, | |
| "loss": 3.0017, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00038, | |
| "loss": 2.9999, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.000384, | |
| "loss": 2.9966, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.000388, | |
| "loss": 2.9843, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00039200000000000004, | |
| "loss": 2.9854, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00039600000000000003, | |
| "loss": 2.9827, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004, | |
| "loss": 2.965, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.000404, | |
| "loss": 2.9811, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.000408, | |
| "loss": 2.9534, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.000412, | |
| "loss": 2.9656, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.000416, | |
| "loss": 2.9361, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00042, | |
| "loss": 2.9461, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.000424, | |
| "loss": 2.9473, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.000428, | |
| "loss": 2.9238, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.000432, | |
| "loss": 2.9323, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.000436, | |
| "loss": 2.9481, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00044, | |
| "loss": 2.9187, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.000444, | |
| "loss": 2.9166, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.000448, | |
| "loss": 2.9183, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00045200000000000004, | |
| "loss": 2.9118, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.000456, | |
| "loss": 2.9008, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00046, | |
| "loss": 2.8965, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00046400000000000006, | |
| "loss": 2.8985, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00046800000000000005, | |
| "loss": 2.9029, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.000472, | |
| "loss": 2.8906, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00047599999999999997, | |
| "loss": 2.9007, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00048, | |
| "loss": 2.8885, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.000484, | |
| "loss": 2.8926, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.000488, | |
| "loss": 2.8688, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.000492, | |
| "loss": 2.8697, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.000496, | |
| "loss": 2.8661, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0005, | |
| "loss": 2.8604, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.000504, | |
| "loss": 2.8673, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.000508, | |
| "loss": 2.8675, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.000512, | |
| "loss": 2.8707, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0005160000000000001, | |
| "loss": 2.8666, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0005200000000000001, | |
| "loss": 2.85, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.000524, | |
| "loss": 2.8556, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.000528, | |
| "loss": 2.8575, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.000532, | |
| "loss": 2.8361, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.000536, | |
| "loss": 2.8411, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00054, | |
| "loss": 2.8409, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0005440000000000001, | |
| "loss": 2.8363, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0005480000000000001, | |
| "loss": 2.8257, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0005520000000000001, | |
| "loss": 2.8399, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0005560000000000001, | |
| "loss": 2.8253, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0005600000000000001, | |
| "loss": 2.8277, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0005639999999999999, | |
| "loss": 2.834, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0005679999999999999, | |
| "loss": 2.831, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0005719999999999999, | |
| "loss": 2.8291, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.000576, | |
| "loss": 2.8085, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00058, | |
| "loss": 2.8219, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.000584, | |
| "loss": 2.7945, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.000588, | |
| "loss": 2.8197, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.000592, | |
| "loss": 2.8132, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.000596, | |
| "loss": 2.8157, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0006, | |
| "loss": 2.8101, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.000604, | |
| "loss": 2.8123, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.000608, | |
| "loss": 2.8017, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.000612, | |
| "loss": 2.8047, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.000616, | |
| "loss": 2.8224, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00062, | |
| "loss": 2.8159, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.000624, | |
| "loss": 2.8053, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.000628, | |
| "loss": 2.8039, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.000632, | |
| "loss": 2.7909, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0006360000000000001, | |
| "loss": 2.8059, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00064, | |
| "loss": 2.8008, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000644, | |
| "loss": 2.8071, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000648, | |
| "loss": 2.7936, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.000652, | |
| "loss": 2.7817, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.000656, | |
| "loss": 2.8028, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00066, | |
| "loss": 2.7742, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0006640000000000001, | |
| "loss": 2.7892, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0006680000000000001, | |
| "loss": 2.7957, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0006720000000000001, | |
| "loss": 2.7993, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0006760000000000001, | |
| "loss": 2.7669, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00068, | |
| "loss": 2.7788, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.000684, | |
| "loss": 2.7812, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0006879999999999999, | |
| "loss": 2.7659, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.000692, | |
| "loss": 2.7729, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.000696, | |
| "loss": 2.7875, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0007, | |
| "loss": 2.7876, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.000704, | |
| "loss": 2.7802, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.000708, | |
| "loss": 2.7826, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.000712, | |
| "loss": 2.7742, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.000716, | |
| "loss": 2.7578, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0007199999999999999, | |
| "loss": 2.7884, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.000724, | |
| "loss": 2.7712, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.000728, | |
| "loss": 2.7708, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.000732, | |
| "loss": 2.771, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.000736, | |
| "loss": 2.7691, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00074, | |
| "loss": 2.7716, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.000744, | |
| "loss": 2.7658, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.000748, | |
| "loss": 2.7614, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0007520000000000001, | |
| "loss": 2.7769, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.000756, | |
| "loss": 2.7717, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00076, | |
| "loss": 2.7608, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.000764, | |
| "loss": 2.7644, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.000768, | |
| "loss": 2.7791, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.000772, | |
| "loss": 2.77, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.000776, | |
| "loss": 2.7608, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0007800000000000001, | |
| "loss": 2.7697, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0007840000000000001, | |
| "loss": 2.7597, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.0007880000000000001, | |
| "loss": 2.7686, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.0007920000000000001, | |
| "loss": 2.7595, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.000796, | |
| "loss": 2.745, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0008, | |
| "loss": 2.7619, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.000804, | |
| "loss": 2.7535, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.000808, | |
| "loss": 2.7519, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.0008120000000000001, | |
| "loss": 2.7566, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.000816, | |
| "loss": 2.7683, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00082, | |
| "loss": 2.7643, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.000824, | |
| "loss": 2.7591, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.000828, | |
| "loss": 2.7513, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.000832, | |
| "loss": 2.7585, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0008359999999999999, | |
| "loss": 2.7608, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00084, | |
| "loss": 2.7324, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.000844, | |
| "loss": 2.7575, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.000848, | |
| "loss": 2.7659, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.000852, | |
| "loss": 2.7628, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.000856, | |
| "loss": 2.7476, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00086, | |
| "loss": 2.7348, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.000864, | |
| "loss": 2.7582, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0008680000000000001, | |
| "loss": 2.7569, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.000872, | |
| "loss": 2.754, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.000876, | |
| "loss": 2.7571, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00088, | |
| "loss": 2.7427, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.000884, | |
| "loss": 2.7657, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.000888, | |
| "loss": 2.7527, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.000892, | |
| "loss": 2.7499, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.000896, | |
| "loss": 2.7459, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.0009000000000000001, | |
| "loss": 2.7472, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0009040000000000001, | |
| "loss": 2.7761, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.0009080000000000001, | |
| "loss": 2.7421, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.000912, | |
| "loss": 2.7459, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.000916, | |
| "loss": 2.7594, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00092, | |
| "loss": 2.7552, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.000924, | |
| "loss": 2.7298, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0009280000000000001, | |
| "loss": 2.7413, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0009320000000000001, | |
| "loss": 2.7566, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.0009360000000000001, | |
| "loss": 2.7536, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00094, | |
| "loss": 2.7431, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.000944, | |
| "loss": 2.7381, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.000948, | |
| "loss": 2.7428, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0009519999999999999, | |
| "loss": 2.7352, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0009559999999999999, | |
| "loss": 2.7462, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00096, | |
| "loss": 2.7498, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.000964, | |
| "loss": 2.7327, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.000968, | |
| "loss": 2.7351, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.000972, | |
| "loss": 2.7368, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.000976, | |
| "loss": 2.7504, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00098, | |
| "loss": 2.7456, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.000984, | |
| "loss": 2.7398, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.000988, | |
| "loss": 2.7282, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.000992, | |
| "loss": 2.7393, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.000996, | |
| "loss": 2.7343, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.001, | |
| "loss": 2.7518, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_accuracy": 0.5213318080618469, | |
| "eval_loss": 2.5872762203216553, | |
| "eval_runtime": 6159.3531, | |
| "eval_samples_per_second": 35.29, | |
| "eval_steps_per_second": 2.206, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.000996, | |
| "loss": 2.7304, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.000992, | |
| "loss": 2.7407, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.000988, | |
| "loss": 2.7432, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.000984, | |
| "loss": 2.7605, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00098, | |
| "loss": 2.7445, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.000976, | |
| "loss": 2.7514, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.000972, | |
| "loss": 2.7391, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.000968, | |
| "loss": 2.7188, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.000964, | |
| "loss": 2.7218, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00096, | |
| "loss": 2.7134, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0009559999999999999, | |
| "loss": 2.7103, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0009519999999999999, | |
| "loss": 2.755, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.000948, | |
| "loss": 2.7243, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.000944, | |
| "loss": 2.6953, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00094, | |
| "loss": 2.7182, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0009360000000000001, | |
| "loss": 2.7154, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0009320000000000001, | |
| "loss": 2.7025, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0009280000000000001, | |
| "loss": 2.6803, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.000924, | |
| "loss": 2.6936, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00092, | |
| "loss": 2.7001, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.000916, | |
| "loss": 2.6993, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.000912, | |
| "loss": 2.6761, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.0009080000000000001, | |
| "loss": 2.6881, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0009040000000000001, | |
| "loss": 2.6784, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0009000000000000001, | |
| "loss": 2.6896, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.000896, | |
| "loss": 2.6848, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.000892, | |
| "loss": 2.6726, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.000888, | |
| "loss": 2.667, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.000884, | |
| "loss": 2.6761, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00088, | |
| "loss": 2.6782, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.000876, | |
| "loss": 2.6735, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.000872, | |
| "loss": 2.6588, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0008680000000000001, | |
| "loss": 2.668, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.000864, | |
| "loss": 2.6486, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00086, | |
| "loss": 2.6698, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.000856, | |
| "loss": 2.6624, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.000852, | |
| "loss": 2.6426, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.000848, | |
| "loss": 2.6427, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.000844, | |
| "loss": 2.6428, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00084, | |
| "loss": 2.6502, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.0008359999999999999, | |
| "loss": 2.6465, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.000832, | |
| "loss": 2.6323, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.000828, | |
| "loss": 2.6417, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.000824, | |
| "loss": 2.6398, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00082, | |
| "loss": 2.624, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.000816, | |
| "loss": 2.6285, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.0008120000000000001, | |
| "loss": 2.6319, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.000808, | |
| "loss": 2.6353, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.000804, | |
| "loss": 2.6301, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0008, | |
| "loss": 2.6278, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.000796, | |
| "loss": 2.6239, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.0007920000000000001, | |
| "loss": 2.6101, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0007880000000000001, | |
| "loss": 2.62, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0007840000000000001, | |
| "loss": 2.6216, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.0007800000000000001, | |
| "loss": 2.6152, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.000776, | |
| "loss": 2.6032, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.000772, | |
| "loss": 2.6224, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.000768, | |
| "loss": 2.6032, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.000764, | |
| "loss": 2.5919, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00076, | |
| "loss": 2.5904, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.000756, | |
| "loss": 2.5956, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.0007520000000000001, | |
| "loss": 2.5912, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.000748, | |
| "loss": 2.5968, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.000744, | |
| "loss": 2.6009, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00074, | |
| "loss": 2.5942, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.000736, | |
| "loss": 2.588, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.000732, | |
| "loss": 2.5748, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.000728, | |
| "loss": 2.5746, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.000724, | |
| "loss": 2.5879, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.0007199999999999999, | |
| "loss": 2.57, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.000716, | |
| "loss": 2.5722, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.000712, | |
| "loss": 2.5742, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.000708, | |
| "loss": 2.5778, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 0.000704, | |
| "loss": 2.5566, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 0.0007, | |
| "loss": 2.5584, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.000696, | |
| "loss": 2.5534, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 0.000692, | |
| "loss": 2.5508, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 0.0006879999999999999, | |
| "loss": 2.545, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 0.000684, | |
| "loss": 2.5651, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.00068, | |
| "loss": 2.5641, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.0006760000000000001, | |
| "loss": 2.5541, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.0006720000000000001, | |
| "loss": 2.5422, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.0006680000000000001, | |
| "loss": 2.538, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 0.0006640000000000001, | |
| "loss": 2.5718, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.00066, | |
| "loss": 2.5429, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.000656, | |
| "loss": 2.5339, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 0.000652, | |
| "loss": 2.5395, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.000648, | |
| "loss": 2.5152, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.000644, | |
| "loss": 2.5249, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 0.00064, | |
| "loss": 2.5356, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 0.0006360000000000001, | |
| "loss": 2.5249, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.000632, | |
| "loss": 2.5293, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.000628, | |
| "loss": 2.5128, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.000624, | |
| "loss": 2.5339, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.00062, | |
| "loss": 2.5242, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.000616, | |
| "loss": 2.519, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 0.000612, | |
| "loss": 2.5265, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.000608, | |
| "loss": 2.5162, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.000604, | |
| "loss": 2.5088, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 0.0006, | |
| "loss": 2.4965, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.000596, | |
| "loss": 2.5009, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.000592, | |
| "loss": 2.5142, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.000588, | |
| "loss": 2.5097, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.000584, | |
| "loss": 2.492, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.00058, | |
| "loss": 2.4913, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.000576, | |
| "loss": 2.5008, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.0005719999999999999, | |
| "loss": 2.4828, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.0005679999999999999, | |
| "loss": 2.493, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.0005639999999999999, | |
| "loss": 2.4934, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.0005600000000000001, | |
| "loss": 2.4966, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.0005560000000000001, | |
| "loss": 2.4813, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.0005520000000000001, | |
| "loss": 2.4881, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 0.0005480000000000001, | |
| "loss": 2.4766, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.0005440000000000001, | |
| "loss": 2.4717, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.00054, | |
| "loss": 2.4767, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.000536, | |
| "loss": 2.4731, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 0.000532, | |
| "loss": 2.4789, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.000528, | |
| "loss": 2.4785, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.000524, | |
| "loss": 2.4717, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.0005200000000000001, | |
| "loss": 2.4715, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.0005160000000000001, | |
| "loss": 2.4764, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.000512, | |
| "loss": 2.4633, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.000508, | |
| "loss": 2.4619, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.000504, | |
| "loss": 2.4614, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.0005, | |
| "loss": 2.4626, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 0.000496, | |
| "loss": 2.4524, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.000492, | |
| "loss": 2.4671, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.000488, | |
| "loss": 2.4685, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.000484, | |
| "loss": 2.4533, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00048, | |
| "loss": 2.4287, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.00047599999999999997, | |
| "loss": 2.4472, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 0.000472, | |
| "loss": 2.4451, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 0.00046800000000000005, | |
| "loss": 2.4631, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.00046400000000000006, | |
| "loss": 2.4404, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 0.00046, | |
| "loss": 2.4413, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 0.000456, | |
| "loss": 2.4345, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.00045200000000000004, | |
| "loss": 2.4403, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.000448, | |
| "loss": 2.4408, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.000444, | |
| "loss": 2.4233, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.00044, | |
| "loss": 2.4415, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.000436, | |
| "loss": 2.4349, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 0.000432, | |
| "loss": 2.4287, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.000428, | |
| "loss": 2.4232, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.000424, | |
| "loss": 2.416, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 0.00042, | |
| "loss": 2.4277, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 0.000416, | |
| "loss": 2.4156, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.000412, | |
| "loss": 2.4263, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.000408, | |
| "loss": 2.4283, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.000404, | |
| "loss": 2.4172, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.0004, | |
| "loss": 2.4129, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.00039600000000000003, | |
| "loss": 2.4034, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.00039200000000000004, | |
| "loss": 2.4019, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.000388, | |
| "loss": 2.4149, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.000384, | |
| "loss": 2.395, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.00038, | |
| "loss": 2.4106, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.00037600000000000003, | |
| "loss": 2.3981, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.000372, | |
| "loss": 2.3877, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.000368, | |
| "loss": 2.3989, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.000364, | |
| "loss": 2.3811, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00035999999999999997, | |
| "loss": 2.3954, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.000356, | |
| "loss": 2.4033, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.000352, | |
| "loss": 2.3806, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.000348, | |
| "loss": 2.3749, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 0.00034399999999999996, | |
| "loss": 2.3896, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 0.00034, | |
| "loss": 2.3799, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00033600000000000004, | |
| "loss": 2.385, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00033200000000000005, | |
| "loss": 2.3733, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.000328, | |
| "loss": 2.381, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.000324, | |
| "loss": 2.3716, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00032, | |
| "loss": 2.3779, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 0.000316, | |
| "loss": 2.3693, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.000312, | |
| "loss": 2.3618, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.000308, | |
| "loss": 2.3636, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 0.000304, | |
| "loss": 2.3797, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 0.0003, | |
| "loss": 2.3638, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.000296, | |
| "loss": 2.3765, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.000292, | |
| "loss": 2.3635, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.000288, | |
| "loss": 2.3695, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.00028399999999999996, | |
| "loss": 2.35, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00028000000000000003, | |
| "loss": 2.3555, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00027600000000000004, | |
| "loss": 2.3592, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.00027200000000000005, | |
| "loss": 2.3623, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.000268, | |
| "loss": 2.3422, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 0.000264, | |
| "loss": 2.3495, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.00026000000000000003, | |
| "loss": 2.3479, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.000256, | |
| "loss": 2.3492, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.000252, | |
| "loss": 2.3475, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.000248, | |
| "loss": 2.3366, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.000244, | |
| "loss": 2.3599, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.00024, | |
| "loss": 2.3434, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.000236, | |
| "loss": 2.3507, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00023200000000000003, | |
| "loss": 2.3421, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 0.000228, | |
| "loss": 2.3198, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 0.000224, | |
| "loss": 2.31, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00022, | |
| "loss": 2.3369, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.000216, | |
| "loss": 2.3275, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.000212, | |
| "loss": 2.3318, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.000208, | |
| "loss": 2.3457, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.000204, | |
| "loss": 2.3336, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.0002, | |
| "loss": 2.3411, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00019600000000000002, | |
| "loss": 2.338, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.000192, | |
| "loss": 2.3059, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.00018800000000000002, | |
| "loss": 2.3253, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.000184, | |
| "loss": 2.3181, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 2.3329, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.000176, | |
| "loss": 2.3128, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.00017199999999999998, | |
| "loss": 2.3133, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.00016800000000000002, | |
| "loss": 2.3154, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.000164, | |
| "loss": 2.3158, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.00016, | |
| "loss": 2.3276, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.000156, | |
| "loss": 2.3017, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.000152, | |
| "loss": 2.3043, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.000148, | |
| "loss": 2.3292, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.000144, | |
| "loss": 2.3008, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00014000000000000001, | |
| "loss": 2.3083, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 2.297, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 0.000132, | |
| "loss": 2.2969, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.000128, | |
| "loss": 2.304, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.000124, | |
| "loss": 2.2976, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.00012, | |
| "loss": 2.2944, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.00011600000000000001, | |
| "loss": 2.2965, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.000112, | |
| "loss": 2.2915, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.000108, | |
| "loss": 2.2986, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.000104, | |
| "loss": 2.2986, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2876, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.6e-05, | |
| "loss": 2.2889, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 9.2e-05, | |
| "loss": 2.2914, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 8.8e-05, | |
| "loss": 2.2848, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 8.400000000000001e-05, | |
| "loss": 2.2822, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 8e-05, | |
| "loss": 2.272, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 7.6e-05, | |
| "loss": 2.2862, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 7.2e-05, | |
| "loss": 2.2756, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 2.2796, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 6.4e-05, | |
| "loss": 2.2766, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 6e-05, | |
| "loss": 2.2662, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 5.6e-05, | |
| "loss": 2.2805, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 5.2e-05, | |
| "loss": 2.2668, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 4.8e-05, | |
| "loss": 2.2776, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 4.4e-05, | |
| "loss": 2.2768, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 4e-05, | |
| "loss": 2.2644, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 3.6e-05, | |
| "loss": 2.2822, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 3.2e-05, | |
| "loss": 2.2597, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8e-05, | |
| "loss": 2.2602, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.4e-05, | |
| "loss": 2.2677, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2e-05, | |
| "loss": 2.2615, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 1.6e-05, | |
| "loss": 2.26, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.2e-05, | |
| "loss": 2.2612, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 8e-06, | |
| "loss": 2.2487, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 4e-06, | |
| "loss": 2.267, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.0, | |
| "loss": 2.2587, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "eval_accuracy": 0.5823700512250327, | |
| "eval_loss": 2.1487433910369873, | |
| "eval_runtime": 6151.3334, | |
| "eval_samples_per_second": 35.336, | |
| "eval_steps_per_second": 2.209, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "step": 50000, | |
| "total_flos": 7.55408847241216e+17, | |
| "train_loss": 2.8628560260009768, | |
| "train_runtime": 151959.6932, | |
| "train_samples_per_second": 84.233, | |
| "train_steps_per_second": 0.329 | |
| } | |
| ], | |
| "max_steps": 50000, | |
| "num_train_epochs": 4, | |
| "total_flos": 7.55408847241216e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |